Fetching timeseries using Web API

This tutorial shows how to fetch timeseries from multiple locations via the Nexus Web API and combine them into a single wide-format table, where each row is a timestamp and each column is a location. The approach works correctly even when locations have timeseries of different lengths or with different timestamps.

Info

This tutorial assumes that you have:

  • Python installed on your computer, with the pandas and requests packages
  • a valid Nexus Web API token (with Data Consumer authorisation; see API Basics for more info)
"""Example: fetch groundwater_head_m_msl timeseries events for groundwater_monitoring_tube features.

Results in a Pandas DataFrame like:

timestamp                  B44C0214-002 GMW000000061643_1   B40H1475
--------------------------------------------------------------------
2026-01-04 00:00:00+00:00  0.539        13.765              NaN
2026-01-05 00:00:00+00:00  0.544        13.761              NaN
2026-01-06 00:00:00+00:00  NaN          13.752              12.95
"""
from io import StringIO
from typing import Optional

import pandas as pd
import requests

# FILL IN THESE CONSTANTS
# -----------------------
WEB_API_TOKEN: str = ""
WORLD_ALIAS: str = ""
FEATURE_TYPE: str = "groundwater_monitoring_tube"  # To list options, run get_available_feature_types() below
PARAMETER: str = "groundwater_head_m_msl"  # To list options, run get_available_timeseries_parameters() below
TIME_RANGE: str = "-P3M/+P0M"  # Only last 3 months. For options, see Web API docs > WFS > temporal-filtering
MAX_FEATURES: int = 3  # For this demo we request timeseries for only 3 features/locations
# -----------------------

# Nexus Web API endpoints used below (WFS for features, Events for timeseries data).
URL_WFS: str = "https://nexus.stellaspark.com/api/v1/wfs/"
URL_EVENTS: str = "https://nexus.stellaspark.com/api/v1/events/"
URL_SYNC_TASKS: str = "https://nexus.stellaspark.com/api/v1/sync_tasks/"

def get_available_feature_types(world: str, token: str) -> list[str]:
    """Return the sorted, de-duplicated feature_types available in a given world.

    Pages through the sync_tasks endpoint (following each response's 'next'
    link) and collects the feature_type of every data connection.

    Args:
        world: Nexus world alias.
        token: Web API token with Data Consumer authorisation.

    Returns:
        Sorted list of unique feature_type names.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    print("Get available feature_types.")
    feature_types: set[str] = set()
    url = URL_SYNC_TASKS
    # Let requests build the query string so world/token are URL-encoded
    # (interpolating them into the URL breaks on special characters).
    params = {"world": world, "page_size": 50, "token": token}
    while url:
        response = requests.get(url, params=params, timeout=30)  # timeout: never hang forever
        response.raise_for_status()
        data = response.json()
        feature_types.update(
            dc["feature_type"] for result in data["results"] for dc in result["dataconnections"]
        )
        # 'next' is a fully-qualified URL (or None) that already carries the query string.
        url, params = data["next"], None
    return sorted(feature_types)

def get_available_timeseries_parameters(world: str, token: str, feature_type: Optional[str] = None) -> list[str]:
    """Return the sorted, de-duplicated timeseries parameters in a given world.

    Queries the WFS 'timeseries' type with distinct_on=parameter, optionally
    keeping only timeseries that belong to one feature_type.

    Args:
        world: Nexus world alias.
        token: Web API token with Data Consumer authorisation.
        feature_type: if given, only parameters of this feature_type are returned.

    Returns:
        Sorted list of unique parameter names.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    print("Get available timeseries parameters.")
    # Let requests URL-encode the query string instead of building it by hand.
    params = {
        "world": world,
        "request": "getfeature",
        "outputformat": "json",
        "typenames": "timeseries",
        "distinct_on": "parameter",
        "token": token,
    }
    response = requests.get(URL_WFS, params=params, timeout=30)  # timeout: never hang forever
    response.raise_for_status()
    features = response.json()["features"]
    if feature_type:
        features = [f for f in features if f["properties"]["feature_type"] == feature_type]
    return sorted({f["properties"]["parameter"] for f in features})

print("Step 1: Validate that the chosen FEATURE_TYPE and PARAMETER exist in the world.")
feature_types = get_available_feature_types(world=WORLD_ALIAS, token=WEB_API_TOKEN)
if FEATURE_TYPE not in feature_types:
    msg = f"FEATURE_TYPE '{FEATURE_TYPE}' does not exist in world '{WORLD_ALIAS}'"
    raise AssertionError(f"{msg}. Choose from {', '.join(feature_types)}.")

parameters = get_available_timeseries_parameters(world=WORLD_ALIAS, token=WEB_API_TOKEN, feature_type=FEATURE_TYPE)
if PARAMETER not in parameters:
    msg = f"PARAMETER '{PARAMETER}' does not exist in world '{WORLD_ALIAS}' for feature_type '{FEATURE_TYPE}'"
    raise AssertionError(f"{msg}. Choose from {', '.join(parameters)}.")

print("Step 2: Find locations and their timeseries IDs.")
# The WFS 'feature_event' view returns features enriched with their linked timeseries metadata. By default only
# the most recent event is included (event_limit=1), which is enough to obtain the timeseries_id. The cql_filter
# limits results to locations that have a timeseries for the parameter of interest and that actually have events
# in the requested time range.
params = {
    "world": WORLD_ALIAS,
    "request": "getfeature",
    "typenames": FEATURE_TYPE,
    "view": "feature_event",
    "cql_filter": f"timeseries_parameter = '{PARAMETER}' AND event_timestamp during {TIME_RANGE}",
    "count": MAX_FEATURES,
    "outputformat": "json",
    "token": WEB_API_TOKEN,
}
response = requests.get(URL_WFS, params=params)
response.raise_for_status()
features = response.json()["features"]

print("Step 3: Fetch events per location from the Events API.")
# The Events API returns CSV, so StringIO is used to read it directly into a DataFrame. All timestamps in Nexus are
# in UTC. Nexus returns at most 100 000 events per request, so use count + startindex to paginate through longer
# series (see the Events API docs).
df_features_events = []
for feature in features:
    properties = feature["properties"]
    params = {
        "world": WORLD_ALIAS,
        "timeseries_id": properties["timeseries_id"],
        "range": TIME_RANGE,
        "token": WEB_API_TOKEN,
    }
    response = requests.get(URL_EVENTS, params=params)
    response.raise_for_status()
    df_feature_events = pd.read_csv(StringIO(response.text), parse_dates=["timestamp"])
    # Use feature id_src if feature has no label
    df_feature_events["location"] = properties.get("label", properties["id_src"])
    df_features_events.append(df_feature_events)

print("Step 4: Merge all features/locations into one dataframe.")
# Each Series is indexed by timestamp, so pd.DataFrame aligns values by timestamp across all locations. To account
# for unequal-length timeseries we use NaN values.
series = {df["location"].iloc[0]: df.set_index("timestamp")["value"] for df in df_features_events}
df = pd.DataFrame(series)

print(df.head())