Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions ocf_data_sampler/load/nwp/nwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import numpy as np
import xarray as xr

from ocf_data_sampler.load.nwp.providers.cloudcasting import open_cloudcasting
from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
from ocf_data_sampler.load.nwp.providers.gdm import open_gdm
from ocf_data_sampler.load.nwp.providers.gfs import open_gfs
from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
from ocf_data_sampler.load.nwp.providers.loaders import (
open_cloudcasting,
open_gdm,
open_gfs,
open_icon_eu,
open_ifs,
open_ukv,
)

_OPEN_NWP_FUNCTIONS: dict[str, Callable[..., xr.DataArray]] = {
"ukv": open_ukv,
Expand Down
49 changes: 0 additions & 49 deletions ocf_data_sampler/load/nwp/providers/cloudcasting.py

This file was deleted.

34 changes: 0 additions & 34 deletions ocf_data_sampler/load/nwp/providers/ecmwf.py

This file was deleted.

31 changes: 0 additions & 31 deletions ocf_data_sampler/load/nwp/providers/gdm.py

This file was deleted.

41 changes: 0 additions & 41 deletions ocf_data_sampler/load/nwp/providers/gfs.py

This file was deleted.

37 changes: 0 additions & 37 deletions ocf_data_sampler/load/nwp/providers/icon.py

This file was deleted.

108 changes: 108 additions & 0 deletions ocf_data_sampler/load/nwp/providers/loaders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""NWP provider loaders.

All providers follow the same pipeline:
open zarr -> standardise dim/coord names -> shared post-processing.
Comment on lines +3 to +4
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
All providers follow the same shape:
open zarr -> normalise dim/coord names -> shared post-processing.
All providers follow the same pipeline:
open zarr -> standardise dim/coord names -> shared post-processing.

Just a few suggestions on wording so it doesn't get confusing: "shape" often refers to the shape of the data, and "normalise" to the normalisation of data, which we do elsewhere in this repo, so it is better to avoid those words here.


`_open_regular_grid_nwp` is the shared tail. Per-provider functions only
handle the open + renaming step that differs between data sources.
"""

import logging

import xarray as xr

from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
from ocf_data_sampler.load.utils import (
check_time_unique_increasing,
get_xr_data_array_from_xr_dataset,
make_spatial_coords_increasing,
)

_log = logging.getLogger(__name__)


def _open_regular_grid_nwp(
    ds: xr.Dataset | xr.DataArray,
    x_coord: str,
    y_coord: str,
) -> xr.DataArray:
    """Apply the post-processing steps common to every regular-grid NWP loader.

    The input must already carry the dims/coords ``init_time_utc``, ``step``
    and ``channel``, together with the spatial dims named by ``x_coord`` and
    ``y_coord``.

    Args:
        ds: Opened NWP data, either a Dataset or a DataArray.
        x_coord: Name of the x spatial dimension/coordinate.
        y_coord: Name of the y spatial dimension/coordinate.

    Returns:
        The data transposed to
        ``(init_time_utc, step, channel, x_coord, y_coord)``. A Dataset input
        is converted with ``get_xr_data_array_from_xr_dataset``; a DataArray
        input is returned as-is after the transpose.
    """
    # Validate the init-time axis before touching the data.
    check_time_unique_increasing(ds.init_time_utc)

    dim_order = ("init_time_utc", "step", "channel", x_coord, y_coord)
    ordered = make_spatial_coords_increasing(ds, x_coord=x_coord, y_coord=y_coord)
    ordered = ordered.transpose(*dim_order)

    # DataArrays are already in their final form; only Datasets need converting.
    if not isinstance(ordered, xr.Dataset):
        return ordered
    return get_xr_data_array_from_xr_dataset(ordered)


def open_ifs(zarr_path: str | list[str]) -> xr.DataArray:
    """Open ECMWF IFS / MetOffice Global NWP data.

    Args:
        zarr_path: Path, or list of paths, handed to ``open_zarr_paths``.

    Returns:
        The output of ``_open_regular_grid_nwp`` with ``longitude`` and
        ``latitude`` as the spatial coordinates.
    """
    ds = open_zarr_paths(zarr_path, backend="tensorstore")

    # LEGACY SUPPORT - older zarrs use "init_time"/"variable" dim names.
    # Only rename the names that are actually present (the same rename-map
    # pattern open_ukv uses), so a store that already has the target names
    # does not make rename() raise.
    legacy_names = {"init_time": "init_time_utc", "variable": "channel"}
    ds = ds.rename(
        {
            old: new
            for old, new in legacy_names.items()
            if old in ds.dims or old in ds.coords
        },
    )

    # NOTE(review): open_ifs and open_gdm now differ only in this rename step
    # and in the time_dim passed to open_zarr_paths, so they could share one
    # generic lat/lon opener. open_zarr_paths is called without time_dim here;
    # it may need to try "init_time" and fall back to "init_time_utc" for
    # stores that already use the new name - confirm against open_zarr_paths.
    return _open_regular_grid_nwp(ds, x_coord="longitude", y_coord="latitude")


def open_gdm(zarr_path: str | list[str]) -> xr.DataArray:
    """Open GDM (e.g. GenCast) NWP data.

    Args:
        zarr_path: Path, or list of paths, handed to ``open_zarr_paths``.

    Returns:
        The output of ``_open_regular_grid_nwp`` with ``longitude`` and
        ``latitude`` as the spatial coordinates.
    """
    # No renaming step: the store is opened with "init_time_utc" as time_dim.
    dataset = open_zarr_paths(
        zarr_path,
        time_dim="init_time_utc",
        backend="tensorstore",
    )
    return _open_regular_grid_nwp(
        dataset,
        x_coord="longitude",
        y_coord="latitude",
    )


def open_gfs(zarr_path: str | list[str], public: bool = False) -> xr.DataArray:
    """Open GFS NWP data.

    Args:
        zarr_path: Path, or list of paths, handed to ``open_zarr_paths``.
        public: Forwarded unchanged to ``open_zarr_paths``.

    Returns:
        The output of ``_open_regular_grid_nwp`` with ``longitude`` and
        ``latitude`` as the spatial coordinates.
    """
    _log.info("Loading NWP GFS data")

    # Stack the dataset's variables along a new "channel" dimension straight
    # away, so no separate reference to the opened Dataset is kept around.
    stacked = open_zarr_paths(
        zarr_path,
        time_dim="init_time_utc",
        public=public,
        backend="dask",
    ).to_array(dim="channel")

    return _open_regular_grid_nwp(stacked, x_coord="longitude", y_coord="latitude")


def open_icon_eu(zarr_path: str | list[str]) -> xr.DataArray:
    """Open DWD ICON-EU data.

    ICON-EU is expected to be on a regular lat/lon grid with a 'channel' dim.
    Only the first 78 steps (the one-hour ones) are kept; the later steps are
    three-hourly.

    Args:
        zarr_path: Path, or list of paths, handed to ``open_zarr_paths``.

    Returns:
        The output of ``_open_regular_grid_nwp`` with ``longitude`` and
        ``latitude`` as the spatial coordinates.

    Raises:
        ValueError: If the opened store has no ``icon_eu_data`` variable.
    """
    dataset = open_zarr_paths(zarr_path, time_dim="init_time_utc", backend="dask")

    has_payload = "icon_eu_data" in dataset.data_vars
    if not has_payload:
        raise ValueError("Could not find 'icon_eu_data' DataArray in the ICON-EU Zarr file.")

    # Keep only the leading 78 steps of the payload variable.
    hourly_steps = slice(0, 78)
    hourly = dataset["icon_eu_data"].isel(step=hourly_steps)
    return _open_regular_grid_nwp(hourly, x_coord="longitude", y_coord="latitude")


def open_ukv(zarr_path: str | list[str]) -> xr.DataArray:
    """Open UKV NWP data (OSGB grid).

    Args:
        zarr_path: Path, or list of paths, handed to ``open_zarr_paths``.

    Returns:
        The output of ``_open_regular_grid_nwp`` with ``x_osgb`` and
        ``y_osgb`` as the spatial coordinates.
    """
    ds = open_zarr_paths(zarr_path, backend="tensorstore")

    legacy_to_current = (
        ("init_time", "init_time_utc"),
        ("variable", "channel"),
        ("x", "x_osgb"),
        ("y", "y_osgb"),
    )
    # New UKV data already uses the target names, so collect only the legacy
    # names that are present among the coordinates.
    renames = {}
    for legacy, current in legacy_to_current:
        if legacy in ds.coords:
            renames[legacy] = current

    ds = ds.rename(renames)
    return _open_regular_grid_nwp(ds, x_coord="x_osgb", y_coord="y_osgb")


def open_cloudcasting(zarr_path: str | list[str]) -> xr.DataArray:
    """Open OCF cloudcasting satellite-prediction data (geostationary grid).

    Args:
        zarr_path: Path, or list of paths, handed to ``open_zarr_paths``.

    Returns:
        The output of ``_open_regular_grid_nwp`` with ``x_geostationary`` and
        ``y_geostationary`` as the spatial coordinates.

    References:
        [1] https://www.openclimatefix.org/projects/cloud-forecasting
        [2] https://github.com/ClimeTrend/cloudcasting
        [3] https://github.com/openclimatefix/sat_pred
    """
    # Map the stored names onto the names the shared post-processing expects.
    name_map = {
        "init_time": "init_time_utc",
        "variable": "channel",
    }
    renamed = open_zarr_paths(zarr_path, backend="tensorstore").rename(name_map)
    return _open_regular_grid_nwp(
        renamed,
        x_coord="x_geostationary",
        y_coord="y_geostationary",
    )
42 changes: 0 additions & 42 deletions ocf_data_sampler/load/nwp/providers/ukv.py

This file was deleted.

Loading