-
Notifications
You must be signed in to change notification settings - Fork 1
Add Pystp netcdf driver #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9cdb298
89d5e2d
72c70a0
f090582
13000cc
598114e
ec4e33c
60ccb5e
c7bb13d
254dd2c
9380416
806ffef
575f590
b8bf933
2b3ff37
bf2920b
615d5bb
cf54ca2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| import netCDF4 | ||
| import numpy as np | ||
| from typing import Any | ||
|
|
||
|
|
||
class Driver:
    """NetCDF4 driver implementing the PyISTP Driver protocol.

    The driver wraps a read-only ``netCDF4.Dataset`` opened either from a
    path or from an in-memory buffer. It can be used as a context manager
    so that the underlying dataset is closed deterministically::

        with Driver(path) as drv:
            drv.variables()
    """

    # Mapping from numpy dtype codes ("<kind><itemsize>") to CDF type strings.
    # Byte/unicode string dtypes are handled separately (see
    # ``_is_string_dtype``) because their itemsize varies.
    _DTYPE_TO_CDF = {
        'f4': 'CDF_FLOAT',
        'f8': 'CDF_DOUBLE',
        'i1': 'CDF_INT1',
        'i2': 'CDF_INT2',
        'i4': 'CDF_INT4',
        'i8': 'CDF_INT8',
        'u1': 'CDF_UINT1',
        'u2': 'CDF_UINT2',
        'u4': 'CDF_UINT4',
        'S': 'CDF_CHAR',
    }

    # Milliseconds between CDF epoch (year 0000) and Unix epoch (1970-01-01)
    _CDF_EPOCH_OFFSET_MS = 62_167_219_200_000

    def __init__(self, file):
        """Open *file* for reading.

        *file* is either a path (anything ``str()`` turns into a path) or a
        bytes-like buffer (``bytes``, ``bytearray``, ``memoryview``) holding
        the raw NetCDF content.
        """
        if isinstance(file, (bytes, bytearray, memoryview)):
            # The name is only a label; the data comes from the buffer.
            self._ds = netCDF4.Dataset("in_memory.nc", memory=file)
        else:
            self._ds = netCDF4.Dataset(str(file), "r")

    # -- resource management -------------------------------------------------

    def close(self):
        """Close the underlying dataset; safe to call more than once."""
        ds = getattr(self, "_ds", None)
        if ds is not None and ds.isopen():
            ds.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    # -- Driver protocol: structure and attributes ---------------------------

    def variables(self):
        """Return the names of all variables in the dataset."""
        return list(self._ds.variables.keys())

    def has_variable(self, name):
        """Return True if *name* is a variable of the dataset."""
        return name in self._ds.variables

    def variable_attributes(self, var):
        """Return the attribute names of *var* ([] if *var* is unknown)."""
        if var not in self._ds.variables:
            return []
        return list(self._ds[var].ncattrs())

    def variable_attribute_value(self, var, attr):
        """Return attribute *attr* of *var*, or None if either is missing."""
        if var not in self._ds.variables:
            return None
        try:
            return self._ds[var].getncattr(attr)
        except AttributeError:
            return None

    @staticmethod
    def _is_string_dtype(dtype):
        """Return True for the ``str`` vlen type or a numpy S/U dtype."""
        if dtype is str:
            return True
        return getattr(dtype, 'kind', None) in ('S', 'U')

    def is_char(self, var):
        """Return True if *var* exists and holds character data."""
        if var not in self._ds.variables:
            return False
        return self._is_string_dtype(self._ds[var].dtype)

    def is_nrv(self, var):  # NOSONAR
        """Always False: the NRV concept does not exist in NetCDF4."""
        return False

    def shape(self, var):
        """Return the shape of *var* as a tuple."""
        return tuple(self._ds[var].shape)

    def attributes(self):
        """Return the names of the global attributes."""
        return list(self._ds.ncattrs())

    def attribute(self, key):
        """Return global attribute *key*, or None if it does not exist."""
        try:
            return self._ds.getncattr(key)
        except AttributeError:
            return None

    # -- time detection helpers ----------------------------------------------

    def _get_units(self, var):
        """Return the units attribute of *var* (case-insensitive name match),
        or None when the variable has no such attribute."""
        v = self._ds[var]
        for key in v.ncattrs():
            if key.lower() == 'units':
                return v.getncattr(key)
        return None

    def _is_cf_time(self, var):
        """Return True if the variable uses CF time conventions
        (units attribute containing 'since')."""
        units = self._get_units(var)
        return isinstance(units, str) and 'since' in units

    def _is_cdf_epoch(self, var):
        """Return True if the variable uses CDF_EPOCH convention
        (float64, units='ms')."""
        units = self._get_units(var)
        return (isinstance(units, str)
                and units.strip().lower() == 'ms'
                and self._ds[var].dtype == np.float64)

    def _is_unix_ms_time(self, var):
        """Return True if the units attribute of *var* is 'milliseconds'."""
        units = self._get_units(var)
        return isinstance(units, str) and units.strip().lower() == 'milliseconds'

    # -- time conversion helpers ---------------------------------------------

    @staticmethod
    def _dates_to_datetime64(dates):
        """Convert datetime-like objects to a datetime64[ns] array.

        *dates* may be a scalar, a sequence or an N-d (possibly masked) array
        of objects whose ``str()`` is an ISO-like timestamp. The shape is
        preserved; masked entries become NaT.
        """
        if np.ma.isMaskedArray(dates):
            mask = np.ma.getmaskarray(dates)
            data = np.asarray(dates.data, dtype=object)
        else:
            data = np.asarray(dates, dtype=object)
            mask = np.zeros(data.shape, dtype=bool)
        out = np.full(data.shape, np.datetime64('NaT', 'ns'))
        flat_out = out.reshape(-1)  # view on ``out``: writes go through
        for i, (d, masked) in enumerate(zip(data.reshape(-1),
                                            mask.reshape(-1))):
            if not masked:
                # Convert via the ISO string representation
                flat_out[i] = np.datetime64(str(d), 'ns')
        return out

    def _cf_time_to_datetime64(self, var):
        """Convert a CF time variable (units with 'since') to
        datetime64[ns]."""
        v = self._ds[var]
        # Same case-insensitive lookup as the detection, so that e.g. a
        # 'UNITS' attribute does not pass detection and then fail here.
        units = self._get_units(var)
        try:
            calendar = v.getncattr('calendar')
        except AttributeError:
            calendar = 'standard'
        # netCDF4.num2date converts CF numbers to datetime/cftime objects
        dates = netCDF4.num2date(
            v[:], units, calendar=calendar, only_use_cftime_datetimes=False
        )
        return self._dates_to_datetime64(dates)

    def _cdf_epoch_to_datetime64(self, var):
        """Convert CDF_EPOCH (ms since year 0000) to datetime64[ns]."""
        ms = np.array(self._ds[var][:], dtype=np.float64)
        unix_ms = ms - self._CDF_EPOCH_OFFSET_MS
        return (unix_ms * 1_000_000).astype('datetime64[ns]')

    def _unix_ms_time_to_datetime64(self, var):
        """Convert ms since Unix epoch (1970-01-01) to datetime64[ns]."""
        ms = np.array(self._ds[var][:], dtype=np.int64)
        return (ms * 1_000_000).astype('datetime64[ns]')

    # -- Driver protocol: data -----------------------------------------------

    def values(self, var, is_metadata_variable=False):  # NOSONAR
        """Return the content of *var* as a numpy array.

        Variables recognised as time (CF 'since' units, CDF_EPOCH, or
        'milliseconds' units) are returned as datetime64[ns]; native string
        variables as an array of strings; everything else as stored.
        *is_metadata_variable* is accepted for protocol compatibility and is
        not used here.
        """
        # NOTE(review): a reviewer argued that interpreting the data and
        # converting it to datetime64 is not the responsibility of the pyistp
        # driver and should instead be handled by the consuming tool (the
        # Speasy codec), where it can be adapted more easily depending on the
        # provider.
        v = self._ds[var]
        if self._is_cf_time(var):
            return self._cf_time_to_datetime64(var)
        if self._is_cdf_epoch(var):
            return self._cdf_epoch_to_datetime64(var)
        if self._is_unix_ms_time(var):
            return self._unix_ms_time_to_datetime64(var)
        if v.dtype == str:
            # Native NetCDF4 string — return as numpy array of strings
            raw = v[()]
            if isinstance(raw, str):
                raw = [raw]
            return np.array(raw)
        return np.array(v[:])

    def cdf_type(self, var):
        """Return the CDF type name that best matches *var*.

        Unmapped dtypes yield ``'CDF_UNKNOWN_<kind><itemsize>'``.
        """
        if self._is_cf_time(var) or self._is_unix_ms_time(var):
            return 'CDF_TIME_TT2000'
        if self._is_cdf_epoch(var):
            return 'CDF_EPOCH'
        dtype = self._ds[var].dtype
        if self._is_string_dtype(dtype):
            return 'CDF_CHAR'
        dtype = np.dtype(dtype)
        # kind + itemsize is independent of the byte-order prefix; the
        # previous dtype.str-based key kept the '|' of one-byte types, so
        # int8/uint8 were never found in the table.
        code = f'{dtype.kind}{dtype.itemsize}'
        return self._DTYPE_TO_CDF.get(code, f'CDF_UNKNOWN_{code}')
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,3 +8,5 @@ coverage | |
| Sphinx | ||
| twine | ||
| ddt | ||
| netCDF4 | ||
| pytest-cov | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As mentioned in another comment, I would like to take into account the fact that the driver used to read the master file may differ from the one used to read the data file.