From cfabab6034430dc46351a1476abc6cb1c4669932 Mon Sep 17 00:00:00 2001
From: rhaegar325 <rhaegar227@gmail.com>
Date: Fri, 26 Jun 2026 12:53:04 +1000
Subject: [PATCH 1/4] Fix Oyr cmorisation: resampling, time bounds, calendar, filename

---
 src/access_moppy/base.py                  |  27 +++++
 src/access_moppy/driver.py                |  13 ++-
 src/access_moppy/ocean.py                 |  12 +++
 src/access_moppy/utilities.py             | 122 ++++++++++++++++++++--
 src/access_moppy/vocabulary_processors.py |   4 +
 tests/unit/test_utilities.py              | 120 +++++++++++++++++++++
 tests/unit/test_vocabulary_processors.py  |  24 +++++
 7 files changed, 313 insertions(+), 9 deletions(-)

diff --git a/src/access_moppy/base.py b/src/access_moppy/base.py
index 7e48f806..7d163d50 100644
--- a/src/access_moppy/base.py
+++ b/src/access_moppy/base.py
@@ -22,6 +22,7 @@
     calculate_longitude_bounds,
     calculate_time_bounds,
     normalize_cf_time_units,
+    parse_cmip6_table_frequency,
     type_mapping,
     validate_and_resample_if_needed,
     validate_cmip6_frequency_compatibility,
@@ -596,6 +597,29 @@ def rechunk_dataset(self):
             else:
                 logger.debug("No dataset loaded, cannot rechunk")
 
+    def _target_frequency_hint(self):
+        """Map the CMOR table's target frequency to a coarse label
+        ("daily"/"monthly"/"yearly") for time-bounds construction.
+
+        Fallback for a single-point time axis, whose frequency cannot be inferred
+        from point spacing. Returns None when ``compound_name`` is unset, the
+        table frequency cannot be parsed, or it is not ~1, 28-31 or 360-366 days.
+        """
+        if not self.compound_name:
+            return None
+        try:
+            target = parse_cmip6_table_frequency(self.compound_name)
+        except Exception:  # best-effort: an unparseable table just means no hint
+            return None
+        days = target.total_seconds() / 86400  # target period length in days
+        if 0.9 <= days <= 1.1:
+            return "daily"
+        if 28 <= days <= 31:
+            return "monthly"
+        if 360 <= days <= 366:
+            return "yearly"
+        return None
+
     def calculate_missing_bounds_variables(self, bnds_required):
         """Calculate missing bounds variables for coordinates."""
         for bnds_var in bnds_required:
@@ -622,6 +646,9 @@ def calculate_missing_bounds_variables(self, bnds_required):
                         self.ds,
                         time_coord=coord_name,
                         bnds_name="bnds",  # Atmosphere uses "bnds"
+                        # Fallback for a single time point (e.g. one resampled
+                        # year) where the frequency cannot be inferred.
+                        freq_hint=self._target_frequency_hint(),
                     )
 
                 elif coord_name in ["lat", "latitude", "y"]:
diff --git a/src/access_moppy/driver.py b/src/access_moppy/driver.py
index 68d3d7cc..65ce29c1 100644
--- a/src/access_moppy/driver.py
+++ b/src/access_moppy/driver.py
@@ -120,7 +120,7 @@ def __init__(
         parent_info: dict[str, dict[str, Any]] | None = None,
         model_id: str | None = None,
         validate_frequency: bool = True,
-        enable_resampling: bool = False,
+        enable_resampling: bool = True,
         enable_chunking: bool = False,
         resampling_method: str = "auto",
         input_folder: str | Path | None = None,
@@ -157,7 +157,10 @@ def __init__(
             validate_frequency: Validate temporal frequency consistency across
                 file inputs.  This is disabled automatically for xarray inputs.
             enable_resampling: Enable automatic temporal resampling when
-                frequency mismatches are detected.
+                frequency mismatches are detected. Defaults to ``True``;
+                resampling is a no-op when the input already matches the target
+                frequency, and only triggers on a genuine mismatch (e.g. monthly
+                input for an ``Oyr`` table). Pass ``False`` to disable.
             enable_chunking: Enable dask chunking in supported component
                 CMORisers.
             resampling_method: Temporal resampling method: ``"auto"``,
@@ -515,6 +518,9 @@ def __init__(
                     vocab=self.vocab,
                     variable_mapping=self.variable_mapping.to_dict(),
                     drs_root=drs_root if drs_root else None,
+                    validate_frequency=self.validate_frequency,
+                    enable_resampling=self.enable_resampling,
+                    resampling_method=self.resampling_method,
                 )
             else:
                 # ACCESS-OM2 uses MOM5 (B-grid) — handled by a separate CMORiser class
@@ -528,6 +534,9 @@ def __init__(
                     vocab=self.vocab,
                     variable_mapping=self.variable_mapping.to_dict(),
                     drs_root=drs_root if drs_root else None,
+                    validate_frequency=self.validate_frequency,
+                    enable_resampling=self.enable_resampling,
+                    resampling_method=self.resampling_method,
                 )
         elif table in ("SImon", "SIday") or table.startswith(_mip_seaice_prefixes):
             self.cmoriser = SeaIce_CMORiser(
diff --git a/src/access_moppy/ocean.py b/src/access_moppy/ocean.py
index 51d5dc23..a685d3e0 100644
--- a/src/access_moppy/ocean.py
+++ b/src/access_moppy/ocean.py
@@ -334,6 +334,9 @@ def __init__(
         vocab: CMIP6Vocabulary,
         variable_mapping: Dict[str, Any],
         drs_root: Optional[Path] = None,
+        validate_frequency: bool = True,
+        enable_resampling: bool = False,
+        resampling_method: str = "auto",
         # Backward compatibility
         input_paths: Optional[Union[str, List[str]]] = None,
     ):
@@ -345,6 +348,9 @@ def __init__(
             vocab=vocab,
             variable_mapping=variable_mapping,
             drs_root=drs_root,
+            validate_frequency=validate_frequency,
+            enable_resampling=enable_resampling,
+            resampling_method=resampling_method,
         )
 
         nominal_resolution = vocab._get_nominal_resolution(target_realm="ocean")
@@ -406,6 +412,9 @@ def __init__(
         vocab: CMIP6Vocabulary,
         variable_mapping: Dict[str, Any],
         drs_root: Optional[Path] = None,
+        validate_frequency: bool = True,
+        enable_resampling: bool = False,
+        resampling_method: str = "auto",
         # Backward compatibility
         input_paths: Optional[Union[str, List[str]]] = None,
     ):
@@ -417,6 +426,9 @@ def __init__(
             vocab=vocab,
             variable_mapping=variable_mapping,
             drs_root=drs_root,
+            validate_frequency=validate_frequency,
+            enable_resampling=enable_resampling,
+            resampling_method=resampling_method,
         )
 
         nominal_resolution = vocab._get_nominal_resolution(target_realm="ocean")
diff --git a/src/access_moppy/utilities.py b/src/access_moppy/utilities.py
index 3207f5b7..36a7cbf0 100644
--- a/src/access_moppy/utilities.py
+++ b/src/access_moppy/utilities.py
@@ -1917,6 +1917,55 @@ def get_resampling_frequency_string(target_freq: pd.Timedelta) -> str:
         return f"{int(years)}YE"
 
 
+def _normalise_calendar_name(calendar: Optional[str]) -> Optional[str]:
+    """Map the non-CF ``"GREGORIAN"`` label to ``"proleptic_gregorian"``.
+
+    ACCESS-ESM1-5 files label their (proleptic-Gregorian) time axis with the
+    non-CF name ``"GREGORIAN"``; ``base.CMORiser._check_calendar`` rewrites it to
+    ``"proleptic_gregorian"`` on the written file. Bounds and resampling must
+    use the same calendar — otherwise cftime reads ``"GREGORIAN"`` as the mixed
+    Julian/Gregorian ``"standard"`` calendar and shifts pre-1582 dates by ~1 day.
+    The match is case-sensitive; any other value (or ``None``) passes through.
+    """
+    return "proleptic_gregorian" if calendar == "GREGORIAN" else calendar
+
+
+def _shift_resampled_time_to_period_midpoint(
+    time_da: xr.DataArray, target_freq: pd.Timedelta
+) -> xr.DataArray:
+    """Move a resampled time coordinate from the period boundary to its midpoint.
+
+    pandas/xarray ``resample`` labels each bin on the period boundary (e.g. "YE"
+    puts every value on 31 December), whereas CMOR centres the time coordinate on
+    the averaging period: a yearly mean sits on ~2 July (12:00 in a 365-day year,
+    00:00 in a 366-day year), the midpoint of ``[Jan 1, next Jan 1]``.
+
+    Unrecognised (e.g. sub-daily) frequencies and empty axes return unchanged.
+    NOTE(review): calendar is read from ``attrs`` only — confirm callers set it.
+    """
+    days = target_freq.total_seconds() / 86400  # target period length in days
+    if 360 <= days <= 366:
+        bounds_fn = _calculate_yearly_bounds
+    elif 28 <= days <= 31:
+        bounds_fn = _calculate_monthly_bounds
+    elif 0.9 <= days <= 1.1:
+        bounds_fn = _calculate_daily_bounds
+    else:
+        return time_da
+
+    values = time_da.values
+    if values.size == 0:
+        return time_da
+    is_cftime = isinstance(values.flat[0], cftime.datetime)
+    calendar = time_da.attrs.get("calendar", "proleptic_gregorian")
+
+    bounds = bounds_fn(values, calendar, is_cftime)
+    midpoints = np.array(
+        [lo + (hi - lo) / 2 for lo, hi in bounds], dtype=values.dtype
+    )
+    return time_da.copy(data=midpoints)
+
+
 def resample_dataset_temporal(
     ds: xr.Dataset,
     target_freq: pd.Timedelta,
@@ -1943,6 +1992,20 @@ def resample_dataset_temporal(
             f"Available coordinates: {sorted(ds.coords)}"
         )
 
+    # xarray's resample requires a monotonic time index. Multi-file inputs supplied
+    # in non-chronological order (e.g. an unsorted glob) concatenate into a
+    # non-monotonic time axis, so sort here before resampling.
+    ds = ds.sortby(time_coord)
+
+    # Normalise the non-CF "GREGORIAN" calendar label to "proleptic_gregorian"
+    # (the calendar the written file ultimately declares) before decoding, so the
+    # resampled values, period midpoints and restored encoding all use that
+    # calendar rather than cftime's mixed Julian/Gregorian "standard" reading.
+    if ds[time_coord].attrs.get("calendar") == "GREGORIAN":
+        ds = ds.assign_coords(
+            {time_coord: ds[time_coord].assign_attrs(calendar="proleptic_gregorian")}
+        )
+
     # Convert target frequency to resampling string
     freq_str = get_resampling_frequency_string(target_freq)
 
@@ -2006,6 +2069,35 @@ def resample_dataset_temporal(
             if coord_name != time_coord:
                 ds_resampled[coord_name] = ds[coord_name]
 
+        # Centre the resampled time coordinate on each period's midpoint
+        # (CMOR convention) instead of the period boundary that resample labels it
+        # with (e.g. yearly means on ~2 July rather than 31 December).
+        ds_resampled[time_coord] = _shift_resampled_time_to_period_midpoint(
+            ds_resampled[time_coord], target_freq
+        )
+
+        # Restore the original CF time encoding. decode_cf() moved units/calendar
+        # off the coordinate (into encoding) for the resample, so without this the
+        # written file would have a time axis with no units — CF-invalid. Re-encode
+        # the (cftime) midpoints back to numeric using the input units/calendar and
+        # reattach the original attributes, matching the decode_cf=False pipeline.
+        orig_time_attrs = dict(ds[time_coord].attrs)
+        orig_units = orig_time_attrs.get("units", "")
+        if "since" in orig_units:
+            orig_calendar = orig_time_attrs.get("calendar", "standard")
+            time_vals = ds_resampled[time_coord].values
+            # decode_cf yields cftime for non-standard/pre-1582 calendars but
+            # numpy datetime64 otherwise; date2num needs cftime/datetime objects,
+            # so convert datetime64 to python datetimes first.
+            if time_vals.size and not isinstance(time_vals.flat[0], cftime.datetime):
+                time_vals = pd.to_datetime(time_vals).to_pydatetime()
+            numeric_time = date2num(
+                time_vals, units=orig_units, calendar=orig_calendar
+            )
+            ds_resampled[time_coord] = xr.DataArray(
+                numeric_time, dims=[time_coord], attrs=orig_time_attrs
+            )
+
         # Update attributes
         ds_resampled.attrs = ds.attrs.copy()
 
@@ -2165,7 +2257,10 @@ def normalize_cf_time_units(units: Optional[str]) -> Optional[str]:
 
 
 def calculate_time_bounds(
-    ds: xr.Dataset, time_coord: str = "time", bnds_name: str = "nv"
+    ds: xr.Dataset,
+    time_coord: str = "time",
+    bnds_name: str = "nv",
+    freq_hint: Optional[str] = None,
 ) -> xr.DataArray:
     """
     Calculate time bounds from time coordinate for CMIP6 compliance.
@@ -2186,6 +2281,12 @@ def calculate_time_bounds(
     bnds_name : str, default "nv"
         Name of the bounds dimension. Use "nv" for ocean data (default),
         or "bnds" for atmosphere data
+    freq_hint : str, optional
+        Frequency label ("daily", "monthly", "yearly") used only as a fallback
+        when the frequency cannot be inferred from the time axis itself — i.e.
+        when there is a single time point (e.g. a multi-year input resampled
+        down to one year). Inference from ≥2 points always takes precedence, so
+        this never changes existing multi-point behaviour.
 
     Returns
     -------
@@ -2205,14 +2306,13 @@ def calculate_time_bounds(
     time = ds[time_coord]
     n_times = len(time)
 
-    if n_times < 2:
-        raise ValueError("Need at least 2 time points to infer time bounds")
-
     # Compute only the 1-D time coordinate.  Using .compute().values (rather than
     # plain .values) ensures that only the time coordinate's dask graph is
     # triggered, not any larger graph that happens to reference the same chunks.
     time_values = time.compute().values
-    calendar = time.attrs.get("calendar", "proleptic_gregorian")
+    calendar = _normalise_calendar_name(
+        time.attrs.get("calendar", "proleptic_gregorian")
+    )
     units = time.attrs.get("units")
 
     # Determine the type of time coordinate
@@ -2236,8 +2336,16 @@ def calculate_time_bounds(
         )
         is_cftime = True
 
-    # Try to infer frequency
-    freq = _infer_frequency(time_values)
+    # Infer frequency from the spacing of time points. When only a single time
+    # point is present (e.g. a multi-year input resampled down to one year),
+    # inference returns None; fall back to the caller-supplied frequency hint
+    # (derived from the CMOR table) so per-period bounds can still be built.
+    freq = _infer_frequency(time_values) or freq_hint
+    if freq is None:
+        raise ValueError(
+            "Need at least 2 time points to infer time bounds, or pass freq_hint "
+            "(e.g. 'yearly') derived from the target table frequency"
+        )
 
     # Initialize bounds array
     time_bnds = np.empty((n_times, 2), dtype=object if is_cftime else time_values.dtype)
diff --git a/src/access_moppy/vocabulary_processors.py b/src/access_moppy/vocabulary_processors.py
index 97301069..5cacab93 100644
--- a/src/access_moppy/vocabulary_processors.py
+++ b/src/access_moppy/vocabulary_processors.py
@@ -891,6 +891,7 @@ def generate_filename(
             table_lower = table_name.lower()
             is_subdaily_data = any(freq in table_lower for freq in ["3hr", "6hr", "hr"])
             is_daily_data = "day" in table_lower
+            is_yearly_data = "yr" in table_lower
 
             # Format time range based on frequency
             if is_subdaily_data:
@@ -902,6 +903,9 @@ def generate_filename(
             elif is_daily_data:
                 # Daily data: include day (YYYYMMDD)
                 start, end = [f"{t.year:04d}{t.month:02d}{t.day:02d}" for t in times]
+            elif is_yearly_data:
+                # Yearly data (e.g. Oyr): year only (YYYY)
+                start, end = [f"{t.year:04d}" for t in times]
             else:
                 # Monthly or other data: year and month only (YYYYMM)
                 start, end = [f"{t.year:04d}{t.month:02d}" for t in times]
diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py
index 97cc2369..071242c7 100644
--- a/tests/unit/test_utilities.py
+++ b/tests/unit/test_utilities.py
@@ -26,6 +26,7 @@
     detect_time_frequency_lazy,
     get_requested_variables_from_data_request,
     normalize_cf_time_units,
+    resample_dataset_temporal,
 )
 
 
@@ -46,6 +47,125 @@ def test_insufficient_time_points(self):
         with pytest.raises(ValueError, match="Need at least 2 time points"):
             calculate_time_bounds(ds)
 
+    def test_single_point_with_freq_hint(self):
+        """A single time point succeeds when a frequency hint is supplied
+        (e.g. a multi-year input resampled down to one year, where the
+        frequency cannot be inferred from point spacing)."""
+        ds = xr.Dataset(coords={"time": [np.datetime64("2000-06-15")]})
+
+        time_bnds = calculate_time_bounds(ds, freq_hint="yearly")
+
+        assert time_bnds.shape == (1, 2)  # one (lower, upper) pair
+        assert time_bnds.values[0, 0] == np.datetime64("2000-01-01")  # year start
+        assert time_bnds.values[0, 1] == np.datetime64("2001-01-01")  # next Jan 1
+
+    def test_gregorian_label_treated_as_proleptic(self):
+        """The non-CF "GREGORIAN" label (ACCESS-ESM1-5) must be computed as
+        proleptic_gregorian, matching _check_calendar's rewrite, so pre-1582
+        bounds are not shifted by ~1 day into the Julian "standard" calendar.
+
+        For year 0101, Jan 1 is day 36524 in proleptic_gregorian (36525 in
+        Julian/standard)."""
+        ds = xr.Dataset(
+            coords={
+                "time": (
+                    "time",
+                    [36706.5, 37071.5],  # yearly midpoints, days since 0001-01-01
+                    {"units": "days since 0001-01-01", "calendar": "GREGORIAN"},
+                )
+            }
+        )
+
+        time_bnds = calculate_time_bounds(ds, freq_hint="yearly")
+
+        # proleptic_gregorian year boundaries, not the Julian 36525/36890
+        assert time_bnds.values[0, 0] == 36524.0
+        assert time_bnds.values[0, 1] == 36889.0
+
+
+def test_normalise_calendar_name():
+    """Only the non-CF "GREGORIAN" label is rewritten; other names pass through."""
+    from access_moppy.utilities import _normalise_calendar_name
+
+    assert _normalise_calendar_name("GREGORIAN") == "proleptic_gregorian"
+    assert _normalise_calendar_name("gregorian") == "gregorian"  # case-sensitive
+    assert _normalise_calendar_name("standard") == "standard"
+    assert _normalise_calendar_name("noleap") == "noleap"
+    assert _normalise_calendar_name(None) is None  # missing calendar stays None
+
+
+class TestResampleTimeMidpoint:
+    """Resampling must centre the time coordinate on each period's midpoint
+    (CMOR convention), not the period boundary that resample() labels it with."""
+
+    def test_monthly_to_yearly_lands_on_midyear(self):
+        months = pd.date_range("1950-01-16", "1954-12-16", freq="MS") + pd.Timedelta(
+            days=15
+        )
+        ds = xr.Dataset(
+            {"v": (["time"], np.arange(len(months), dtype="f4"))},
+            coords={"time": ("time", months)},
+        )
+
+        out = resample_dataset_temporal(
+            ds, pd.Timedelta(days=365), "v", "time", "auto"
+        )
+
+        times = pd.to_datetime(out["time"].values)
+        # Every yearly value sits on ~2 July, never on a year boundary.
+        assert all((t.month, t.day) == (7, 2) for t in times)
+        # 365-day years are centred at 12:00; the 366-day leap year (1952) at 00:00.
+        assert times[0].hour == 12  # 1950 (non-leap)
+        assert times[2].hour == 0  # 1952 (leap)
+
+    def test_resample_handles_unsorted_time(self):
+        """Non-chronological multi-file inputs concatenate into a non-monotonic
+        time axis; resampling must sort first rather than raising
+        'Index must be monotonic for resampling'."""
+        months = pd.date_range("1950-01-16", periods=24, freq="MS") + pd.Timedelta(
+            days=15
+        )
+        shuffled = months[np.array([12, 0, 6, 18, 3] + [i for i in range(24) if i not in (12, 0, 6, 18, 3)])]
+        ds = xr.Dataset(
+            {"v": (["time"], np.arange(24, dtype="f4"))},
+            coords={"time": ("time", shuffled)},
+        )
+        assert not pd.Index(ds["time"].values).is_monotonic_increasing
+
+        out = resample_dataset_temporal(
+            ds, pd.Timedelta(days=365), "v", "time", "auto"
+        )
+
+        # Resampling succeeded (no monotonicity error) and produced a sorted axis.
+        assert out.sizes["time"] >= 2
+        assert pd.Index(out["time"].values).is_monotonic_increasing
+
+    def test_resample_preserves_cf_time_units(self):
+        """Resampling decodes time internally; it must restore the original CF
+        units/calendar (as numeric values) so the written file keeps a valid
+        time axis rather than a units-less coordinate."""
+        from cftime import date2num
+
+        months = xr.cftime_range(
+            "1950-01-16", periods=24, freq="MS", calendar="standard"
+        )
+        units = "days since 1900-01-01"
+        numeric = date2num(months.values, units, "standard")
+        ds = xr.Dataset(
+            {"v": (["time"], np.arange(24, dtype="f4"))},
+            coords={
+                "time": ("time", numeric, {"units": units, "calendar": "standard"})
+            },
+        )
+
+        out = resample_dataset_temporal(
+            ds, pd.Timedelta(days=365), "v", "time", "auto"
+        )
+
+        assert out["time"].attrs.get("units") == units
+        assert out["time"].attrs.get("calendar") == "standard"
+        assert np.issubdtype(np.asarray(out["time"].values).dtype, np.floating)
+
 
 class TestCalculateTimeBoundsMonthly:
     """Test monthly frequency time bounds calculation."""
diff --git a/tests/unit/test_vocabulary_processors.py b/tests/unit/test_vocabulary_processors.py
index 3a883522..80bf1817 100644
--- a/tests/unit/test_vocabulary_processors.py
+++ b/tests/unit/test_vocabulary_processors.py
@@ -454,6 +454,30 @@ def test_generate_filename_datetime64_time_branch(vocabulary_instance):
     assert "202001-202002" in filename
 
 
+@pytest.mark.unit
+def test_generate_filename_yearly_year_only(vocabulary_instance):
+    """Yearly tables (Oyr) format the time range as YYYY-YYYY, not YYYYMM."""
+    cf_time = xr.cftime_range("2020-01-01", periods=2, freq="YS", calendar="gregorian")
+    ds = xr.Dataset(
+        {
+            "no3": xr.DataArray(
+                np.array([1.0, 2.0]),
+                dims=["time"],
+                coords={"time": cf_time},
+            )
+        }
+    )
+    attrs = {**_FILENAME_ATTRS, "variable_id": "no3", "table_id": "Oyr"}
+
+    with patch.object(
+        CMIP6Vocabulary, "_load_drs_templates", return_value=_TIME_RANGE_TEMPLATE
+    ):
+        filename = vocabulary_instance.generate_filename(attrs, ds, "no3", "Oyr.no3")
+
+    assert "2020-2021" in filename
+    assert "202001" not in filename  # no month component
+
+
 @pytest.mark.unit
 def test_generate_filename_numeric_time_branch(vocabulary_instance):
     """Numeric float64 time – uses num2date (else) branch."""

From 52d5fe24366afbb7e6170127b8543443090f02c0 Mon Sep 17 00:00:00 2001
From: rhaegar325 <rhaegar227@gmail.com>
Date: Fri, 26 Jun 2026 13:01:39 +1000
Subject: [PATCH 2/4] pre-commit fix

---
 src/access_moppy/utilities.py |  8 ++------
 tests/unit/test_utilities.py  | 18 ++++++++----------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/src/access_moppy/utilities.py b/src/access_moppy/utilities.py
index 36a7cbf0..bbb00b25 100644
--- a/src/access_moppy/utilities.py
+++ b/src/access_moppy/utilities.py
@@ -1960,9 +1960,7 @@ def _shift_resampled_time_to_period_midpoint(
     calendar = time_da.attrs.get("calendar", "proleptic_gregorian")
 
     bounds = bounds_fn(values, calendar, is_cftime)
-    midpoints = np.array(
-        [lo + (hi - lo) / 2 for lo, hi in bounds], dtype=values.dtype
-    )
+    midpoints = np.array([lo + (hi - lo) / 2 for lo, hi in bounds], dtype=values.dtype)
     return time_da.copy(data=midpoints)
 
 
@@ -2091,9 +2089,7 @@ def resample_dataset_temporal(
             # so convert datetime64 to python datetimes first.
             if time_vals.size and not isinstance(time_vals.flat[0], cftime.datetime):
                 time_vals = pd.to_datetime(time_vals).to_pydatetime()
-            numeric_time = date2num(
-                time_vals, units=orig_units, calendar=orig_calendar
-            )
+            numeric_time = date2num(time_vals, units=orig_units, calendar=orig_calendar)
             ds_resampled[time_coord] = xr.DataArray(
                 numeric_time, dims=[time_coord], attrs=orig_time_attrs
             )
diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py
index 071242c7..8ee9fa9a 100644
--- a/tests/unit/test_utilities.py
+++ b/tests/unit/test_utilities.py
@@ -107,9 +107,7 @@ def test_monthly_to_yearly_lands_on_midyear(self):
             coords={"time": ("time", months)},
         )
 
-        out = resample_dataset_temporal(
-            ds, pd.Timedelta(days=365), "v", "time", "auto"
-        )
+        out = resample_dataset_temporal(ds, pd.Timedelta(days=365), "v", "time", "auto")
 
         times = pd.to_datetime(out["time"].values)
         # Every yearly value sits on ~2 July, never on a year boundary.
@@ -125,16 +123,18 @@ def test_resample_handles_unsorted_time(self):
         months = pd.date_range("1950-01-16", periods=24, freq="MS") + pd.Timedelta(
             days=15
         )
-        shuffled = months[np.array([12, 0, 6, 18, 3] + [i for i in range(24) if i not in (12, 0, 6, 18, 3)])]
+        shuffled = months[
+            np.array(
+                [12, 0, 6, 18, 3] + [i for i in range(24) if i not in (12, 0, 6, 18, 3)]
+            )
+        ]
         ds = xr.Dataset(
             {"v": (["time"], np.arange(24, dtype="f4"))},
             coords={"time": ("time", shuffled)},
         )
         assert not pd.Index(ds["time"].values).is_monotonic_increasing
 
-        out = resample_dataset_temporal(
-            ds, pd.Timedelta(days=365), "v", "time", "auto"
-        )
+        out = resample_dataset_temporal(ds, pd.Timedelta(days=365), "v", "time", "auto")
 
         # Resampling succeeded (no monotonicity error) and produced a sorted axis.
         assert out.sizes["time"] >= 2
@@ -158,9 +158,7 @@ def test_resample_preserves_cf_time_units(self):
             },
         )
 
-        out = resample_dataset_temporal(
-            ds, pd.Timedelta(days=365), "v", "time", "auto"
-        )
+        out = resample_dataset_temporal(ds, pd.Timedelta(days=365), "v", "time", "auto")
 
         assert out["time"].attrs.get("units") == units
         assert out["time"].attrs.get("calendar") == "standard"

From 2008aa00393fe42c8c8b519de0d4b0abe366a5e1 Mon Sep 17 00:00:00 2001
From: rhaegar325 <rhaegar227@gmail.com>
Date: Fri, 26 Jun 2026 13:17:35 +1000
Subject: [PATCH 3/4] add tests for frequency hint, calendar label and midpoint shift

---
 tests/unit/test_base.py      | 23 +++++++++++++++
 tests/unit/test_utilities.py | 56 ++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
index 7c0ca085..fda99f2c 100644
--- a/tests/unit/test_base.py
+++ b/tests/unit/test_base.py
@@ -7,6 +7,7 @@
 
 import logging
 from pathlib import Path
+from types import SimpleNamespace
 from unittest.mock import MagicMock, Mock, patch
 
 import dask.array as da
@@ -2602,3 +2603,25 @@ def test_above_max_error_includes_actual_maximum(self):
         msg = str(exc_info.value)
         assert "Actual maximum found" in msg
         assert "9" in msg
+
+
+class TestTargetFrequencyHint:
+    """_target_frequency_hint maps the CMOR table frequency to a coarse label,
+    used only as a single-point fallback in time-bounds construction."""
+
+    @pytest.mark.parametrize(
+        "compound_name, expected",
+        [
+            ("Oyr.no3", "yearly"),
+            ("Omon.tos", "monthly"),
+            ("Oday.tos", "daily"),
+            ("3hr.x", None),  # sub-daily has no coarse bucket
+            ("fx.areacello", None),  # time-independent
+            ("Bogus.zzz", None),  # unparseable table -> None (exception swallowed)
+            (None, None),  # no compound_name
+            ("", None),
+        ],
+    )
+    def test_frequency_hint(self, compound_name, expected):
+        stub = SimpleNamespace(compound_name=compound_name)  # stands in for self
+        assert CMORiser._target_frequency_hint(stub) == expected
diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py
index 8ee9fa9a..628e8f8f 100644
--- a/tests/unit/test_utilities.py
+++ b/tests/unit/test_utilities.py
@@ -164,6 +164,62 @@ def test_resample_preserves_cf_time_units(self):
         assert out["time"].attrs.get("calendar") == "standard"
         assert np.issubdtype(np.asarray(out["time"].values).dtype, np.floating)
 
+    def test_resample_gregorian_label_normalised(self):
+        """A "GREGORIAN"-labelled axis (values written by the model in proleptic
+        semantics) must be resampled and declared as proleptic_gregorian, matching
+        _check_calendar, so it is not read as the Julian "standard" calendar."""
+        from cftime import date2num
+
+        months = xr.cftime_range(
+            "1950-01-16", periods=24, freq="MS", calendar="proleptic_gregorian"
+        )
+        units = "days since 1900-01-01"
+        numeric = date2num(months.values, units, "proleptic_gregorian")
+        ds = xr.Dataset(
+            {"v": (["time"], np.arange(24, dtype="f4"))},
+            coords={
+                "time": ("time", numeric, {"units": units, "calendar": "GREGORIAN"})
+            },
+        )
+
+        out = resample_dataset_temporal(ds, pd.Timedelta(days=365), "v", "time", "auto")
+
+        assert out["time"].attrs.get("calendar") == "proleptic_gregorian"
+
+    def test_midpoint_shift_per_frequency(self):
+        """_shift_resampled_time_to_period_midpoint centres monthly/daily periods
+        too, and is a no-op for sub-daily / empty inputs."""
+        from access_moppy.utilities import _shift_resampled_time_to_period_midpoint
+
+        # Monthly: a period label in January -> mid-January (day 16, 12:00).
+        jan = xr.DataArray(
+            xr.cftime_range("2000-01-01", periods=1, calendar="standard").values,
+            dims="time",
+            name="time",
+        )
+        out_mon = _shift_resampled_time_to_period_midpoint(jan, pd.Timedelta(days=30))
+        assert out_mon.values[0].day == 16
+        assert out_mon.values[0].hour == 12
+
+        # Daily: midnight -> noon.
+        day = xr.DataArray(
+            xr.cftime_range("2000-01-01", periods=1, freq="D", calendar="standard").values,
+            dims="time",
+            name="time",
+        )
+        out_day = _shift_resampled_time_to_period_midpoint(day, pd.Timedelta(days=1))
+        assert out_day.values[0].hour == 12
+
+        # Sub-daily target frequency: unchanged (no recognised period).
+        out_noop = _shift_resampled_time_to_period_midpoint(jan, pd.Timedelta(hours=6))
+        assert out_noop.values[0] == jan.values[0]
+
+        # Empty axis: returned unchanged.
+        empty = xr.DataArray(np.array([], dtype=object), dims="time", name="time")
+        assert _shift_resampled_time_to_period_midpoint(
+            empty, pd.Timedelta(days=365)
+        ).size == 0
+
 
 class TestCalculateTimeBoundsMonthly:
     """Test monthly frequency time bounds calculation."""

From 8eab7f2a30ded00b273c1fc4397621531ad7a78a Mon Sep 17 00:00:00 2001
From: rhaegar325 <rhaegar227@gmail.com>
Date: Fri, 26 Jun 2026 13:19:23 +1000
Subject: [PATCH 4/4] reformat midpoint-shift test (line wrapping only)

---
 tests/unit/test_utilities.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py
index 628e8f8f..105d1a82 100644
--- a/tests/unit/test_utilities.py
+++ b/tests/unit/test_utilities.py
@@ -203,7 +203,9 @@ def test_midpoint_shift_per_frequency(self):
 
         # Daily: midnight -> noon.
         day = xr.DataArray(
-            xr.cftime_range("2000-01-01", periods=1, freq="D", calendar="standard").values,
+            xr.cftime_range(
+                "2000-01-01", periods=1, freq="D", calendar="standard"
+            ).values,
             dims="time",
             name="time",
         )
@@ -216,9 +218,10 @@ def test_midpoint_shift_per_frequency(self):
 
         # Empty axis: returned unchanged.
         empty = xr.DataArray(np.array([], dtype=object), dims="time", name="time")
-        assert _shift_resampled_time_to_period_midpoint(
-            empty, pd.Timedelta(days=365)
-        ).size == 0
+        assert (
+            _shift_resampled_time_to_period_midpoint(empty, pd.Timedelta(days=365)).size
+            == 0
+        )
 
 
 class TestCalculateTimeBoundsMonthly: