IMAP-Science-Operations-Center · bryan-harter · Jun 6, 2026 · Jun 5, 2026 · Jun 5, 2026
diff --git a/imap_processing/cli.py b/imap_processing/cli.py
@@ -90,6 +90,7 @@
 from imap_processing.utils import (
     check_epochs_within_day_offsets,
     filter_day_boundary_data,
+    retrieve_mag_l1_inputs_from_l2_offsets,
 )
 
 logger = logging.getLogger(__name__)
@@ -581,7 +582,11 @@ def post_processing(
                 if self.repointing is not None:
                     ds.attrs["Repointing"] = self.repointing
                 ds.attrs["Start_date"] = self.start_date
-                ds.attrs["Parents"] = parent_files
+                # Don't overwrite Parents if processing already set it (e.g.
+                # MAG L2 records the L1 file actually used, not the passed-in
+                # dependency).
+                if "Parents" not in ds.attrs:
+                    ds.attrs["Parents"] = parent_files
                 products.append(write_cdf(ds))
             else:
                 # A path to a product that was already written out
@@ -1369,14 +1374,6 @@ def do_processing(  # noqa: PLR0912
             )
 
         if self.data_level == "l2":
-            science_files = dependencies.get_file_paths(source="mag", data_type="l1b")
-            science_files.extend(
-                dependencies.get_file_paths(source="mag", data_type="l1c")
-            )
-            # TODO: Overwrite dependencies with versions from offsets file
-            # TODO: Ensure that parent_files attribute works with that
-            input_data = load_cdf(science_files[0])
-
             descriptor_no_frame = str.split(self.descriptor, "-")[0]
 
             # We expect either a norm or a burst input descriptor.
@@ -1405,8 +1402,31 @@ def do_processing(  # noqa: PLR0912
 
             combined_calibration = MagAncillaryCombiner(calibration[0], day_buffer)
             offset_dataset = load_cdf(offsets[0].imap_file_paths[0].construct_path())
-            # TODO: get input data from offsets file
-            # TODO: Test data missing
+
+            # The L1B (burst) or L1C (norm) input file is retrieved from the
+            # offsets file's Parents attribute, so the L2 vectors always match
+            # the exact L1 versions the offsets were generated against. This
+            # ignores any L1B/L1C dependencies passed in to processing. If the
+            # offsets file has no Parents, fall back to the passed-in
+            # dependencies.
+            input_files = retrieve_mag_l1_inputs_from_l2_offsets(offset_dataset)
+            if input_files:
+                input_data = load_cdf(input_files[0])
+            else:
+                science_files = dependencies.get_file_paths(
+                    source="mag", data_type="l1b"
+                )
+                science_files.extend(
+                    dependencies.get_file_paths(source="mag", data_type="l1c")
+                )
+                logger.warning(
+                    "Offsets file %s has no Parents attribute; falling back "
+                    "to passed-in L1B/L1C dependencies for MAG L2 input.",
+                    offsets[0].imap_file_paths[0].construct_path().name,
+                )
+                input_files = [science_files[0]]
+                input_data = load_cdf(input_files[0])
+
             datasets = mag_l2(
                 combined_calibration.combined_dataset,
                 offset_dataset,
@@ -1415,6 +1435,19 @@ def do_processing(  # noqa: PLR0912
                 mode=DataMode(descriptor_no_frame.upper()),
             )
 
+            # Record the L1 file actually used (from the offsets file's
+            # Parents) in place of the passed-in L1B/L1C dependencies, so the
+            # product provenance matches the data that went into it.
+            # post_processing leaves an existing Parents attribute untouched.
+            l2_parents = [
+                file_path.name
+                for file_path in dependencies.get_file_paths()
+                if not file_path.name.startswith(("imap_mag_l1b_", "imap_mag_l1c_"))
+            ]
+            l2_parents.append(input_files[0].name)
+            for dataset in datasets:
+                dataset.attrs["Parents"] = l2_parents
+
         for ds in datasets:
             if "raw" not in ds.attrs["Logical_source"] and not np.all(
                 ds["epoch"].values[1:] > ds["epoch"].values[:-1]

diff --git a/imap_processing/tests/test_utils.py b/imap_processing/tests/test_utils.py
@@ -1,5 +1,6 @@
 """Tests coverage for imap_processing/utils.py"""
 
+from pathlib import Path
 from unittest import mock
 
 import numpy as np
@@ -460,3 +461,38 @@ def test_check_epochs_within_day(epoch_ns, raises):
                 check_epochs_within_day_offsets([ds], day)
         else:
             check_epochs_within_day_offsets([ds], day)
+
+
+def test_retrieve_mag_l1_inputs_from_l2_offsets():
+    """Check list, scalar-string and absent ``Parents`` attribute handling."""
+    parents = [
+        "imap_mag_l1c_norm-mago_20250928_v008.cdf",
+        "imap_mag_l1b_burst-mago_20250928_v004.cdf",
+    ]
+    expected = [Path("/data") / name for name in parents]
+    target = "imap_processing.utils.download"
+
+    def fake_download(name):
+        # Stand-in for download(): map a file name to a path under /data.
+        return Path("/data") / name
+
+    # List of parents: one download per entry, paths kept in listed order.
+    multi_ds = xr.Dataset(attrs={"Parents": parents})
+    with mock.patch(target, side_effect=fake_download) as mock_download:
+        paths = utils.retrieve_mag_l1_inputs_from_l2_offsets(multi_ds)
+    requested = [call.args[0] for call in mock_download.call_args_list]
+    assert paths == expected
+    assert requested == parents
+
+    # Scalar string: load_cdf collapses a single-element attribute to a
+    # plain string, so the helper must accept that form as well.
+    single_ds = xr.Dataset(attrs={"Parents": parents[0]})
+    with mock.patch(target, side_effect=fake_download):
+        paths = utils.retrieve_mag_l1_inputs_from_l2_offsets(single_ds)
+    assert paths == expected[:1]
+
+    # Dataset without a Parents attribute: nothing returned or downloaded.
+    with mock.patch(target) as mock_download:
+        paths = utils.retrieve_mag_l1_inputs_from_l2_offsets(xr.Dataset())
+    assert paths == []
+    mock_download.assert_not_called()
diff --git a/imap_processing/utils.py b/imap_processing/utils.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import space_packet_parser as spp
 import xarray as xr
+from imap_data_access.io import download
 from space_packet_parser.exceptions import UnrecognizedPacketTypeError
 from space_packet_parser.generators.ccsds import SequenceFlags
 from space_packet_parser.xtce import definitions, encodings, parameter_types
@@ -656,3 +657,38 @@ def check_epochs_within_day_offsets(
                     f"Data in {dataset_logical_id} contains epochs more than"
                     f" 24 hours outside the expected processing day {day}."
                 )
+
+
+def retrieve_mag_l1_inputs_from_l2_offsets(
+    l2_offsets_ds: xr.Dataset,
+) -> list[Path]:
+    """
+    Download the L1B/L1C parent files referenced by an L2 offsets file.
+
+    MAG ``l2-{norm,burst}-offsets`` ancillary files carry a ``Parents``
+    global attribute listing the exact L1B/L1C science files the offsets
+    were generated against. This reads that attribute, passes each
+    non-blank entry to ``download``, and returns the resulting local paths
+    so L2 can use the matching science inputs rather than any passed-in
+    dependencies.
+
+    Parameters
+    ----------
+    l2_offsets_ds : xr.Dataset
+        The loaded ``l2-{norm,burst}-offsets`` ancillary dataset.
+
+    Returns
+    -------
+    list[pathlib.Path]
+        Local paths to the downloaded parent files, in the order listed in
+        the ``Parents`` attribute. Empty if the attribute is missing or
+        holds no non-blank file names.
+    """
+    parent_files = l2_offsets_ds.attrs.get("Parents")
+    if parent_files is None:
+        return []
+    # load_cdf collapses a single-element attribute to a scalar string.
+    if isinstance(parent_files, str):
+        parent_files = [parent_files]
+    # Skip blank entries rather than requesting an empty file name.
+    return [download(name) for name in parent_files if str(name).strip()]