diff --git a/imap_processing/cli.py b/imap_processing/cli.py
index 74be1892b..8e1bafc49 100644
--- a/imap_processing/cli.py
+++ b/imap_processing/cli.py
@@ -90,6 +90,7 @@
 from imap_processing.utils import (
     check_epochs_within_day_offsets,
     filter_day_boundary_data,
+    retrieve_mag_l1_inputs_from_l2_offsets,
 )
 
 logger = logging.getLogger(__name__)
@@ -581,7 +582,11 @@ def post_processing(
                 if self.repointing is not None:
                     ds.attrs["Repointing"] = self.repointing
                 ds.attrs["Start_date"] = self.start_date
-                ds.attrs["Parents"] = parent_files
+                # Don't overwrite Parents if processing already set it (e.g.
+                # MAG L2 records the L1 file actually used, not the passed-in
+                # dependency).
+                if "Parents" not in ds.attrs:
+                    ds.attrs["Parents"] = parent_files
                 products.append(write_cdf(ds))
             else:
                 # A path to a product that was already written out
@@ -1369,14 +1374,6 @@ def do_processing(  # noqa: PLR0912
             )
 
         if self.data_level == "l2":
-            science_files = dependencies.get_file_paths(source="mag", data_type="l1b")
-            science_files.extend(
-                dependencies.get_file_paths(source="mag", data_type="l1c")
-            )
-            # TODO: Overwrite dependencies with versions from offsets file
-            # TODO: Ensure that parent_files attribute works with that
-            input_data = load_cdf(science_files[0])
-
             descriptor_no_frame = str.split(self.descriptor, "-")[0]
 
             # We expect either a norm or a burst input descriptor.
@@ -1405,8 +1402,31 @@ def do_processing(  # noqa: PLR0912
             combined_calibration = MagAncillaryCombiner(calibration[0], day_buffer)
 
             offset_dataset = load_cdf(offsets[0].imap_file_paths[0].construct_path())
-            # TODO: get input data from offsets file
-            # TODO: Test data missing
+
+            # The L1B (burst) or L1C (norm) input file is retrieved from the
+            # offsets file's Parents attribute, so the L2 vectors always match
+            # the exact L1 versions the offsets were generated against. This
+            # ignores any L1B/L1C dependencies passed in to processing. If the
+            # offsets file has no Parents, fall back to the passed-in
+            # dependencies.
+            input_files = retrieve_mag_l1_inputs_from_l2_offsets(offset_dataset)
+            if input_files:
+                input_data = load_cdf(input_files[0])
+            else:
+                science_files = dependencies.get_file_paths(
+                    source="mag", data_type="l1b"
+                )
+                science_files.extend(
+                    dependencies.get_file_paths(source="mag", data_type="l1c")
+                )
+                logger.warning(
+                    "Offsets file %s has no Parents attribute; falling back "
+                    "to passed-in L1B/L1C dependencies for MAG L2 input.",
+                    offsets[0].imap_file_paths[0].construct_path().name,
+                )
+                input_files = [science_files[0]]
+                input_data = load_cdf(input_files[0])
+
             datasets = mag_l2(
                 combined_calibration.combined_dataset,
                 offset_dataset,
@@ -1415,6 +1435,19 @@ def do_processing(  # noqa: PLR0912
                 mode=DataMode(descriptor_no_frame.upper()),
             )
 
+            # Record the L1 file actually used (from the offsets file's
+            # Parents) in place of the passed-in L1B/L1C dependencies, so the
+            # product provenance matches the data that went into it.
+            # post_processing leaves an existing Parents attribute untouched.
+            l2_parents = [
+                file_path.name
+                for file_path in dependencies.get_file_paths()
+                if not file_path.name.startswith(("imap_mag_l1b_", "imap_mag_l1c_"))
+            ]
+            l2_parents.append(input_files[0].name)
+            for dataset in datasets:
+                dataset.attrs["Parents"] = l2_parents
+
             for ds in datasets:
                 if "raw" not in ds.attrs["Logical_source"] and not np.all(
                     ds["epoch"].values[1:] > ds["epoch"].values[:-1]
diff --git a/imap_processing/tests/test_utils.py b/imap_processing/tests/test_utils.py
index 2f65770de..9e521c7b4 100644
--- a/imap_processing/tests/test_utils.py
+++ b/imap_processing/tests/test_utils.py
@@ -1,5 +1,6 @@
 """Tests coverage for imap_processing/utils.py"""
 
+from pathlib import Path
 from unittest import mock
 
 import numpy as np
@@ -460,3 +461,46 @@ def test_check_epochs_within_day(epoch_ns, raises):
             check_epochs_within_day_offsets([ds], day)
     else:
         check_epochs_within_day_offsets([ds], day)
+
+
+def test_retrieve_mag_l1_inputs_from_l2_offsets():
+    """Parents are downloaded in order; single-string, blank, missing handled."""
+    parents = [
+        "imap_mag_l1c_norm-mago_20250928_v008.cdf",
+        "imap_mag_l1b_burst-mago_20250928_v004.cdf",
+    ]
+
+    # Multiple parents: each is downloaded, paths returned in listed order.
+    ds = xr.Dataset()
+    ds.attrs["Parents"] = parents
+    with mock.patch(
+        "imap_processing.utils.download",
+        side_effect=lambda name: Path("/data") / name,
+    ) as mock_download:
+        result = utils.retrieve_mag_l1_inputs_from_l2_offsets(ds)
+    assert result == [Path("/data") / p for p in parents]
+    assert [call.args[0] for call in mock_download.call_args_list] == parents
+
+    # load_cdf collapses a single-element attribute to a scalar string.
+    single = xr.Dataset()
+    single.attrs["Parents"] = parents[0]
+    with mock.patch(
+        "imap_processing.utils.download",
+        side_effect=lambda name: Path("/data") / name,
+    ):
+        result = utils.retrieve_mag_l1_inputs_from_l2_offsets(single)
+    assert result == [Path("/data") / parents[0]]
+
+    # No Parents attribute -> empty list, no downloads attempted.
+    with mock.patch("imap_processing.utils.download") as mock_download:
+        result = utils.retrieve_mag_l1_inputs_from_l2_offsets(xr.Dataset())
+    assert result == []
+    mock_download.assert_not_called()
+
+    # Blank Parents entries are skipped rather than passed to download.
+    blank = xr.Dataset()
+    blank.attrs["Parents"] = " "
+    with mock.patch("imap_processing.utils.download") as mock_download:
+        result = utils.retrieve_mag_l1_inputs_from_l2_offsets(blank)
+    assert result == []
+    mock_download.assert_not_called()
diff --git a/imap_processing/utils.py b/imap_processing/utils.py
index 7412580a9..9526dba46 100644
--- a/imap_processing/utils.py
+++ b/imap_processing/utils.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import space_packet_parser as spp
 import xarray as xr
+from imap_data_access.io import download
 from space_packet_parser.exceptions import UnrecognizedPacketTypeError
 from space_packet_parser.generators.ccsds import SequenceFlags
 from space_packet_parser.xtce import definitions, encodings, parameter_types
@@ -656,3 +657,40 @@ def check_epochs_within_day_offsets(
                 f"Data in {dataset_logical_id} contains epochs more than"
                 f" 24 hours outside the expected processing day {day}."
             )
+
+
+def retrieve_mag_l1_inputs_from_l2_offsets(
+    l2_offsets_ds: xr.Dataset,
+) -> list[Path]:
+    """
+    Download the L1B/L1C parent files referenced by an L2 offsets file.
+
+    MAG ``l2-{norm,burst}-offsets`` ancillary files carry a ``Parents``
+    global attribute listing the exact L1B/L1C science files the offsets
+    were generated against. This reads that attribute, downloads each
+    referenced file from the SDC (skipped if already present locally), and
+    returns the local paths so L2 can use the matching science inputs rather
+    than any passed-in dependencies.
+
+    Parameters
+    ----------
+    l2_offsets_ds : xr.Dataset
+        The loaded ``l2-{norm,burst}-offsets`` ancillary dataset.
+
+    Returns
+    -------
+    list[pathlib.Path]
+        Local paths to the downloaded parent files, in the order listed in
+        the ``Parents`` attribute. Empty if the dataset has no ``Parents``
+        attribute or the attribute holds only blank entries.
+    """
+    parent_files = l2_offsets_ds.attrs.get("Parents", None)
+    if parent_files is None:
+        return []
+    # load_cdf collapses a single-element attribute to a scalar string.
+    if isinstance(parent_files, str):
+        parent_files = [parent_files]
+
+    # Skip blank entries: an empty or whitespace-only name cannot be
+    # downloaded, and the caller treats an empty list as "no Parents".
+    return [download(parent) for parent in parent_files if parent.strip()]