Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 44 additions & 11 deletions imap_processing/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
from imap_processing.utils import (
check_epochs_within_day_offsets,
filter_day_boundary_data,
retrieve_mag_l1_inputs_from_l2_offsets,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -581,7 +582,11 @@ def post_processing(
if self.repointing is not None:
ds.attrs["Repointing"] = self.repointing
ds.attrs["Start_date"] = self.start_date
ds.attrs["Parents"] = parent_files
# Don't overwrite Parents if processing already set it (e.g.
# MAG L2 records the L1 file actually used, not the passed-in
# dependency).
if "Parents" not in ds.attrs:
ds.attrs["Parents"] = parent_files
products.append(write_cdf(ds))
else:
# A path to a product that was already written out
Expand Down Expand Up @@ -1369,14 +1374,6 @@ def do_processing( # noqa: PLR0912
)

if self.data_level == "l2":
science_files = dependencies.get_file_paths(source="mag", data_type="l1b")
science_files.extend(
dependencies.get_file_paths(source="mag", data_type="l1c")
)
# TODO: Overwrite dependencies with versions from offsets file
# TODO: Ensure that parent_files attribute works with that
input_data = load_cdf(science_files[0])

descriptor_no_frame = str.split(self.descriptor, "-")[0]

# We expect either a norm or a burst input descriptor.
Expand Down Expand Up @@ -1405,8 +1402,31 @@ def do_processing( # noqa: PLR0912

combined_calibration = MagAncillaryCombiner(calibration[0], day_buffer)
offset_dataset = load_cdf(offsets[0].imap_file_paths[0].construct_path())
# TODO: get input data from offsets file
# TODO: Test data missing

# The L1B (burst) or L1C (norm) input file is retrieved from the
# offsets file's Parents attribute, so the L2 vectors always match
# the exact L1 versions the offsets were generated against. This
# ignores any L1B/L1C dependencies passed in to processing. If the
# offsets file has no Parents, fall back to the passed-in
# dependencies.
input_files = retrieve_mag_l1_inputs_from_l2_offsets(offset_dataset)
if input_files:
input_data = load_cdf(input_files[0])
else:
science_files = dependencies.get_file_paths(
source="mag", data_type="l1b"
)
science_files.extend(
dependencies.get_file_paths(source="mag", data_type="l1c")
)
logger.warning(
"Offsets file %s has no Parents attribute; falling back "
"to passed-in L1B/L1C dependencies for MAG L2 input.",
offsets[0].imap_file_paths[0].construct_path().name,
)
input_files = [science_files[0]]
input_data = load_cdf(input_files[0])

datasets = mag_l2(
combined_calibration.combined_dataset,
offset_dataset,
Expand All @@ -1415,6 +1435,19 @@ def do_processing( # noqa: PLR0912
mode=DataMode(descriptor_no_frame.upper()),
)

# Record the L1 file actually used (from the offsets file's
# Parents) in place of the passed-in L1B/L1C dependencies, so the
# product provenance matches the data that went into it.
# post_processing leaves an existing Parents attribute untouched.
l2_parents = [
file_path.name
for file_path in dependencies.get_file_paths()
if not file_path.name.startswith(("imap_mag_l1b_", "imap_mag_l1c_"))
]
l2_parents.append(input_files[0].name)
for dataset in datasets:
dataset.attrs["Parents"] = l2_parents

for ds in datasets:
if "raw" not in ds.attrs["Logical_source"] and not np.all(
ds["epoch"].values[1:] > ds["epoch"].values[:-1]
Expand Down
36 changes: 36 additions & 0 deletions imap_processing/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Tests coverage for imap_processing/utils.py"""

from pathlib import Path
from unittest import mock

import numpy as np
Expand Down Expand Up @@ -460,3 +461,38 @@ def test_check_epochs_within_day(epoch_ns, raises):
check_epochs_within_day_offsets([ds], day)
else:
check_epochs_within_day_offsets([ds], day)


def test_retrieve_mag_l1_inputs_from_l2_offsets():
    """Cover list, scalar-string, and absent ``Parents`` on an offsets dataset."""
    download_target = "imap_processing.utils.download"
    parent_names = [
        "imap_mag_l1c_norm-mago_20250928_v008.cdf",
        "imap_mag_l1b_burst-mago_20250928_v004.cdf",
    ]

    def fake_download(name):
        # Stand-in for the real download: map a file name to a local path.
        return Path("/data") / name

    # Case 1: a list of parents -> one download per entry, order preserved.
    offsets_ds = xr.Dataset(attrs={"Parents": parent_names})
    with mock.patch(download_target, side_effect=fake_download) as mock_download:
        local_paths = utils.retrieve_mag_l1_inputs_from_l2_offsets(offsets_ds)
    assert local_paths == [Path("/data") / name for name in parent_names]
    requested = [call.args[0] for call in mock_download.call_args_list]
    assert requested == parent_names

    # Case 2: load_cdf collapses a single-element attribute to a scalar string,
    # which must be treated as a one-item list.
    scalar_ds = xr.Dataset(attrs={"Parents": parent_names[0]})
    with mock.patch(download_target, side_effect=fake_download):
        local_paths = utils.retrieve_mag_l1_inputs_from_l2_offsets(scalar_ds)
    assert local_paths == [Path("/data") / parent_names[0]]

    # Case 3: no Parents attribute -> empty result and no download attempts.
    with mock.patch(download_target) as mock_download:
        local_paths = utils.retrieve_mag_l1_inputs_from_l2_offsets(xr.Dataset())
    assert local_paths == []
    mock_download.assert_not_called()
36 changes: 36 additions & 0 deletions imap_processing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pandas as pd
import space_packet_parser as spp
import xarray as xr
from imap_data_access.io import download
from space_packet_parser.exceptions import UnrecognizedPacketTypeError
from space_packet_parser.generators.ccsds import SequenceFlags
from space_packet_parser.xtce import definitions, encodings, parameter_types
Expand Down Expand Up @@ -656,3 +657,38 @@ def check_epochs_within_day_offsets(
f"Data in {dataset_logical_id} contains epochs more than"
f" 24 hours outside the expected processing day {day}."
)


def retrieve_mag_l1_inputs_from_l2_offsets(
    l2_offsets_ds: xr.Dataset,
) -> list[Path]:
    """
    Download the L1B/L1C parent files referenced by an L2 offsets file.

    MAG ``l2-{norm,burst}-offsets`` ancillary files carry a ``Parents``
    global attribute listing the exact L1B/L1C science files the offsets
    were generated against. This reads that attribute, downloads each
    referenced file from the SDC (skipped if already present locally), and
    returns the local paths so L2 can use the matching science inputs rather
    than any passed-in dependencies.

    Blank entries (empty or whitespace-only strings) are ignored, so a
    ``Parents`` attribute that is present but empty is treated the same as a
    missing one instead of being passed to ``download``.

    Parameters
    ----------
    l2_offsets_ds : xr.Dataset
        The loaded ``l2-{norm,burst}-offsets`` ancillary dataset.

    Returns
    -------
    list[pathlib.Path]
        Local paths to the downloaded parent files, in the order listed in
        the ``Parents`` attribute. Empty if the dataset has no ``Parents``
        attribute or the attribute contains only blank entries.
    """
    parent_files = l2_offsets_ds.attrs.get("Parents")
    if parent_files is None:
        return []
    # load_cdf collapses a single-element attribute to a scalar string.
    if isinstance(parent_files, str):
        parent_files = [parent_files]

    # A blank name cannot identify a file; skipping it lets callers fall back
    # to their own inputs when the offsets file carries no usable provenance.
    return [download(parent) for parent in parent_files if str(parent).strip()]
Loading