From 470e518b5013d6f0b03d71a489d62eae1fb3ef45 Mon Sep 17 00:00:00 2001 From: Thomas Williams Date: Mon, 20 Apr 2026 14:11:02 +0100 Subject: [PATCH] Rework ``strict_config`` This PR allows for more granular control over what arrays to include in a combination, and deprecates the old "strict_config" flag. The config_definitions now has a 'requires' keyword for interferometric configs, which works by using a pipe ("|") like an OR statement. So for instance, you can now set up a 12m config like this: interf_config 12m {'array_tags':['12m_1','12m_2']} interf_config 12m {'requires':['12m_1|12m_2']} which will read as you require at least one of 12m_1 or 12m_2. If a particular array isn't included anywhere in the 'requires' list, it's assumed to be required to be present. So in a more complicated case: interf_config 12m+7m {'array_tags':['12m_1','12m_2','7m']} interf_config 12m+7m {'requires':['12m_1|12m_2']} requires at least one of 12m_1 or 12m_2, AND also requires 7m. This means that the old, default strict_config=True behaviour is maintained just by not including this new 'requires' line, and strict_config=False can be maintained by (e.g. for 12m+7m): interf_config 12m+7m {'requires':['12m_1|12m_2|7m']} Trying to set strict_config to True or False in the VisHandler now raises a DeprecationWarning, and this new 'requires' behaviour is documented both in the KeyHandler and VisHandler. Importantly, this enables us to image datasets where we might have 2 12m configs in some cases but not others, but you don't want to allow it to image everything if e.g. the 7m is missing (the use case here is SSCALES). I've tested this on a number of cases and seems to be working as expected, though it would be good to get some eyes on just to make sure this is robust. - Rework ``strict_config`` in the KeyHandler and VisHandler - Update scripts to remove strict_config calls - Update CHANGELOG.md --- CHANGELOG.md | 3 +- phangsPipeline/handlerKeys.py | 150 +++++++++++++------- phangsPipeline/handlerVis.py | 63 ++++---- phangsPipeline/utilsKeyReaders.py | 1 + scripts/run_casa_imaging_chunked_example.py | 8 +- scripts/run_casa_imaging_chunked_ngc1097.py | 8 +- scripts/run_casa_test_imaging_example.py | 8 +- 7 files changed, 140 insertions(+), 101 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a72166b..b92fba6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Action to automatically add Dependabot updates to changelog (#330) +- ``requires`` keywords in ``config_definitions``, deprecating ``strict_config`` for more per-config control (#329). +- Action to automatically add Dependabot updates to changelog (#330). - Initial test suite, which checks the various CASA tasks and arguments used throughout the pipeline (#324). - Initial pip-installable version (#292). - Added more detailed instructions for installing analysisUtils (#340). diff --git a/phangsPipeline/handlerKeys.py b/phangsPipeline/handlerKeys.py index 360b1ed..0e01735 100644 --- a/phangsPipeline/handlerKeys.py +++ b/phangsPipeline/handlerKeys.py @@ -3,8 +3,8 @@ etc. This is the program that navigates the galaxy list, directory structure, etc. This part is pure python. """ - import ast +import copy import logging import os @@ -2087,6 +2087,23 @@ def get_array_tags_for_config(self, config=None): return None + def get_require_tags_for_config(self, + config=None, + ): + """ + Get the list of required array tags associated with an interferometric configuration. + """ + if config is None: + logging.error("Please specify a config.") + return None + + if 'interf_config' in self._config_dict: + if config in self._config_dict['interf_config']: + if 'requires' in self._config_dict['interf_config'][config]: + return self._config_dict['interf_config'][config]['requires'] + + return [] + def get_timebin_for_array_tag(self, array_tag=None): """ Get the timebin for an array tag. Returns 0s by default. @@ -2137,10 +2154,9 @@ def loop_over_input_ms( config=None, project=None, check_linmos=False, - strict_config=True, ): """ - Loop over the the target name, project tag, array tag, and + Loop over the target name, project tag, array tag, and obsnum for each input visibility file. If a target is supplied then restrict to that target, trying to match to a linear mosaic if the target is not represented in the dictionary. If @@ -2148,11 +2164,16 @@ def loop_over_input_ms( contribute to that configuration. Note that the interaction with configs that contain multiple - arrays is tricky. By default, if "strict_config" is TRUE, it - will only loop over targets that have data from ALL arrays in - the configuration. For example to be included in a "12m+7m" - configuration you need both "12m" AND "7m" data. Set - strict_config to FALSE to adjust this behaviour. + arrays is tricky. Use the 'requires' keyword in the + config_definitions file to control this. By default, we require ALL + arrays that make up the configuration, but this can be changed + to an OR if you only need one of a certain combination, e.g. + + interf_config 12m {'array_tags':['12m_1','12m_2']} + interf_config 12m {'requires':['12m_1|12m_2']} + + requires only one of 12m_1 or 12m_2 to be present. If an array is not + in the 'requires' list, then it is assumed to be required. """ # if user has input a target or target list, match to it @@ -2224,43 +2245,40 @@ def loop_over_input_ms( if not (this_target in just_targets): continue - # If we're being strict, only consider targets that have - # data associated with the user-supplied configs. - - if strict_config: - - # This list holds the valid array tags for this target. + # This list holds the valid array tags for this target. + valid_arraytags = [] - valid_arraytags = [] - - # This mode only works with a user-supplied list of - # configs. Else we loop over all measurement sets. + # This mode only works with a user-supplied list of + # configs. Else we loop over all measurement sets. + if config is not None: + if type(config) == type(''): + input_configs = [config] + elif type(config) == type([]): + input_configs = config + else: + logger.error("Expected list or string.") + raise Exception("Expected list or string.") - if config is not None: - if type(config) == type(''): - input_configs = [config] - elif type(config) == type([]): - input_configs = config - else: - logger.error("Expected list or string.") - raise Exception("Expected list or string.") + has_data_for_any_config = False - has_data_for_any_config = False + # Check if the target has data for that configuration + for this_config in input_configs: - # Check if the target has data for that configuration - for this_config in input_configs: + if self.has_data_for_config(target=this_target, config=this_config): + has_data_for_any_config = True - if self.has_data_for_config(target=this_target, config=this_config, strict=True): - has_data_for_any_config = True + # Note the array tags in this, known to be valid, configuration + this_arraytags = self.get_array_tags_for_config(this_config) + if not isinstance(this_arraytags, list): + raise TypeError("Expected list of array tags.") - # Note the array tags in this, known to be valid, configuration - for this_arraytag in self.get_array_tags_for_config(this_config): - if valid_arraytags.count(this_arraytag) == 0: - valid_arraytags.append(this_arraytag) + for this_arraytag in this_arraytags: + if valid_arraytags.count(this_arraytag) == 0: + valid_arraytags.append(this_arraytag) - # If there are no valid configurations skip. - if not has_data_for_any_config: - continue + # If there are no valid configurations skip. + if not has_data_for_any_config: + continue # loop over projects project_list = list(self._ms_dict[this_target].keys()) @@ -2280,12 +2298,11 @@ def loop_over_input_ms( if not (this_arraytag in just_arraytags): continue - if strict_config and config is not None: + if config is not None: if valid_arraytags.count(this_arraytag) == 0: continue # loop over obs nums - obsnum_list = list(self._ms_dict[this_target][this_project][this_arraytag].keys()) obsnum_list.sort() for this_obsnum in obsnum_list: @@ -2363,7 +2380,6 @@ def has_data_for_config( self, target=None, config=None, - strict=True, ): """ Test whether a target has data for a configuration in the ms @@ -2379,6 +2395,39 @@ def has_data_for_config( return None config_array_tags = self.get_array_tags_for_config(config) + if not isinstance(config_array_tags, list): + raise TypeError("Expected list of array tags.") + + config_require_tags = self.get_require_tags_for_config(config) + if not isinstance(config_require_tags, list): + raise TypeError("Expected list of require tags.") + + # Combine these into a list of (potentially) lists + final_config_array_tags = [] + for config_array_tag in config_array_tags: + + found_in_require = False + + for config_require_tag in config_require_tags: + + # Split these at the pipe, and see if the current + # array tag is already in the list + crt_split = config_require_tag.split('|') + if config_array_tag in crt_split: + + found_in_require = True + + # If we don't already have this in the list, + # then append + if crt_split not in final_config_array_tags: + final_config_array_tags.append(crt_split) + + # If we haven't found a match, then append the original + # array tag (as a single-entry list) + if not found_in_require: + final_config_array_tags.append([config_array_tag]) + + config_array_tags = copy.deepcopy(final_config_array_tags) arraytags_for_target = [] @@ -2392,7 +2441,6 @@ def has_data_for_config( for this_arraytag in self._ms_dict[this_target][this_project].keys(): arraytags_for_target.append(this_arraytag) - has_any = False missing_any = False for this_config_arraytag in config_array_tags: @@ -2401,25 +2449,17 @@ def has_data_for_config( for this_target_arraytag in arraytags_for_target: - if this_config_arraytag == this_target_arraytag: + if this_target_arraytag in this_config_arraytag: missing_this_one = False - has_any = True if missing_this_one: missing_any = True - if strict: - if missing_any: - return False - else: - return True - else: - if has_any: - return True - else: - return False + # If we don't match everything, return a false + if missing_any: + return False - return False + return True def get_field_for_input_ms( self, diff --git a/phangsPipeline/handlerVis.py b/phangsPipeline/handlerVis.py index ee51a29..331557b 100644 --- a/phangsPipeline/handlerVis.py +++ b/phangsPipeline/handlerVis.py @@ -88,24 +88,38 @@ def loop_stage_uvdata( intent=None, timebin=None, just_projects=None, - strict_config=True, require_full_line_coverage=False, require_full_cont_coverage=False, - overwrite=False): + overwrite=False, + strict_config=None, + ): """ Loops over the full set of targets, products, and configurations to run the uv data processing. Toggle the parts of the loop using the do_XXX booleans. Other choices affect the algorithms used. - The strict_config option sets whether to require that a target has data - from ALL arrays that make up the configuration (True) or not (False). + This strict_config option has now been deprecated in favour of + more granular control. Use the 'requires' keyword in the + config_definitions file instead. By default, we require ALL + arrays that make up the configuration, but this can be changed + to an OR if you only need one of a certain combination, e.g. + + interf_config 12m {'array_tags':['12m_1','12m_2']} + interf_config 12m {'requires':['12m_1|12m_2']} + + requires only one of 12m_1 or 12m_2 to be present. If an array is not + in the 'requires' list, then it is assumed to be required. The require_full_line_coverage option sets whether to require a measurement set to completely cover a given line's frequency range (True) or not (False). """ + if strict_config is not None: + raise DeprecationWarning("strict_config has been deprectated. " + "Use 'requires' in config_definitions instead.") + if make_directories: self._kh.make_missing_directories(imaging=True) @@ -131,7 +145,7 @@ def loop_stage_uvdata( target=target_list, config=config_list, project=just_projects, - strict_config=strict_config): + ): for this_product in product_list: @@ -185,16 +199,6 @@ def loop_stage_uvdata( just_line=True, just_interf=True): - if strict_config: - # this seems like it doesn't do anything - do we - # actually want a test here and if we do shouldn't - # it be checking if this is false then - # continuing? In theory this is checked above. - self._kh.has_data_for_config( - target=this_target, - config=this_config, - strict=True) - if do_extract_line: if this_product in self._kh.get_line_products(): @@ -209,7 +213,7 @@ def loop_stage_uvdata( # could add algorithm flags here require_full_line_coverage=require_full_line_coverage, overwrite=overwrite, - strict_config=strict_config) + ) for this_target, this_product, this_config in \ self.looper( @@ -219,13 +223,6 @@ def loop_stage_uvdata( just_cont=True, just_interf=True): - # Same as above - check / revise - if strict_config: - self._kh.has_data_for_config( - target=this_target, - config=this_config, - strict=True) - if do_extract_cont: if this_product in self._kh.get_continuum_products(): @@ -237,7 +234,7 @@ def loop_stage_uvdata( do_statwt=statwt_cont, require_full_cont_coverage=require_full_cont_coverage, overwrite=overwrite, - strict_config=strict_config) + ) # Clean up the staged measurement sets. They cost time to # re-split, but have a huge disk imprint and are redundant @@ -248,7 +245,7 @@ def loop_stage_uvdata( target=target_list, config=config_list, project=just_projects, - strict_config=strict_config): + ): for this_product in product_list: @@ -259,7 +256,7 @@ def loop_stage_uvdata( array_tag=this_array_tag, obsnum=this_obsnum, product=this_product, - strict_config=strict_config) + ) return () @@ -424,7 +421,7 @@ def remove_staged_products( obsnum=None, product=None, extra_ext='', - strict_config=True): + ): """ Remove 'staged' visibility products, which are intermediate between the calibrated data and the concated measurement sets @@ -477,7 +474,7 @@ def task_concat_uvdata( extra_ext_in='', extra_ext_out='', overwrite=False, - strict_config=True): + ): """ Concatenate all measurement sets for the supplied target+config+product combination. @@ -507,7 +504,7 @@ def task_concat_uvdata( target=target, config=config, project=just_projects, - strict_config=strict_config): + ): this_staged_ms = fnames.get_staged_msname( target=this_target, project=this_project, @@ -718,7 +715,7 @@ def task_extract_line( method="regrid_then_rebin", require_full_line_coverage=False, overwrite=False, - strict_config=True): + ): """ Extract spectral line data from ms data for the input target, config and product. @@ -771,7 +768,7 @@ def task_extract_line( self._kh.loop_over_input_ms(target=[target], config=[config], project=None, - strict_config=strict_config): + ): # The name of the staged measurement set with this # combination of target, project, array, obsnum. @@ -918,7 +915,7 @@ def task_extract_continuum( method="regrid_then_rebin", require_full_cont_coverage=False, overwrite=False, - strict_config=True): + ): """ Extract continuum data from ms data for the input target, config, and product. @@ -953,7 +950,7 @@ def task_extract_continuum( self._kh.loop_over_input_ms(target=[target], config=[config], project=None, - strict_config=strict_config): + ): # The name of the staged measurement set with this # combination of target, project, array, obsnum. diff --git a/phangsPipeline/utilsKeyReaders.py b/phangsPipeline/utilsKeyReaders.py index 5955d45..f698574 100644 --- a/phangsPipeline/utilsKeyReaders.py +++ b/phangsPipeline/utilsKeyReaders.py @@ -734,6 +734,7 @@ def read_config_key(fname='', existing_dict=None, delim=None): if this_type == "interf_config": expected_params = { 'array_tags': [], + 'requires': [], 'res_min_arcsec': 0.0, 'res_max_arcsec': 0.0, 'res_min_pc': 0.0, diff --git a/scripts/run_casa_imaging_chunked_example.py b/scripts/run_casa_imaging_chunked_example.py index f0f374c..2b4f2da 100644 --- a/scripts/run_casa_imaging_chunked_example.py +++ b/scripts/run_casa_imaging_chunked_example.py @@ -130,22 +130,22 @@ this_uvh.loop_stage_uvdata(do_copy=True, do_contsub=True, do_extract_line=False, do_extract_cont=False, do_remove_staging=False, overwrite=True, - strict_config=False) + ) this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, do_extract_line=True, do_extract_cont=False, do_remove_staging=False, overwrite=True, - strict_config=False) + ) this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, do_extract_line=False, do_extract_cont=True, do_remove_staging=False, overwrite=True, - strict_config=False) + ) this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, do_extract_line=False, do_extract_cont=False, do_remove_staging=True, overwrite=True, - strict_config=False) + ) ############################################################################## # Step through imaging diff --git a/scripts/run_casa_imaging_chunked_ngc1097.py b/scripts/run_casa_imaging_chunked_ngc1097.py index 3dcf19c..4da7cd7 100644 --- a/scripts/run_casa_imaging_chunked_ngc1097.py +++ b/scripts/run_casa_imaging_chunked_ngc1097.py @@ -138,22 +138,22 @@ this_uvh.loop_stage_uvdata(do_copy=True, do_contsub=True, do_extract_line=False, do_extract_cont=False, do_remove_staging=False, overwrite=True, - strict_config=False) + ) this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, do_extract_line=True, do_extract_cont=False, do_remove_staging=False, overwrite=True, - strict_config=False) + ) this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, do_extract_line=False, do_extract_cont=True, do_remove_staging=False, overwrite=True, - strict_config=False) + ) this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, do_extract_line=False, do_extract_cont=False, do_remove_staging=True, overwrite=True, - strict_config=False) + ) ############################################################################## # Step through imaging diff --git a/scripts/run_casa_test_imaging_example.py b/scripts/run_casa_test_imaging_example.py index f831f13..5655bb6 100644 --- a/scripts/run_casa_test_imaging_example.py +++ b/scripts/run_casa_test_imaging_example.py @@ -131,22 +131,22 @@ this_uvh.loop_stage_uvdata(do_copy=True, do_contsub=True, do_extract_line=False, do_extract_cont=False, do_remove_staging=False, overwrite=True, - strict_config=False) + ) this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, do_extract_line=True, do_extract_cont=False, do_remove_staging=False, overwrite=True, - strict_config=False) + ) # this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, # do_extract_line=False, do_extract_cont=True, # do_remove_staging=False, overwrite=True, - # strict_config=False) + # ) # this_uvh.loop_stage_uvdata(do_copy=False, do_contsub=False, # do_extract_line=False, do_extract_cont=False, # do_remove_staging=True, overwrite=True, - # strict_config=False) + # ) ############################################################################## # Step through imaging