From 2b3517cf4aff9b6ecf023d69282dfce50cc579ae Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Fri, 24 Apr 2026 15:47:43 -0400 Subject: [PATCH 1/2] added validator to disallow sample.metadata.source|sample_id for create and update --- src/schema/provenance_schema.yaml | 4 ++++ src/schema/schema_validators.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 5a8fe45b..dc623da3 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -1078,6 +1078,10 @@ ENTITIES: type: json_string indexed: true description: "The sample specific metadata derived from the uploaded sample_metadata.tsv file" + before_property_create_validators: + - validate_sample_metadata_dissalowed_fields + before_property_update_validators: + - validate_sample_metadata_dissalowed_fields rui_location: type: json_string indexed: true diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 0ca4acb5..77282c50 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -175,6 +175,38 @@ def validate_ids_exist_and_datasets(property_key, normalized_entity_type, reques raise ValueError(f"The following {len(unqualified_uuids_list)} uuids are either not found or not Dataset type: {str(unqualified_uuids_list)}.") +""" +Validate provided data does not include include metadata.source_id or metadata.sample_id + +Parameters +---------- +property_key : str + The target property key +normalized_type : str + Submission +request: Flask request object + The instance of Flask request passed in from application request +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + The json data in request body, already after the regular validations +""" +def validate_sample_metadata_dissalowed_fields(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): 
+ data = new_data_dict + disallowed_values = ['source_id', 'sample_id'] + invalid_entries = [] + metadata = data.get('metadata') + if metadata and isinstance(metadata, dict): + metadata_fields = metadata.keys() + normalized_metadata_fields = [f.lower() for f in metadata_fields] + for field in normalized_metadata_fields: + if field in disallowed_values: + invalid_entries.append(field) + if len(invalid_entries) > 0: + raise ValueError(f'Disallowed field(s) inside Sample.metadata: {", ".join(invalid_entries)}') + + + """ Validate that a given dataset is not a component of a multi-assay split parent dataset fore allowing status to be updated. If a component dataset needs to be updated, update it via its parent multi-assay dataset From f9ab8f88808605236ec96d1ef63b3898722c310f Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Fri, 24 Apr 2026 15:50:51 -0400 Subject: [PATCH 2/2] tweaked comment block to use more general language for validate_sample_metadata_disallowed_fields --- src/schema/provenance_schema.yaml | 4 ++-- src/schema/schema_validators.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index dc623da3..3107832f 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -1079,9 +1079,9 @@ ENTITIES: indexed: true description: "The sample specific metadata derived from the uploaded sample_metadata.tsv file" before_property_create_validators: - - validate_sample_metadata_dissalowed_fields + - validate_sample_metadata_disallowed_fields before_property_update_validators: - - validate_sample_metadata_dissalowed_fields + - validate_sample_metadata_disallowed_fields rui_location: type: json_string indexed: true diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 77282c50..4ec3cb21 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -176,7 +176,7 @@ def
validate_ids_exist_and_datasets(property_key, normalized_entity_type, reques """ -Validate provided data does not include include metadata.source_id or metadata.sample_id +Validate provided data does not include prohibited fields (e.g. sample_id, source_id) Parameters ---------- @@ -191,7 +191,7 @@ def validate_ids_exist_and_datasets(property_key, normalized_entity_type, reques new_data_dict : dict The json data in request body, already after the regular validations """ -def validate_sample_metadata_dissalowed_fields(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): +def validate_sample_metadata_disallowed_fields(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): data = new_data_dict disallowed_values = ['source_id', 'sample_id'] invalid_entries = []