From 593f6aae01b23f33e5603e942239aa579d411980 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Fri, 27 Mar 2026 15:59:43 -0400
Subject: [PATCH 1/2] wip: updated logic for batch ids query

---
 src/app_neo4j_queries.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index 8fc130b0..0daba1c2 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -1198,19 +1198,15 @@ def get_entities_by_uuid(neo4j_driver,
 """
 def get_batch_ids(neo4j_driver, id_list):
     query = """
-    MATCH (e)
-    WHERE e.uuid IN $id_list OR e.hubmap_id IN $id_list
-    WITH e, [id IN $id_list WHERE id = e.uuid OR id = e.hubmap_id][0] AS original_id
-    RETURN original_id, e.uuid AS uuid, e.hubmap_id AS hubmap_id
+    UNWIND $id_list AS uid
+    MATCH (e:Entity {uuid: uid})
+    RETURN uid AS uuid, e.hubmap_id AS hubmap_id
     """
 
-    result_map = {}
+    result_dict = {}
 
     with neo4j_driver.session() as session:
         result = session.run(query, id_list=id_list)
         for record in result:
-            result_map[record['original_id']] = {
-                "uuid": record['uuid'],
-                "hubmap_id": record['hubmap_id']
-            }
-    return result_map
\ No newline at end of file
+            result_dict[record["uuid"]] = record["hubmap_id"]
+    return result_dict
\ No newline at end of file

From ace7caac10fa695e7cf66971bf951d6ea2ff9f9c Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Tue, 31 Mar 2026 00:33:58 -0400
Subject: [PATCH 2/2] updated query to massively improve the speed of fetching
 metadata by not needing to page through the result set

---
Review notes (ignored by `git am`):
  * The hunk as originally sent also deleted the
    `with neo4j_driver.session() as session:` line while the added lines
    still call `session.run(...)`, leaving `session` undefined. The line
    is kept as context here, so the hunk is now 15 -> 13 lines with
    8 deletions instead of 9.
  * The post-image blob id on the `index` line predates this fix-up;
    regenerate the series with `git format-patch` before relying on it.
  * Blank context lines and indentation were reconstructed from a
    whitespace-collapsed copy -- TODO confirm against the working tree.

 src/app_neo4j_queries.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index 0daba1c2..2975c908 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -1198,15 +1198,13 @@ def get_entities_by_uuid(neo4j_driver,
 """
 def get_batch_ids(neo4j_driver, id_list):
     query = """
-    UNWIND $id_list AS uid
-    MATCH (e:Entity {uuid: uid})
-    RETURN uid AS uuid, e.hubmap_id AS hubmap_id
+    MATCH (e:Entity)
+    WHERE e.uuid IN $id_list
+    RETURN apoc.map.fromPairs(COLLECT([e.uuid, e.hubmap_id])) AS result
     """
 
-    result_dict = {}
 
     with neo4j_driver.session() as session:
-        result = session.run(query, id_list=id_list)
-        for record in result:
-            result_dict[record["uuid"]] = record["hubmap_id"]
-    return result_dict
\ No newline at end of file
+        record = session.run(query, id_list=id_list)
+        raw = record.single()["result"]
+        return {uuid: {"uuid": uuid, "hubmap_id": hubmap_id} for uuid, hubmap_id in raw.items()}