From 2ac57507aad91ead3c7039511b61a9934707a95e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joan=20H=C3=A9risson?= <joan.herisson@univ-evry.fr>
Date: Thu, 21 May 2026 14:31:45 +0200
Subject: [PATCH 1/3] fix(rr_cache/rr_cache.py): add path to input filename

---
 rr_cache/rr_cache.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rr_cache/rr_cache.py b/rr_cache/rr_cache.py
index 086c513..87a6a79 100644
--- a/rr_cache/rr_cache.py
+++ b/rr_cache/rr_cache.py
@@ -797,7 +797,7 @@ def _gen_reactions(
                         else:
                             reactions = getattr(
                                 rrCache, "_m_" + attribute + "_reactions_legacy"
-                            )(dep_file, logger=logger)
+                            )(_dep_file, logger=logger)
                             logger.debug("   Writing data to file...")
                             rrCache._store_cache_to_file(
                                 reactions, outfile, logger=logger
@@ -1458,6 +1458,7 @@ def _m_rr_reactions_legacy(
                     "rel_direction": int(row["Rule_relative_direction"]),
                     "left": {row["Substrate_ID"]: 1},
                     "right": products,
+                    "ec_numbers": row["EC_number"].split(",") if row["EC_number"] else [],
                 }
 
             except ValueError:
@@ -1469,6 +1470,8 @@ def _m_rr_reactions_legacy(
                     "Problem converting rule_score: " + str(row["Score_normalized"])
                 )
 
+        print(rr_reactions)
+        exit()
         return rr_reactions
 
     ## Generate complete reactions from the rxn_recipes.tsv from RetroRules

From fe1586108b568520c0b6621c370479f7b2b0c7c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joan=20H=C3=A9risson?= <joan.herisson@univ-evry.fr>
Date: Thu, 21 May 2026 14:32:47 +0200
Subject: [PATCH 2/3] feat: add missing reactions in rules from mnx

---
 rr_cache/config/config_rr2026.json |   3 +-
 rr_cache/rr_cache.py               | 309 ++++++++++++++++++++++++++++-
 tests/data/metrics_rr2026.json     |   2 +-
 tests/test_rrCache.py              |  18 ++
 4 files changed, 326 insertions(+), 6 deletions(-)

diff --git a/rr_cache/config/config_rr2026.json b/rr_cache/config/config_rr2026.json
index db5b10a..16de2f7 100644
--- a/rr_cache/config/config_rr2026.json
+++ b/rr_cache/config/config_rr2026.json
@@ -44,7 +44,8 @@
             "url": "https://www.metanetx.org/ftp/4.5/",
             "files": {
                 "chem_xref.tsv": "2b88dfe3b990ee85c8d8eb51f6d7079bf042b14e4cd11a4cc6ceb841686f9ea7ef4a04358363561fbbb2187a9870a1930037a030283908788a572e7b96b91ec7",
-                "chem_prop.tsv": "00d8d09d52884961748d032288781c4d47929f12d5052b367116c411194fdfae879b15844e8a15bccd494c26702dae57fd682c3bc03b5fee3c3e92e138c1eac2"
+                "chem_prop.tsv": "00d8d09d52884961748d032288781c4d47929f12d5052b367116c411194fdfae879b15844e8a15bccd494c26702dae57fd682c3bc03b5fee3c3e92e138c1eac2",
+                "reac_prop.tsv": "1796b5d77c707d0cfbfce99cf0466ef6aff0fe61ea5ded043216aa334e5ecd1388364d214162342f470b51eca2dab4309dcec2b9e074abee47dcae974c674bd7"
             }
         },
         "rr2": {
diff --git a/rr_cache/rr_cache.py b/rr_cache/rr_cache.py
index 87a6a79..d76b9cc 100644
--- a/rr_cache/rr_cache.py
+++ b/rr_cache/rr_cache.py
@@ -12,6 +12,7 @@
 from re import findall as re_findall
 
 # from time       import time as time_time
+import requests
 from requests import exceptions as r_exceptions
 from hashlib import sha512
 from pathlib import Path
@@ -296,15 +297,316 @@ def get_list_of_compounds(self):
     def get_reaction(self, rxn_id: str):
         return self.__get_object("template_reactions", rxn_id)
 
+    def add_reaction(self, rxn, persist: bool = True):
+        """Add a reaction to `template_reactions` and optionally persist the cache.
+
+        The `rxn` argument may be either:
+          - a dict-like object containing at least `left` and `right` mappings (and
+            optional `direction`, `main_left`, `main_right`), or
+          - a string reaction id (e.g. 'MNXR12345') — in which case this will only
+            work if a local `reac_prop.tsv` exists in the input-cache (no download).
+
+        This function no longer attempts to download `reac_prop.tsv`; download
+        and scanning should be handled by `add_missing_reactions`.
+
+        Args:
+            rxn: Reaction data (dict-like or string id)
+            persist (bool): Whether to persist changes to disk (default: True). Set to False
+                           to keep changes in memory only, useful when adding multiple reactions
+                           to avoid repeated file write operations.
+
+        Returns the reaction dict on success, or None on failure.
+        """
+        # Ensure template_reactions are loaded
+        try:
+            if not self.__hasattr("template_reactions"):
+                self.Load(attrs=["template_reactions"])
+            tr = self.get("template_reactions") or {}
+        except Exception as e:
+            self.logger.error(f"Failed to load template_reactions: {e}")
+            return None
+
+        # If passed a dict-like reaction, insert directly
+        if isinstance(rxn, dict):
+            # try to detect rxn_id inside dict
+            rxn_id = rxn.get("id") or rxn.get("rxn_id")
+            if not rxn_id:
+                self.logger.error("Reaction dict must include an 'id' or 'rxn_id' key")
+                return None
+            if rxn_id in tr:
+                return tr[rxn_id]
+
+            # Minimal validation: require left/right
+            if "left" not in rxn or "right" not in rxn:
+                self.logger.error(
+                    "Reaction dict must contain 'left' and 'right' mappings"
+                )
+                return None
+
+            tr[rxn_id] = {
+                "left": rxn.get("left", {}),
+                "right": rxn.get("right", {}),
+                "direction": rxn.get("direction", 0),
+                "main_left": rxn.get("main_left", []),
+                "main_right": rxn.get("main_right", []),
+            }
+
+            # persist to in-memory cache
+            try:
+                self.set("template_reactions", tr)
+                # persist to disk if requested
+                if persist:
+                    outfile = os_path.join(
+                        self.__cache_dir,
+                        rrCache.__cache["template_reactions"]["file"]["name"],
+                    )
+                    rrCache._store_cache_to_file(tr, outfile, logger=self.logger)
+            except Exception as e:
+                self.logger.warning(f"Failed to persist template_reactions cache: {e}")
+            return tr[rxn_id]
+
+        # If passed a string id, try local lookup only (no download)
+        if isinstance(rxn, str):
+            rxn_id = rxn
+            if rxn_id in tr:
+                return tr[rxn_id]
+
+            reac_prop_file = os_path.join(self.__input__cache_dir, "reac_prop.tsv")
+            if not os_path.exists(reac_prop_file):
+                self.logger.error(
+                    "Local reac_prop.tsv not found — cannot add reaction by id. "
+                    "Use add_missing_reactions to download/scan and add reactions."
+                )
+                return None
+
+            reaction = rrCache._m_mnx_reaction_from_reac_prop(
+                reac_prop_file, rxn_id, logger=self.logger
+            )
+            if reaction is None:
+                self.logger.warning(
+                    f"Reaction {rxn_id} not found in local reac_prop.tsv"
+                )
+                return None
+
+            # reuse dict insertion path
+            return self.add_reaction({"id": rxn_id, **reaction}, persist=persist)
+
+        self.logger.error("add_reaction expects a dict-like or string reaction id")
+        return None
+
+    @staticmethod
+    def _m_mnx_reaction_from_reac_prop(
+        reac_prop_path: str,
+        rxn_id: str,
+        logger: Logger = getLogger(__name__),
+    ) -> Dict:
+        if not os_path.exists(reac_prop_path):
+            logger.error(f"MetaNetX reaction file not found: {reac_prop_path}")
+            return None
+
+        with open(reac_prop_path, "rt", encoding="utf-8-sig") as f:
+            reader = csv_reader(f, delimiter="\t")
+            header = None
+            for row in reader:
+                if not row or len(row) == 0:
+                    continue
+                if row[0].startswith("#ID"):
+                    header = [h.lstrip("#") for h in row]
+                    continue
+                if row[0].startswith("#"):
+                    continue
+                if header is None:
+                    continue
+
+                row_dict = {
+                    header[i]: row[i] if i < len(row) else ""
+                    for i in range(len(header))
+                }
+                if row_dict.get("ID") != rxn_id:
+                    continue
+
+                equation = row_dict.get("mnx_equation") or row_dict.get("equation")
+                if not equation:
+                    logger.warning(
+                        f"No equation found for reaction {rxn_id} in reac_prop.tsv"
+                    )
+                    return None
+
+                parsed = rrCache._read_equation(equation, rxn_id, logger)
+                if parsed is None:
+                    return None
+
+                left = parsed.get("left", {})
+                right = parsed.get("right", {})
+                main_left = [next(iter(left.keys()))] if left else []
+                main_right = [next(iter(right.keys()))] if right else []
+
+                return {
+                    "left": left,
+                    "right": right,
+                    "direction": 0,
+                    "main_left": main_left,
+                    "main_right": main_right,
+                }
+
+        return None
+
     def get_list_of_reactions(self):
         return self.__get_list_of_objects("template_reactions")
 
+    def add_missing_reactions(self, limit: int = None, logger: Logger = None) -> int:
+        """Scan `reac_prop.tsv` (download if missing) for reactions absent from
+        `template_reactions`, build reaction dicts, and add them via
+        `add_reaction` one by one.
+
+        Args:
+            limit (int, optional): Maximum number of reactions to add. None means no limit.
+            logger (Logger, optional): Logger to use. Defaults to self.logger.
+
+        Returns:
+            int: Number of reactions successfully added.
+        """
+        if logger is None:
+            logger = self.logger
+
+        # Ensure template_reactions loaded
+        try:
+            if not self.__hasattr("template_reactions"):
+                self.Load(attrs=["template_reactions"])
+            tr = self.get("template_reactions") or {}
+        except Exception as e:
+            logger.error(f"Failed to load template_reactions: {e}")
+            return 0
+
+        # Ensure reac_prop.tsv exists (try configured download, then fallback)
+        reac_prop_file = os_path.join(self.__input__cache_dir, "reac_prop.tsv")
+        if not os_path.exists(reac_prop_file):
+            try:
+                mnx_source = rrCache.__cache_sources.get("mnx", {})
+                reac_fingerprint = mnx_source.get("files", {}).get("reac_prop.tsv")
+                rrCache._download_if_not_exists_or_corrupted(
+                    mnx_source.get("url", ""),
+                    "reac_prop.tsv",
+                    self.__input__cache_dir,
+                    reac_fingerprint,
+                    logger=logger,
+                )
+            except Exception:
+                # fallback direct download using config URL
+                try:
+                    mnx_source = rrCache.__cache_sources.get("mnx", {})
+                    # base_url = mnx_source.get("url", "https://www.metanetx.org/ftp/4.5/")
+                    base_url = mnx_source.get("url")
+                    fallback_url = f"{base_url}reac_prop.tsv"
+                    logger.debug(
+                        f"Downloading fallback reac_prop.tsv from {fallback_url}"
+                    )
+                    r = requests.get(fallback_url, timeout=30)
+                    r.raise_for_status()
+                    if not os_path.isdir(self.__input__cache_dir):
+                        makedirs(self.__input__cache_dir, exist_ok=True)
+                    with open(reac_prop_file, "w", encoding="utf-8") as f:
+                        f.write(r.text)
+                except Exception as e2:
+                    logger.error(f"Cannot retrieve MetaNetX reac_prop.tsv: {e2}")
+                    return 0
+
+        to_add = []
+        try:
+            with open(reac_prop_file, "rt", encoding="utf-8-sig") as f:
+                reader = csv_reader(f, delimiter="\t")
+                header = None
+                for row in reader:
+                    if not row or len(row) == 0:
+                        continue
+                    if row[0].startswith("#ID"):
+                        header = [h.lstrip("#") for h in row]
+                        continue
+                    if row[0].startswith("#"):
+                        continue
+                    if header is None:
+                        continue
+
+                    rxn_id = row[0]
+                    if rxn_id in tr:
+                        continue
+
+                    # Build row dict and parse equation
+                    row_dict = {
+                        header[i]: row[i] if i < len(row) else ""
+                        for i in range(len(header))
+                    }
+                    equation = row_dict.get("mnx_equation") or row_dict.get("equation")
+                    if not equation:
+                        logger.debug(f"Skipping {rxn_id}: no equation")
+                        continue
+
+                    parsed = rrCache._read_equation(equation, rxn_id, logger)
+                    if parsed is None:
+                        logger.debug(f"Skipping {rxn_id}: failed to parse equation")
+                        continue
+
+                    left = parsed.get("left", {})
+                    right = parsed.get("right", {})
+                    main_left = [next(iter(left.keys()))] if left else []
+                    main_right = [next(iter(right.keys()))] if right else []
+
+                    to_add.append(
+                        {
+                            "id": rxn_id,
+                            "left": left,
+                            "right": right,
+                            "direction": 0,
+                            "main_left": main_left,
+                            "main_right": main_right,
+                        }
+                    )
+                    if limit is not None and len(to_add) >= limit:
+                        break
+        except Exception as e:
+            logger.error(f"Error while scanning reac_prop.tsv: {e}")
+            return 0
+
+        added = 0
+        for rxn_dict in to_add:
+            res = self.add_reaction(rxn_dict, persist=False)
+            if res is not None:
+                added += 1
+
+        # Store cache to file once after all reactions are added
+        if added > 0:
+            try:
+                outfile = os_path.join(
+                    self.__cache_dir,
+                    rrCache.__cache["template_reactions"]["file"]["name"],
+                )
+                tr = self.get("template_reactions") or {}
+                rrCache._store_cache_to_file(tr, outfile, logger=logger)
+            except Exception as e:
+                logger.warning(f"Failed to persist template_reactions cache: {e}")
+
+        return added
+
     def get_reaction_rule(self, rr_id: str):
         return self.__get_object("rr_reactions", rr_id)
 
     def get_list_of_reaction_rules(self):
         return self.__get_list_of_objects("rr_reactions")
 
+    def __contains__(self, id: str) -> bool:
+        if not isinstance(id, str):
+            return False
+
+        for attr in ("cid_strc", "template_reactions", "rr_reactions"):
+            try:
+                if not self.__hasattr(attr):
+                    self.Load(attrs=[attr])
+                if id in self.get(attr):
+                    return True
+            except Exception:
+                continue
+        return False
+
     def __get_object(self, attr: str, id: str):
         try:
             if not self.__hasattr(attr):
@@ -503,6 +805,8 @@ def Build(self, interactive: bool = DEFAULTS["interactive"]) -> None:
                 logger=self.logger,
             )  # , deprecatedCID_cid, deprecatedRID_rid, logger)
             print_progress(self.logger)
+        # Add missing reactions from MetaNetX
+        self.add_missing_reactions(logger=self.logger)
         try:
             rrCache._gen_comp_xref_deprecatedCompID_compid(
                 self.__input__cache_dir, self.__cache_dir, self.logger
@@ -797,7 +1101,7 @@ def _gen_reactions(
                         else:
                             reactions = getattr(
                                 rrCache, "_m_" + attribute + "_reactions_legacy"
-                            )(_dep_file, logger=logger)
+                            )(dep_file, logger=logger)
                             logger.debug("   Writing data to file...")
                             rrCache._store_cache_to_file(
                                 reactions, outfile, logger=logger
@@ -1458,7 +1762,6 @@ def _m_rr_reactions_legacy(
                     "rel_direction": int(row["Rule_relative_direction"]),
                     "left": {row["Substrate_ID"]: 1},
                     "right": products,
-                    "ec_numbers": row["EC_number"].split(",") if row["EC_number"] else [],
                 }
 
             except ValueError:
@@ -1470,8 +1773,6 @@ def _m_rr_reactions_legacy(
                     "Problem converting rule_score: " + str(row["Score_normalized"])
                 )
 
-        print(rr_reactions)
-        exit()
         return rr_reactions
 
     ## Generate complete reactions from the rxn_recipes.tsv from RetroRules
diff --git a/tests/data/metrics_rr2026.json b/tests/data/metrics_rr2026.json
index 4e32edf..00090e6 100644
--- a/tests/data/metrics_rr2026.json
+++ b/tests/data/metrics_rr2026.json
@@ -8,7 +8,7 @@
         "file_size": 243759725
     },
     "template_reactions": {
-        "length": 42700,
+        "length": 95420,
         "file_size": 8519350
     },
     "rr_reactions": {
diff --git a/tests/test_rrCache.py b/tests/test_rrCache.py
index 12eabb1..2f9ca5f 100644
--- a/tests/test_rrCache.py
+++ b/tests/test_rrCache.py
@@ -159,6 +159,24 @@ def test_get_reaction_rule(caches, reference_data, cspace: str, rule_id: str):
     assert caches[cspace].get_reaction_rule(rule_id) == retrorules[rule_id]
 
 
+@pytest.mark.parametrize("cspace, cmpd_id", COMPOUND_CASES)
+def test_contains_compound_in_cache(caches, cspace: str, cmpd_id: str):
+    assert cmpd_id in caches[cspace]
+    assert "NOT_A_VALID_ID" not in caches[cspace]
+
+
+@pytest.mark.parametrize("cspace, rxn_id", REACTION_CASES)
+def test_contains_reaction_in_cache(caches, cspace: str, rxn_id: str):
+    assert rxn_id in caches[cspace]
+    assert "NOT_A_VALID_ID" not in caches[cspace]
+
+
+@pytest.mark.parametrize("cspace, rule_id", RULE_CASES)
+def test_contains_rule_in_cache(caches, cspace: str, rule_id: str):
+    assert rule_id in caches[cspace]
+    assert "NOT_A_VALID_ID" not in caches[cspace]
+
+
 @pytest.mark.parametrize("cspace, rule_id", RULE_CASES)
 def test_get_list_of_reaction_rules(caches, reference_data, cspace: str, rule_id: str):
     rule_ids = caches[cspace].get_list_of_reaction_rules()

From ca6744bbe0335f93fade705f4643f8f293103699 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joan=20H=C3=A9risson?= <joan.herisson@univ-evry.fr>
Date: Fri, 22 May 2026 22:23:06 +0200
Subject: [PATCH 3/3] chore: insert reac_prop into config file

---
 rr_cache/config/config_rr2026.json |   2 +-
 rr_cache/rr_cache.py               | 574 +++++++++++------------------
 tests/data/metrics_rr2026.json     |   2 +-
 3 files changed, 210 insertions(+), 368 deletions(-)

diff --git a/rr_cache/config/config_rr2026.json b/rr_cache/config/config_rr2026.json
index 16de2f7..d43945d 100644
--- a/rr_cache/config/config_rr2026.json
+++ b/rr_cache/config/config_rr2026.json
@@ -31,7 +31,7 @@
         },
         "template_reactions": {
             "deps": {
-                "file_deps": ["templates_metadata"]
+                "file_deps": ["templates_metadata", "reac_prop.tsv"]
             },
             "file": {
                 "url": "",
diff --git a/rr_cache/rr_cache.py b/rr_cache/rr_cache.py
index d76b9cc..af2d5a2 100644
--- a/rr_cache/rr_cache.py
+++ b/rr_cache/rr_cache.py
@@ -7,12 +7,15 @@
     MolToInchiKey,
 )
 from csv import DictReader as csv_DictReader, reader as csv_reader
+from pandas import (
+    read_csv as pd_read_csv,
+    DataFrame,
+)
+from io import StringIO
 from json import dump as json_dump, dumps as json_dumps, load as json_load
 from gzip import open as gzip_open, GzipFile
 from re import findall as re_findall
 
-# from time       import time as time_time
-import requests
 from requests import exceptions as r_exceptions
 from hashlib import sha512
 from pathlib import Path
@@ -297,103 +300,6 @@ def get_list_of_compounds(self):
     def get_reaction(self, rxn_id: str):
         return self.__get_object("template_reactions", rxn_id)
 
-    def add_reaction(self, rxn, persist: bool = True):
-        """Add a reaction to `template_reactions` and optionally persist the cache.
-
-        The `rxn` argument may be either:
-          - a dict-like object containing at least `left` and `right` mappings (and
-            optional `direction`, `main_left`, `main_right`), or
-          - a string reaction id (e.g. 'MNXR12345') — in which case this will only
-            work if a local `reac_prop.tsv` exists in the input-cache (no download).
-
-        This function no longer attempts to download `reac_prop.tsv`; download
-        and scanning should be handled by `add_missing_reactions`.
-
-        Args:
-            rxn: Reaction data (dict-like or string id)
-            persist (bool): Whether to persist changes to disk (default: True). Set to False
-                           to keep changes in memory only, useful when adding multiple reactions
-                           to avoid repeated file write operations.
-
-        Returns the reaction dict on success, or None on failure.
-        """
-        # Ensure template_reactions are loaded
-        try:
-            if not self.__hasattr("template_reactions"):
-                self.Load(attrs=["template_reactions"])
-            tr = self.get("template_reactions") or {}
-        except Exception as e:
-            self.logger.error(f"Failed to load template_reactions: {e}")
-            return None
-
-        # If passed a dict-like reaction, insert directly
-        if isinstance(rxn, dict):
-            # try to detect rxn_id inside dict
-            rxn_id = rxn.get("id") or rxn.get("rxn_id")
-            if not rxn_id:
-                self.logger.error("Reaction dict must include an 'id' or 'rxn_id' key")
-                return None
-            if rxn_id in tr:
-                return tr[rxn_id]
-
-            # Minimal validation: require left/right
-            if "left" not in rxn or "right" not in rxn:
-                self.logger.error(
-                    "Reaction dict must contain 'left' and 'right' mappings"
-                )
-                return None
-
-            tr[rxn_id] = {
-                "left": rxn.get("left", {}),
-                "right": rxn.get("right", {}),
-                "direction": rxn.get("direction", 0),
-                "main_left": rxn.get("main_left", []),
-                "main_right": rxn.get("main_right", []),
-            }
-
-            # persist to in-memory cache
-            try:
-                self.set("template_reactions", tr)
-                # persist to disk if requested
-                if persist:
-                    outfile = os_path.join(
-                        self.__cache_dir,
-                        rrCache.__cache["template_reactions"]["file"]["name"],
-                    )
-                    rrCache._store_cache_to_file(tr, outfile, logger=self.logger)
-            except Exception as e:
-                self.logger.warning(f"Failed to persist template_reactions cache: {e}")
-            return tr[rxn_id]
-
-        # If passed a string id, try local lookup only (no download)
-        if isinstance(rxn, str):
-            rxn_id = rxn
-            if rxn_id in tr:
-                return tr[rxn_id]
-
-            reac_prop_file = os_path.join(self.__input__cache_dir, "reac_prop.tsv")
-            if not os_path.exists(reac_prop_file):
-                self.logger.error(
-                    "Local reac_prop.tsv not found — cannot add reaction by id. "
-                    "Use add_missing_reactions to download/scan and add reactions."
-                )
-                return None
-
-            reaction = rrCache._m_mnx_reaction_from_reac_prop(
-                reac_prop_file, rxn_id, logger=self.logger
-            )
-            if reaction is None:
-                self.logger.warning(
-                    f"Reaction {rxn_id} not found in local reac_prop.tsv"
-                )
-                return None
-
-            # reuse dict insertion path
-            return self.add_reaction({"id": rxn_id, **reaction}, persist=persist)
-
-        self.logger.error("add_reaction expects a dict-like or string reaction id")
-        return None
-
     @staticmethod
     def _m_mnx_reaction_from_reac_prop(
         reac_prop_path: str,
@@ -454,139 +360,6 @@ def _m_mnx_reaction_from_reac_prop(
     def get_list_of_reactions(self):
         return self.__get_list_of_objects("template_reactions")
 
-    def add_missing_reactions(self, limit: int = None, logger: Logger = None) -> int:
-        """Scan `reac_prop.tsv` (download if missing) for reactions absent from
-        `template_reactions`, build reaction dicts, and add them via
-        `add_reaction` one by one.
-
-        Args:
-            limit (int, optional): Maximum number of reactions to add. None means no limit.
-            logger (Logger, optional): Logger to use. Defaults to self.logger.
-
-        Returns:
-            int: Number of reactions successfully added.
-        """
-        if logger is None:
-            logger = self.logger
-
-        # Ensure template_reactions loaded
-        try:
-            if not self.__hasattr("template_reactions"):
-                self.Load(attrs=["template_reactions"])
-            tr = self.get("template_reactions") or {}
-        except Exception as e:
-            logger.error(f"Failed to load template_reactions: {e}")
-            return 0
-
-        # Ensure reac_prop.tsv exists (try configured download, then fallback)
-        reac_prop_file = os_path.join(self.__input__cache_dir, "reac_prop.tsv")
-        if not os_path.exists(reac_prop_file):
-            try:
-                mnx_source = rrCache.__cache_sources.get("mnx", {})
-                reac_fingerprint = mnx_source.get("files", {}).get("reac_prop.tsv")
-                rrCache._download_if_not_exists_or_corrupted(
-                    mnx_source.get("url", ""),
-                    "reac_prop.tsv",
-                    self.__input__cache_dir,
-                    reac_fingerprint,
-                    logger=logger,
-                )
-            except Exception:
-                # fallback direct download using config URL
-                try:
-                    mnx_source = rrCache.__cache_sources.get("mnx", {})
-                    # base_url = mnx_source.get("url", "https://www.metanetx.org/ftp/4.5/")
-                    base_url = mnx_source.get("url")
-                    fallback_url = f"{base_url}reac_prop.tsv"
-                    logger.debug(
-                        f"Downloading fallback reac_prop.tsv from {fallback_url}"
-                    )
-                    r = requests.get(fallback_url, timeout=30)
-                    r.raise_for_status()
-                    if not os_path.isdir(self.__input__cache_dir):
-                        makedirs(self.__input__cache_dir, exist_ok=True)
-                    with open(reac_prop_file, "w", encoding="utf-8") as f:
-                        f.write(r.text)
-                except Exception as e2:
-                    logger.error(f"Cannot retrieve MetaNetX reac_prop.tsv: {e2}")
-                    return 0
-
-        to_add = []
-        try:
-            with open(reac_prop_file, "rt", encoding="utf-8-sig") as f:
-                reader = csv_reader(f, delimiter="\t")
-                header = None
-                for row in reader:
-                    if not row or len(row) == 0:
-                        continue
-                    if row[0].startswith("#ID"):
-                        header = [h.lstrip("#") for h in row]
-                        continue
-                    if row[0].startswith("#"):
-                        continue
-                    if header is None:
-                        continue
-
-                    rxn_id = row[0]
-                    if rxn_id in tr:
-                        continue
-
-                    # Build row dict and parse equation
-                    row_dict = {
-                        header[i]: row[i] if i < len(row) else ""
-                        for i in range(len(header))
-                    }
-                    equation = row_dict.get("mnx_equation") or row_dict.get("equation")
-                    if not equation:
-                        logger.debug(f"Skipping {rxn_id}: no equation")
-                        continue
-
-                    parsed = rrCache._read_equation(equation, rxn_id, logger)
-                    if parsed is None:
-                        logger.debug(f"Skipping {rxn_id}: failed to parse equation")
-                        continue
-
-                    left = parsed.get("left", {})
-                    right = parsed.get("right", {})
-                    main_left = [next(iter(left.keys()))] if left else []
-                    main_right = [next(iter(right.keys()))] if right else []
-
-                    to_add.append(
-                        {
-                            "id": rxn_id,
-                            "left": left,
-                            "right": right,
-                            "direction": 0,
-                            "main_left": main_left,
-                            "main_right": main_right,
-                        }
-                    )
-                    if limit is not None and len(to_add) >= limit:
-                        break
-        except Exception as e:
-            logger.error(f"Error while scanning reac_prop.tsv: {e}")
-            return 0
-
-        added = 0
-        for rxn_dict in to_add:
-            res = self.add_reaction(rxn_dict, persist=False)
-            if res is not None:
-                added += 1
-
-        # Store cache to file once after all reactions are added
-        if added > 0:
-            try:
-                outfile = os_path.join(
-                    self.__cache_dir,
-                    rrCache.__cache["template_reactions"]["file"]["name"],
-                )
-                tr = self.get("template_reactions") or {}
-                rrCache._store_cache_to_file(tr, outfile, logger=logger)
-            except Exception as e:
-                logger.warning(f"Failed to persist template_reactions cache: {e}")
-
-        return added
-
     def get_reaction_rule(self, rr_id: str):
         return self.__get_object("rr_reactions", rr_id)
 
@@ -805,8 +578,6 @@ def Build(self, interactive: bool = DEFAULTS["interactive"]) -> None:
                 logger=self.logger,
             )  # , deprecatedCID_cid, deprecatedRID_rid, logger)
             print_progress(self.logger)
-        # Add missing reactions from MetaNetX
-        self.add_missing_reactions(logger=self.logger)
         try:
             rrCache._gen_comp_xref_deprecatedCompID_compid(
                 self.__input__cache_dir, self.__cache_dir, self.logger
@@ -1062,50 +833,48 @@ def _gen_reactions(
         if os_path.exists(outfile) and check_sha(outfile, rrCache.__cache[_attribute]):
             logger.debug("   Cache file already exists")
         else:
+            dep_files = {}
             # Iterate over the file dependencies and find the corresponding files in the input cache sources
             for dep_file in rrCache.__cache[_attribute]["deps"]["file_deps"]:
                 # Iterate over sources to look if the dependency file is listed in
                 for scat, source in rrCache.__cache_sources.items():
+                    if scat not in dep_files:
+                        dep_files[scat] = []
                     if dep_file in source["files"]:
                         # Look is the file is listed for one of the databases to use,
                         # i.e. if the value is the fingerprint or a dict
                         if isinstance(source["files"][dep_file], dict):
-                            _reactions = {}
                             for db in source["files"][dep_file]:
-                                if db not in databases:
+                                if db in databases:
+                                    dep_files[scat].append(
+                                        os_path.join(input_dir, db, dep_file)
+                                    )
+                                else:
                                     logger.debug(
                                         f"Database {db} is not in the list of databases to include in the cache, skipping generation of reactions for {db}"
                                     )
                                     continue
-                                logger.debug("   Generating data...")
-                                _dep_file = os_path.join(input_dir, db, dep_file)
-                                _reactions = getattr(
-                                    rrCache, "_m_" + attribute + "_reactions"
-                                )(_dep_file, logger=logger)
-                                # Merge with existing reactions for existing keys (append as nested dict)
-                                # Otherwise, for rules built on reactions from several databases,
-                                # the last database in the loop will overwrite the previous ones instead of merging them
-                                for rkey, rval in _reactions.items():
-                                    if (
-                                        rkey in reactions
-                                        and isinstance(reactions[rkey], dict)
-                                        and isinstance(rval, dict)
-                                    ):
-                                        reactions[rkey].update(rval)
-                                    else:
-                                        reactions[rkey] = rval
-                            logger.debug("   Writing data to file...")
-                            rrCache._store_cache_to_file(
-                                reactions, outfile, logger=logger
-                            )
                         else:
-                            reactions = getattr(
-                                rrCache, "_m_" + attribute + "_reactions_legacy"
-                            )(dep_file, logger=logger)
-                            logger.debug("   Writing data to file...")
-                            rrCache._store_cache_to_file(
-                                reactions, outfile, logger=logger
-                            )
+                            dep_files[scat].append(os_path.join(input_dir, dep_file))
+
+            method_name = "_m_" + attribute + "_reactions"
+
+            if type == "legacy":
+                method_name += "_legacy"
+                # Get 'rr2' if not empty otherwise 'rr2more'
+                if dep_files["rr2"] != []:
+                    dep_files = dep_files["rr2"][0]
+                elif dep_files["rr2more"] != []:
+                    dep_files = dep_files["rr2more"][0]
+                else:
+                    logger.error(
+                        "No reaction rule file found for legacy type, cannot generate reactions"
+                    )
+                    return
+
+            reactions = getattr(rrCache, method_name)(dep_files, logger=logger)
+            logger.debug("   Writing data to file...")
+            rrCache._store_cache_to_file(reactions, outfile, logger=logger)
 
         del reactions
 
@@ -1652,69 +1421,79 @@ def _m_mnxc_xref(
 
     @staticmethod
     def _m_rr_reactions(
-        rules_rall_path: str, logger: Logger = getLogger(__name__)
+        rules_rall_paths: Dict[str, List[str]], logger: Logger = getLogger(__name__)
     ) -> Dict:
-        logger.debug(f"Parsing rules from {rules_rall_path}")
+        """Parse the gzipped TSV rule files listed under the "rr2" key.
+
+        Rows are grouped as ``{TEMPLATE_ID: {REACTION_ID: data}}``. Each entry holds
+        the rule id and score, the reaction and substrate ids, the relative
+        direction (1 for "L2R", -1 otherwise) and the left/right compounds with
+        their excluded counterparts. A pair met again with the opposite direction
+        is flagged as bidirectional (0).
+
+        :param rules_rall_paths: source category -> list of rule file paths
+        :param logger: logger used for debug and error messages
+        :return: the nested dict, or None as soon as a listed file is missing
+        """
+        logger.debug(f"Parsing rules from {rules_rall_paths}")
+
+        _rules_rall_paths = rules_rall_paths["rr2"]
 
         rr_reactions = {}
 
-        if not os_path.exists(rules_rall_path):
-            logger.error("Could not read the rules file (" + str(rules_rall_path) + ")")
-            return None
+        for _rules_rall_path in _rules_rall_paths:
 
-        for row in csv_DictReader(gzip_open(rules_rall_path, "rt"), delimiter="\t"):
-            if row["TEMPLATE_ID"] not in rr_reactions:
-                rr_reactions[row["TEMPLATE_ID"]] = {}
-            if row["REACTION_ID"] not in rr_reactions[row["TEMPLATE_ID"]]:
-                subtrates = {row["LEFT_IDS"]: 1}
-                products = dict(Counter(row["RIGHT_IDS"].split(".")))
-                rr_reactions[row["TEMPLATE_ID"]][row["REACTION_ID"]] = {
-                    "rule_id": row["TEMPLATE_ID"],
-                    "rule_score": None if row["SCORE"] == "" else float(row["SCORE"]),
-                    "reac_id": row["REACTION_ID"],
-                    "subs_id": row["LEFT_IDS"],
-                    "rel_direction": (1 if row["DIRECTION"] == "L2R" else -1),
-                    "left": subtrates,
-                    "right": products,
-                    "left_excluded": (
-                        row["LEFT_EXCLUDED_IDS"].split(".")
-                        if row["LEFT_EXCLUDED_IDS"]
-                        else []
-                    ),
-                    "right_excluded": (
-                        row["RIGHT_EXCLUDED_IDS"].split(".")
-                        if row["RIGHT_EXCLUDED_IDS"]
-                        else []
-                    ),
-                }
-            # Handle multiple reactions per rule, update direction if needed
-            else:
-                if (
-                    rr_reactions[row["TEMPLATE_ID"]][row["REACTION_ID"]][
-                        "rel_direction"
-                    ]
-                    != 0
-                    and (1 if row["DIRECTION"] == "L2R" else -1)
-                    != rr_reactions[row["TEMPLATE_ID"]][row["REACTION_ID"]][
-                        "rel_direction"
-                    ]
-                ):
-                    logger.debug(
-                        "Updating direction for reaction "
-                        + str(row["REACTION_ID"])
-                        + " in rule "
-                        + str(row["TEMPLATE_ID"])
-                        + " from "
-                        + str(
-                            rr_reactions[row["TEMPLATE_ID"]][row["REACTION_ID"]][
-                                "rel_direction"
-                            ]
-                        )
-                        + " to bidirectional (0)"
-                    )
-                    rr_reactions[row["TEMPLATE_ID"]][row["REACTION_ID"]][
-                        "rel_direction"
-                    ] = 0  # bidirectional
+            if not os_path.exists(_rules_rall_path):
+                logger.error(
+                    "Could not read the rules file (" + str(_rules_rall_path) + ")"
+                )
+                return None
+
+            # 'with' closes the gzip handle once the file is parsed
+            with gzip_open(_rules_rall_path, "rt") as rules_file:
+                for row in csv_DictReader(rules_file, delimiter="\t"):
+                    rule_id = row["TEMPLATE_ID"]
+                    reac_id = row["REACTION_ID"]
+                    direction = 1 if row["DIRECTION"] == "L2R" else -1
+                    # One nested dict per rule, created the first time it is met
+                    rule_reactions = rr_reactions.setdefault(rule_id, {})
+
+                    if reac_id not in rule_reactions:
+                        substrates = {row["LEFT_IDS"]: 1}
+                        products = dict(Counter(row["RIGHT_IDS"].split(".")))
+                        rule_reactions[reac_id] = {
+                            "rule_id": rule_id,
+                            "rule_score": (
+                                None if row["SCORE"] == "" else float(row["SCORE"])
+                            ),
+                            "reac_id": reac_id,
+                            "subs_id": row["LEFT_IDS"],
+                            "rel_direction": direction,
+                            "left": substrates,
+                            "right": products,
+                            "left_excluded": (
+                                row["LEFT_EXCLUDED_IDS"].split(".")
+                                if row["LEFT_EXCLUDED_IDS"]
+                                else []
+                            ),
+                            "right_excluded": (
+                                row["RIGHT_EXCLUDED_IDS"].split(".")
+                                if row["RIGHT_EXCLUDED_IDS"]
+                                else []
+                            ),
+                        }
+                        continue
+
+                    # Pair already known: a row in the opposite direction makes it
+                    # bidirectional (nothing to do once it already is)
+                    known = rule_reactions[reac_id]["rel_direction"]
+                    if known != 0 and direction != known:
+                        logger.debug(
+                            f"Updating direction for reaction {reac_id}"
+                            f" in rule {rule_id} from {known}"
+                            " to bidirectional (0)"
+                        )
+                        rule_reactions[reac_id]["rel_direction"] = 0  # bidirectional
 
         return rr_reactions
 
@@ -1787,64 +1566,127 @@ def _m_rr_reactions_legacy(
 
     @staticmethod
     def _m_template_reactions(
-        metadata_path: str, logger: Logger = getLogger(__name__)
+        paths: Dict[str, List[str]], logger: Logger = getLogger(__name__)
     ) -> Dict:
+        """Build the template reactions dict, keyed by reaction ID.
+
+        Reactions are read from the gzipped TSV metadata files in ``paths["rr2"]``,
+        then completed with the unknown reactions of the first ``paths["mnx"]`` file.
+
+        :param paths: source category -> list of input file paths
+        :param logger: logger used for debug and error messages
+        :return: ``{reaction_id: data}``, or None if a metadata file is missing
+        """
 
-        if not os_path.exists(metadata_path):
-            logger.error("Cannot find file: " + str(metadata_path))
-            return None
+        metadata_paths = paths["rr2"]
+        reac_prop_path = (
+            paths["mnx"][0] if "mnx" in paths and len(paths["mnx"]) > 0 else None
+        )
+
+        logger.debug(f"metadata_paths: {metadata_paths}")
+        logger.debug(f"reac_prop_path: {reac_prop_path}")
 
         reactions = {}
 
-        for row in csv_DictReader(gzip_open(metadata_path, "rt"), delimiter="\t"):
-            if row["REACTION_ID"] not in reactions:
-                # print(row)
-                substrates = dict(
-                    Counter(
-                        [row["LEFT_IDS"]]
-                        + (
-                            row["LEFT_EXCLUDED_IDS"].split(".")
-                            if row["LEFT_EXCLUDED_IDS"]
-                            else []
-                        )
-                    )
-                )
-                products = dict(
-                    Counter(
-                        row["RIGHT_IDS"].split(".")
-                        + (
-                            row["RIGHT_EXCLUDED_IDS"].split(".")
-                            if row["RIGHT_EXCLUDED_IDS"]
-                            else []
-                        )
-                    )
-                )
-                # if row['REACTION_ID'] == 'MNXR182203':
-                #     print(substrates)
-                #     print(products)
-                #     exit()
-                main_left = row["LEFT_IDS"]
-                main_right = row["RIGHT_IDS"].split(".")[0]
-                if row["DIRECTION"] == "R2L":
-                    # Swap left and right if direction is R2L
-                    substrates, products = products, substrates
-                    main_left, main_right = main_right, main_left
-                    direction = -1
-                else:
-                    direction = 1
-                reactions[row["REACTION_ID"]] = {
-                    "left": substrates,
-                    "right": products,
-                    "direction": direction,
-                    "main_left": main_left,
-                    "main_right": main_right,
-                }
-            # Handle multiple reactions per rule, update direction if needed
-            elif row["DIRECTION"] != reactions[row["REACTION_ID"]]["direction"]:
-                reactions[row["REACTION_ID"]]["direction"] = 0  # bidirectional
+        # Extract reaction data from rules metadata files
+        for metadata_path in metadata_paths:
+            if not os_path.exists(metadata_path):
+                logger.error("Cannot find file: " + str(metadata_path))
+                return None
+
+            # 'with' closes the gzip handle once the file is parsed
+            with gzip_open(metadata_path, "rt") as metadata_file:
+                for row in csv_DictReader(metadata_file, delimiter="\t"):
+                    reac_id = row["REACTION_ID"]
+                    if reac_id in reactions:
+                        # NOTE(review): DIRECTION is a string and the stored direction
+                        # an int, so this test is always true and a repeated reaction
+                        # always ends up bidirectional - confirm this is intended.
+                        if row["DIRECTION"] != reactions[reac_id]["direction"]:
+                            reactions[reac_id]["direction"] = 0  # bidirectional
+                        continue
+                    left_ids = [row["LEFT_IDS"]]
+                    if row["LEFT_EXCLUDED_IDS"]:
+                        left_ids += row["LEFT_EXCLUDED_IDS"].split(".")
+                    right_ids = row["RIGHT_IDS"].split(".")
+                    if row["RIGHT_EXCLUDED_IDS"]:
+                        right_ids += row["RIGHT_EXCLUDED_IDS"].split(".")
+                    substrates = dict(Counter(left_ids))
+                    products = dict(Counter(right_ids))
+                    main_left, main_right = row["LEFT_IDS"], right_ids[0]
+                    direction = -1 if row["DIRECTION"] == "R2L" else 1
+                    if direction == -1:
+                        # Swap left and right if direction is R2L
+                        substrates, products = products, substrates
+                        main_left, main_right = main_right, main_left
+                    reactions[reac_id] = {
+                        "left": substrates,
+                        "right": products,
+                        "direction": direction,
+                        "main_left": main_left,
+                        "main_right": main_right,
+                    }
+
+        # Complete missing reactions from the reaction properties file (TSV), whose
+        # last '#' comment line is the header providing 'ID' and 'mnx_equation'.
+        # Example of mnx_equation: 1 MNXM10958@MNXD1 + 1 MNXM1104529@MNXD1 = 1 MNXM1102128@MNXD1 + 1 MNXM8415@MNXD1
+        if reac_prop_path:
+            reac_props = rrCache.__load_reactions_tsv(reac_prop_path)
+            for reac_id, equation in zip(reac_props["ID"], reac_props["mnx_equation"]):
+                if reac_id in reactions:
+                    logger.debug(
+                        f"Reaction {reac_id} already in reactions, skipping equation parsing"
+                    )
+                    continue
+                rxn = rrCache._read_equation(equation, reac_id, logger)
+                # Skip a None result (would crash on rxn["left"]) and one-sided reactions
+                if not rxn or not rxn.get("left") or not rxn.get("right"):
+                    logger.debug(f"Skipping {reac_id}: equation unparsable or one-sided")
+                    continue
+                reactions[reac_id] = {
+                    "left": rxn["left"],
+                    "right": rxn["right"],
+                    "direction": 0,  # default to bidirectional if not specified
+                    "main_left": "",
+                    "main_right": "",
+                }
 
         return reactions
 
+    @staticmethod
+    def __load_reactions_tsv(
+        path: str, logger: Logger = getLogger(__name__)
+    ) -> "DataFrame":
+        """
+        Read a '#'-commented TSV file into a DataFrame.
+
+        Every line starting with '#' is left out of the data; the last such
+        line, stripped of its leading '#', provides the column names.
+
+        :param path: path of the TSV file to read
+        :param logger: not used by this method
+        :return: DataFrame whose columns are named after the header line
+        :raises ValueError: if the file holds no line starting with '#'
+        """
+        column_names = None
+        body = StringIO()
+
+        with open(path, "r") as tsv_file:
+            for raw_line in tsv_file:
+                if not raw_line.startswith("#"):
+                    # Data line: copied as is into the in-memory TSV
+                    body.write(raw_line)
+                    continue
+                # Comment line: the most recent one wins as header
+                column_names = raw_line.rstrip("\n")[1:].split("\t")
+
+        if column_names is None:
+            raise ValueError("No header line found starting with '#'")
+
+        # Rewind so that pandas parses the buffer from its first data line
+        body.seek(0)
+        return pd_read_csv(body, sep="\t", names=column_names)
+
     @staticmethod
     def _m_template_reactions_legacy(
         rxn_recipes_path: str, logger: Logger = getLogger(__name__)
diff --git a/tests/data/metrics_rr2026.json b/tests/data/metrics_rr2026.json
index 00090e6..1378aee 100644
--- a/tests/data/metrics_rr2026.json
+++ b/tests/data/metrics_rr2026.json
@@ -8,7 +8,7 @@
         "file_size": 243759725
     },
     "template_reactions": {
-        "length": 95420,
+        "length": 95419,
         "file_size": 8519350
     },
     "rr_reactions": {