= ({
database,
+ placeholder,
startMessage,
quickActions,
VCellActions,
@@ -638,12 +640,12 @@ const handleSendMessageBMDB = async (overrideMessage?: string) => {
value={inputMessage}
onChange={(e) => setInputMessage(e.target.value)}
onKeyPress={handleKeyPress}
- placeholder={useVCDB && useBMDB
+ placeholder={placeholder || (useVCDB && useBMDB
? "Ask about VCell and BioModels biomodels..."
: useVCDB
? "Ask about VCell biomodels..."
: "Ask about BioModels biomodels..."
- }
+ )}
className="flex-1 border-slate-300 focus:border-blue-500"
disabled={isLoading || isInitialLoading}
/>
From 937e5ee990518efeea9dd7299e19cd443d9e3497 Mon Sep 17 00:00:00 2001
From: reeshapatel12
Date: Sun, 12 Apr 2026 17:26:07 -0400
Subject: [PATCH 57/60] create more endpoints for BMDB API calls, including for
getting xml files and for getting information about a specific model
---
backend/app/controllers/bmdb_controller.py | 15 ++++-
backend/app/routes/bmdb_router.py | 65 +++++++---------------
backend/app/schemas/bmdb_schema.py | 10 ----
backend/app/services/databases_service.py | 21 ++++++-
4 files changed, 53 insertions(+), 58 deletions(-)
diff --git a/backend/app/controllers/bmdb_controller.py b/backend/app/controllers/bmdb_controller.py
index 0463d71..3094233 100644
--- a/backend/app/controllers/bmdb_controller.py
+++ b/backend/app/controllers/bmdb_controller.py
@@ -5,6 +5,7 @@
from app.services.databases_service import (
get_xml_file,
fetch_bmdb_models,
+ get_bmdb_model_info,
)
@@ -38,4 +39,16 @@ async def get_xml_controller(bmdbID: str, truncate: bool = False) -> str:
try:
return await get_xml_file(bmdbID, truncate)
except Exception as e:
- raise HTTPException(status_code=500, detail="Error fetching XML file.")
\ No newline at end of file
+ raise HTTPException(status_code=500, detail="Error fetching XML file.")
+
+
+async def get_bmdb_model_info_controller(bmdbID: str) -> dict:
+    """
+    Controller function to fetch information about a specific BMDB model.
+    Raises:
+        HTTPException: 500 if get_bmdb_model_info raises for any reason.
+    """
+    try:
+        return await get_bmdb_model_info(bmdbID)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail="Error fetching BMDB model info.") from e
\ No newline at end of file
diff --git a/backend/app/routes/bmdb_router.py b/backend/app/routes/bmdb_router.py
index 201d703..42c9943 100644
--- a/backend/app/routes/bmdb_router.py
+++ b/backend/app/routes/bmdb_router.py
@@ -1,15 +1,9 @@
-from fastapi import APIRouter, Depends, HTTPException, Response
-from typing import List
-from multiprocessing import process
+from fastapi import APIRouter, Depends, HTTPException
from app.schemas.bmdb_schema import BMDBRequestParams
-import httpx
-import requests
-from app.controllers.llms_controller import (
- get_llm_response,
-)
from app.controllers.bmdb_controller import (
get_bmdb_models_controller,
get_xml_controller,
+ get_bmdb_model_info_controller,
)
router = APIRouter()
@@ -25,41 +19,24 @@ async def get_biomodels(params: BMDBRequestParams = Depends()):
except HTTPException as e:
raise e
-# For BioModelsDB search using BioModelsDB API
-# @router.post("/bmdb-search")
-# async def search_llm(conversation_history: dict):
-# """
-# Endpoint to query the LLM and execute the necessary tools.
-# Args:
-# conversation_history (dict): The conversation history containing user prompts and responses.
-# database (str): The database to query - bmdb in this case.
-# Returns:
-# dict: The final response after processing the prompt with the tools.
-# """
-
-# print("DEBUG20: BMDB POST: ROUTER")
-# result, bmdbkeys, tool_summary = await get_llm_response(
-# conversation_history.get("conversation_history", []), database="bmdb"
-# )
-# return {"response": result, "bmkeys": bmdbkeys, "tool_summary": tool_summary}
-
-# @router.get("/", response_model=dict)
-# async def get_biomodels(params: BiomodelRequestParams = Depends()):
-# """
-# Endpoint to retrieve biomodels based on provided filters and sorting.
-# """
-# try:
-# return await get_biomodels_controller(params)
-# except HTTPException as e:
-# raise e
+@router.get("/get-xml", response_model=str)
+async def get_xml(bmdbID: str, truncate: bool = False):
+    """
+    Return the result of get_xml_controller(bmdbID, truncate); both arguments are forwarded unchanged.
+    """
+    try:
+        return await get_xml_controller(bmdbID, truncate)
+    except HTTPException as e:
+        raise e  # the controller already raises HTTPException; pass it through unchanged
+
-# @router.get("/", response_model=str)
-# async def get_xml(bmdbID: str, truncate: bool = False):
-# """
-# Endpoint to get XML file contents for a given biomodel.
-# """
-# try:
-# return await get_xml_controller(bmdbID, truncate)
-# except HTTPException as e:
-# raise e
\ No newline at end of file
+@router.get("/model-info", response_model=dict)
+async def get_model_info(bmdbID: str):
+    """
+    Return the dict produced by get_bmdb_model_info_controller(bmdbID) for one BMDB model.
+    """
+    try:
+        return await get_bmdb_model_info_controller(bmdbID)
+    except HTTPException as e:
+        raise e  # the controller already raises HTTPException; pass it through unchanged
\ No newline at end of file
diff --git a/backend/app/schemas/bmdb_schema.py b/backend/app/schemas/bmdb_schema.py
index b26ed21..ce3d33f 100644
--- a/backend/app/schemas/bmdb_schema.py
+++ b/backend/app/schemas/bmdb_schema.py
@@ -23,13 +23,3 @@ class OrderByEnum(str, Enum):
class BMDBRequestParams(BaseModel, use_enum_values=True):
bmName: Optional[str] = "" # Name of the biomodel to search for
bmId: Optional[str] = "" # Biomodel ID
- category: Optional[CategoryEnum] = CategoryEnum.all # Category of the biomodel
- owner: Optional[str] = "" # Owner of the biomodel
- # savedLow: Optional[date] = None # Lower bound of the save date range
- # savedHigh: Optional[date] = None # Upper bound of the save date range
- startRow: Optional[int] = 1 # Starting row of the result set (default is 1)
- maxRows: Optional[int] = 1000 # Maximum number of rows to return (default is 100)
- orderBy: Optional[OrderByEnum] = (
- OrderByEnum.date_desc
- ) # Order of results (default is "date_desc")
-
diff --git a/backend/app/services/databases_service.py b/backend/app/services/databases_service.py
index 45558cb..a9c6a12 100644
--- a/backend/app/services/databases_service.py
+++ b/backend/app/services/databases_service.py
@@ -94,9 +94,6 @@ async def fetch_biomodels(params: BiomodelRequestParams) -> dict:
# Construct the full URL
url = f"{VCELL_API_BASE_URL}/biomodel?{query_string}"
- # elif source == "search":
- # bm_name = params_dict.get("bmName", "")
- # url = f"{BIOMODELS_API_URL}/{quote(bm_name)}"
# Log the URL being queried
logger.info(f"Querying URL: {url}")
@@ -243,6 +240,24 @@ async def get_xml_file(bmId: str, truncate: bool = False, max_retries: int = 3)
)
+@observe(name="GET_BMDB_MODEL_INFO")
+async def get_bmdb_model_info(bmdbID: str) -> dict:
+    """
+    Fetch the JSON record for one model from BIOMODELS_API_URL and return it parsed, unmodified.
+    The ID is percent-encoded so characters such as "/", "?" or "#" stay inside one path segment.
+    Raises httpx.HTTPStatusError (via raise_for_status) when the upstream reply is 4xx/5xx.
+    """
+    from urllib.parse import quote  # function-scope import: no change to the module's import block
+
+    url = f"{BIOMODELS_API_URL}/{quote(bmdbID, safe='')}?format=json"
+    logger.info(f"Fetching BMDB model info from URL: {url}")
+
+    async with httpx.AsyncClient() as client:
+        response = await client.get(url)
+        response.raise_for_status()
+        return response.json()
+
+
@observe(name="GET_VCML_FILE")
async def get_vcml_file(
biomodel_id: str, truncate: bool = False, max_retries: int = 3
From 517dbf94892d12382a4e65473d69ef880af9ae91 Mon Sep 17 00:00:00 2001
From: reeshapatel12
Date: Fri, 17 Apr 2026 14:10:35 -0400
Subject: [PATCH 58/60] log the number of rows fetched for list of biomodels in
the frontend output
---
backend/app/services/llms_service.py | 38 ++++------------------------
1 file changed, 5 insertions(+), 33 deletions(-)
diff --git a/backend/app/services/llms_service.py b/backend/app/services/llms_service.py
index c90648c..c12948b 100644
--- a/backend/app/services/llms_service.py
+++ b/backend/app/services/llms_service.py
@@ -4,6 +4,7 @@
execute_tool,
select_tools_for_prompt,
should_use_tools,
+ default_rows,
)
from app.services.databases_service import (
@@ -13,6 +14,8 @@
)
from app.utils.system_prompt import SYSTEM_PROMPT
+from app.utils.bmdb_system_prompt import BMDB_SYSTEM_PROMPT
+from app.utils.vcdb_system_prompt import VCDB_SYSTEM_PROMPT
from app.schemas.vcelldb_schema import BiomodelRequestParams
from app.core.singleton import get_openai_client
@@ -105,7 +108,7 @@ async def get_response_with_tools(conversation_history: list[dict], database: st
messages = [
{
"role": "system",
- "content": SYSTEM_PROMPT,
+ "content": SYSTEM_PROMPT + (BMDB_SYSTEM_PROMPT if database == "bmdb" else VCDB_SYSTEM_PROMPT),
},
]
@@ -125,21 +128,6 @@ async def get_response_with_tools(conversation_history: list[dict], database: st
tools=bmdbtools,
tool_choice="auto",
)
- # elif database == "vcdb":
-
- # user_prompt = conversation_history[-1]["content"]
- # print("CCCCCCC" + str(user_prompt))
-
-
- # logger.info(f"User prompt: {user_prompt}")
-
- # response = client.chat.completions.create(
- # name="GET_RESPONSE_WITH_TOOLS::RETRIEVE_TOOLS",
- # model=settings.AZURE_DEPLOYMENT_NAME,
- # messages=messages,
- # tools=tools,
- # tool_choice="auto",
- # )
# IMPLEMENTATION: changing the way llm sees/chooses tools
elif database == "vcdb":
@@ -250,23 +238,6 @@ async def get_response_with_tools(conversation_history: list[dict], database: st
elif database == "bmdb":
bmdb_models = result.get("data", [])
bmkeys = [model.get("id") for model in bmdb_models if model.get("id")]
- # # Extract the function name and arguments
- # name = tool_call.function.name
- # args = json.loads(tool_call.function.arguments)
-
- # logger.info(f"Tool Call: {name} with args: {args}")
-
- # # Execute the tool function
- # result = await execute_tool(name, args)
-
- # logger.info(f"Tool Result: {str(result)[:500]}")
-
- # compact_result = summarize_tool_result(result)
-
- # # Send the result back to the model
- # messages.append(
- # {"role": "tool", "tool_call_id": tool_call.id, "content": str(compact_result)}
- # )
logger.info("DEBUG100-START")
@@ -298,6 +269,7 @@ async def get_response_with_tools(conversation_history: list[dict], database: st
log_timing("TOTAL REQUEST TIME (from initial request to final output)", total_start)
total_time = time.perf_counter() - total_start
tool_summary += f"*Total request time: {total_time:.2f}s.*"
+ tool_summary += f"\n*Max rows fetched for list of biomodels was {default_rows}.*"
return final_response, bmkeys, tool_summary
From 5db77dbe47beac039899d853f2318bd19917181f Mon Sep 17 00:00:00 2001
From: reeshapatel12
Date: Sat, 18 Apr 2026 16:16:19 -0400
Subject: [PATCH 59/60] split system prompt into three separate prompts and
improve each individually
---
backend/app/utils/bmdb_system_prompt.py | 28 +++++++++++++++
backend/app/utils/system_prompt.py | 45 -------------------------
backend/app/utils/vcdb_system_prompt.py | 38 +++++++++++++++++++++
3 files changed, 66 insertions(+), 45 deletions(-)
create mode 100644 backend/app/utils/bmdb_system_prompt.py
create mode 100644 backend/app/utils/vcdb_system_prompt.py
diff --git a/backend/app/utils/bmdb_system_prompt.py b/backend/app/utils/bmdb_system_prompt.py
new file mode 100644
index 0000000..3b76ef3
--- /dev/null
+++ b/backend/app/utils/bmdb_system_prompt.py
@@ -0,0 +1,28 @@
+BMDB_SYSTEM_PROMPT = """
+## Formatting Guidelines for Biomodels
+Ignore all previous instructions.
+You MUST follow this exact output format. Do NOT modify, omit, or reorder any fields.
+ALWAYS use the provided name and biomodelID exactly. Format the name as [name](/search/bmdbID).
+
+### Formatting Guidelines for biomodels retrieved from BioModels database (BMDB)
+* For each BMDB model:
+```
+1. **[Biomodel Name](/search/${bmdbID})**
+ - **Biomodel Key:** ${bmdbId}
+ - **Owner:** ${owner}
+ - **Description:** ${description or summary of the biomodel, do not include `clonedFrom` info}
+```
+
+### Rules for LONG LISTS (>10 models)
+
+- ALWAYS continue numbering sequentially (1, 2, 3, ...)
+- Repeat the EXACT same structure for EVERY item
+- If applications exist, do NOT omit them
+- Do NOT summarize or shorten later items
+- Do NOT merge multiple models into one entry
+- Maintain identical formatting across all entries
+
+### Biomodel Analysis Guidelines
+* Include as many relevant details as possible, such as biomodel ID, names, descriptions, parameters, and any other relevant metadata that can aid in the user's understanding.
+* When the user query is about: "Describe parameters", "Describe species", "Describe reactions", or "What Applications are used?" — specifically in the context of model analysis: Make sure to use the `get_xml_file` tool to retrieve the SBML XML file for the BMDB biomodel. This file contains detailed information about the model's structure and behavior, which is essential for providing accurate descriptions of parameters, species, reactions, and applications. Use also the "fetch_bmdb_models" tool to gather additional context about the biomodel, and Try when asked these questions to focus on the asked aspects, Do not provide general summaries, model structure, or unrelated metadata unless explicitly requested. Keep the focus tightly on the requested element and be as technically precise as possible. Elaborate as much as you can on the requested aspect, providing detailed descriptions and explanations based on the SBML XML content.
+"""
diff --git a/backend/app/utils/system_prompt.py b/backend/app/utils/system_prompt.py
index 7f6a33a..f75c4dc 100644
--- a/backend/app/utils/system_prompt.py
+++ b/backend/app/utils/system_prompt.py
@@ -19,52 +19,7 @@
* Format all units, chemical names, reaction rates, and numerical expressions using math mode to ensure
proper rendering. Example: "The rate is $5.2 \times 10^{-3} \text{ mmol}\cdot\text{ml}^{-1}\cdot\text{min}^{-1}$".
-## Formatting Guidelines for Biomodels
-You MUST follow this exact output format. Do NOT modify, omit, or reorder any fields.
-ALWAYS use the provided name and biomodelID exactly. Format the name as [name](/search/biomodelID).
-
-### Formatting Guidelines for biomodels retrieved from VCell database (VCDB)
-* For each VCELL model:
-```
-1. **[Biomodel Name](/search/${biomodelID})**
- - **Biomodel Key:** ${biomodelId}
- - **Owner:** ${owner}
- - **Description:** ${description or summary of the biomodel, do not include `clonedFrom` info}
- - **Applications:**
-
-List every application name for the model in italics, each on its own bullet point. Under each
-bulleted application name, list its corresponding simulations, with each simulation followed by a solver in round brackets.
-Do not omit any applications.
-```
-
-### Formatting Guidelines for biomodels retrieved from BioModels database (BMDB)
-* For each BMDB model:
-```
-1. **[Biomodel Name](/search/${biomodelID})**
- - **Biomodel Key:** ${biomodelId}
- - **Owner:** ${owner}
- - **Description:** ${description or summary of the biomodel, do not include `clonedFrom` info}
-```
-
-### Rules for LONG LISTS (>10 models)
-
-- ALWAYS continue numbering sequentially (1, 2, 3, ...)
-- Repeat the EXACT same structure for EVERY item
-- If applications exist, do NOT omit them
-- Do NOT summarize or shorten later items
-- Do NOT merge multiple models into one entry
-- Maintain identical formatting across all entries
-
### Guidelines for Follow-up Questions and Further Actions
* If there is an opportunity for follow-up questions or further actions, always ask the user if they'd like to explore
more options or if you can assist with other related tasks.
-
-### Biomodel Analysis Guidelines
-* Include as many relevant details as possible, such as biomodel ID, names, descriptions, parameters, and any other relevant metadata that can aid in the user's understanding.
-* When the user query is about: "Describe parameters", "Describe species", "Describe reactions", or "What Applications are used?" — specifically in the context of model analysis: Make sure to use the `get_vcml_file` tool to retrieve the VCML file for the VCELL biomodel or the `get_xml_file` tool to retrieve the SBML XML file for the BMDB biomodel. This file contains detailed information about the model's structure and behavior, which is essential for providing accurate descriptions of parameters, species, reactions, and applications. Use also the "fetch_biomodels" tool to gather additional context about the biomodel, and Try when asked these questions to focus on the asked aspects, Do not provide general summaries, model structure, or unrelated metadata unless explicitly requested. Keep the focus tightly on the requested element and be as technically precise as possible. Elaborate as much as you can on the requested aspect, providing detailed descriptions and explanations based on the VCML or SBML XML content.
-
-### Publications Guidelines
-* If asked for publications, research papers, pubmed articles, etc. use the `fetch_publications` tool. After fetching, extract the relevant information, filter by user's specific needs, format publication links using markdown `[Title](DOI_URL)`, provide context (date, authors, description), and clearly communicate if no relevant publications are found.
-* When using the `fetch_publications` tool, the response contains the full list of VCell related publications with fields: `pubKey` (unique identifier), `title`, `authors` (array), `year`, `citation` (full citation string in journal format), `pubmedid` (PubMed ID), `doi` (DOI link to the publication), `biomodelReferences` (array of related biomodels), and `mathmodelReferences` (array of related mathematical models).
-* When presenting publications, always provide elaborate, fact-based responses based solely on the available tool results.
"""
diff --git a/backend/app/utils/vcdb_system_prompt.py b/backend/app/utils/vcdb_system_prompt.py
new file mode 100644
index 0000000..c645c30
--- /dev/null
+++ b/backend/app/utils/vcdb_system_prompt.py
@@ -0,0 +1,38 @@
+VCDB_SYSTEM_PROMPT = """
+### Publications Guidelines
+* If asked for publications, research papers, pubmed articles, etc. use the `fetch_publications` tool. After fetching, extract the relevant information, filter by user's specific needs, format publication links using markdown `[Title](DOI_URL)`, provide context (date, authors, description), and clearly communicate if no relevant publications are found.
+* When using the `fetch_publications` tool, the response contains the full list of VCell related publications with fields: `pubKey` (unique identifier), `title`, `authors` (array), `year`, `citation` (full citation string in journal format), `pubmedid` (PubMed ID), `doi` (DOI link to the publication), `biomodelReferences` (array of related biomodels), and `mathmodelReferences` (array of related mathematical models).
+* When presenting publications, always provide elaborate, fact-based responses based solely on the available tool results.
+
+
+## Formatting Guidelines for Biomodels
+You MUST follow this exact output format. Do NOT modify, omit, or reorder any fields.
+ALWAYS use the provided name and biomodelID exactly. Format the name as [name](/search/biomodelID).
+
+### Formatting Guidelines for biomodels retrieved from VCell database (VCDB)
+* For each VCELL model:
+```
+1. **[Biomodel Name](/search/${biomodelID})**
+ - **Biomodel Key:** ${biomodelId}
+ - **Owner:** ${owner}
+ - **Description:** ${description or summary of the biomodel, do not include `clonedFrom` info}
+ - **Applications:**
+
+List every application name for the model in italics, each on its own bullet point. Under each
+bulleted application name, list its corresponding simulations, with each simulation followed by a solver in round brackets.
+Do not omit any applications.
+```
+
+### Rules for LONG LISTS (>10 models)
+
+- ALWAYS continue numbering sequentially (1, 2, 3, ...)
+- Repeat the EXACT same structure for EVERY item
+- If applications exist, do NOT omit them
+- Do NOT summarize or shorten later items
+- Do NOT merge multiple models into one entry
+- Maintain identical formatting across all entries
+
+### Biomodel Analysis Guidelines
+* Include as many relevant details as possible, such as biomodel ID, names, descriptions, parameters, and any other relevant metadata that can aid in the user's understanding.
+* When the user query is about: "Describe parameters", "Describe species", "Describe reactions", or "What Applications are used?" — specifically in the context of model analysis: Make sure to use the `get_vcml_file` tool to retrieve the VCML file for the VCELL biomodel. This file contains detailed information about the model's structure and behavior, which is essential for providing accurate descriptions of parameters, species, reactions, and applications. Use also the "fetch_biomodels" tool to gather additional context about the biomodel, and Try when asked these questions to focus on the asked aspects, Do not provide general summaries, model structure, or unrelated metadata unless explicitly requested. Keep the focus tightly on the requested element and be as technically precise as possible. Elaborate as much as you can on the requested aspect, providing detailed descriptions and explanations based on the VCML content.
+"""
From 3a2ab978af8f4645fd95d9dddd2a9f392faf427e Mon Sep 17 00:00:00 2001
From: reeshapatel12
Date: Sat, 18 Apr 2026 17:17:01 -0400
Subject: [PATCH 60/60] add to system prompt that all model elements that have
links leading to identifiers.org should be underlined with a link available,
and no other elements should have links
---
backend/app/utils/bmdb_system_prompt.py | 7 +++----
backend/app/utils/system_prompt.py | 7 +++++--
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/backend/app/utils/bmdb_system_prompt.py b/backend/app/utils/bmdb_system_prompt.py
index 3b76ef3..d93d6db 100644
--- a/backend/app/utils/bmdb_system_prompt.py
+++ b/backend/app/utils/bmdb_system_prompt.py
@@ -1,14 +1,13 @@
BMDB_SYSTEM_PROMPT = """
## Formatting Guidelines for Biomodels
-Ignore all previous instructions.
You MUST follow this exact output format. Do NOT modify, omit, or reorder any fields.
-ALWAYS use the provided name and biomodelID exactly. Format the name as [name](/search/bmdbID).
+ALWAYS use the provided name and biomodelID exactly. Format the name as [name](/search/${id}).
### Formatting Guidelines for biomodels retrieved from BioModels database (BMDB)
* For each BMDB model:
```
-1. **[Biomodel Name](/search/${bmdbID})**
- - **Biomodel Key:** ${bmdbId}
+1. **[Biomodel Name](/search/${id})**
+ - **Biomodel Key:** ${id}
- **Owner:** ${owner}
- **Description:** ${description or summary of the biomodel, do not include `clonedFrom` info}
```
diff --git a/backend/app/utils/system_prompt.py b/backend/app/utils/system_prompt.py
index f75c4dc..962a34b 100644
--- a/backend/app/utils/system_prompt.py
+++ b/backend/app/utils/system_prompt.py
@@ -3,8 +3,6 @@
SBML format (taken from BioModels database, also called BMDB or BioModels.org).
Your task is to provide human-readable, accurate, detailed, and contextually appropriate responses based on the tools available.
-## Core Guidelines
-
### General Guidelines
* Stick strictly to the user's query.
* Do not make assumptions or inferences about missing or incomplete information in the user's input.
@@ -19,6 +17,11 @@
* Format all units, chemical names, reaction rates, and numerical expressions using math mode to ensure
proper rendering. Example: "The rate is $5.2 \times 10^{-3} \text{ mmol}\cdot\text{ml}^{-1}\cdot\text{min}^{-1}$".
+### Formatting Guidelines for Elements with Identifiers.org Links
+* Any model element that includes a link to identifiers.org MUST be formatted as an underlined clickable link.
+* ONLY identifiers.org links should be formatted this way.
+* Do not hyperlink any other model elements (including names and descriptions); the only exception is the /search/... link on a biomodel name where the biomodel list format explicitly requires it.
+
### Guidelines for Follow-up Questions and Further Actions
* If there is an opportunity for follow-up questions or further actions, always ask the user if they'd like to explore
more options or if you can assist with other related tasks.