tigergraph · chengbiao-jin · May 16, 2026 · Apr 15, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/README.md b/README.md
@@ -63,6 +63,7 @@
 ---
 
 ## Releases
+* **5/16/2026**: GraphRAG v1.4.0 released. Added schema-aware knowledge graphs, auto retrieval method selection, and a Trace Logs UI, along with many other improvements and bug fixes. See [Release Notes](https://github.com/tigergraph/graphrag/releases/tag/v1.4.0) for details.
 * **4/10/2026**: GraphRAG v1.3.0 released. Added an admin configuration UI with role-based access and per-graph chatbot LLM override, along with many other improvements and bug fixes. See [Release Notes](https://github.com/tigergraph/graphrag/releases/tag/v1.3.0) for details.
 * **2/28/2026**: GraphRAG v1.2.0 released. Added Admin UI for graph initialization, document ingestion, and knowledge graph rebuild, along with many other improvements and bug fixes. See [Release Notes](https://github.com/tigergraph/graphrag/releases/tag/v1.2.0) for details.
 * **9/22/2025**: GraphRAG is available now officially v1.1 (v1.1.0). AWS Bedrock support is completed with BDA integration for multimodal document ingestion. See [Release Notes](https://github.com/tigergraph/graphrag/releases/tag/v1.1.0) for details.
@@ -478,6 +479,11 @@ Copy the below code into `configs/server_config.json`. You shouldn’t need to c
 | `chat_history_api` | string | `"http://chat-history:8002"` | URL of the chat history service. No change needed when using the provided Docker Compose file. |
 | `chunker` | string | `"semantic"` | Default document chunker. Options: `semantic`, `character`, `regex`, `markdown`, `html`, `recursive`. |
 | `extractor` | string | `"llm"` | Entity extraction method. Options: `llm`, `graphrag`. |
+| `strict_mode` | bool | `false` | Dynamic-schema enforcement during extraction. When `true`, entities and relationships that don't match the domain schema are dropped. When `false` (default), unmatched nodes fall back to generic `Entity` vertices. |
+| `retrieval_include_entity` | bool \| null | `null` (auto) | Whether retriever queries include the generic `Entity` vertex alongside domain types. When unset, the server uses `false` if a domain schema exists and `true` otherwise. Set explicitly to override. |
+| `schema_max_sample_files` | int | `5` | Maximum number of sample documents accepted by the *Generate from sample documents* path on the *Initialize Knowledge Graph* dialog. |
+| `schema_max_total_mb` | int | `50` | Combined upload cap (MB) across all sample files for schema extraction. Bounds the content sent to the LLM. A single file may use the full budget; there is no separate per-file cap. |
+| `enable_router_fallback` | bool | `true` | When the function-call or Cypher path fails after 3 retries, fall back to vector search instead of failing the query. |
 | `chunker_config` | object | `{}` | Chunker-specific settings (see sub-parameters below). All settings are saved regardless of which chunker is selected as default. |
 | ↳ `chunk_size` | int | `2048` | Maximum number of characters per chunk. Used by `character`, `markdown`, `html`, and `recursive` chunkers. Larger values produce fewer, bigger chunks; smaller values produce more, finer-grained chunks. |
 | ↳ `overlap_size` | int | 1/8 of `chunk_size` | Number of overlapping characters between consecutive chunks. Used by `character`, `markdown`, `html`, and `recursive` chunkers. More overlap preserves cross-chunk context but increases total chunk count. Set to `0` for no overlap. |
@@ -926,7 +932,9 @@ Today's primary lever is the **entity-extraction prompt**:
 - **Add 1–2 short domain examples** in the prompt. Even one well-chosen exemplar (an extracted entity with type and definition) dramatically improves consistency across chunks.
 - **List the canonical edge verbs you want.** Encourage `PUBLISHES`, `OWNS`, `ISSUES`, `MANAGES`, `REPORTS_ON` in the relationship-extraction prompt rather than letting the LLM emit ad-hoc nominal phrases.
 
-If extraction quality is still poor after iterating on the prompt, the next-best option today is to clear the graph's domain types and re-ingest with the improved prompt — schema growth is currently driven entirely by what extraction produces. (A schema-aware initialization flow that lets you supply a curated schema up front is on the roadmap.)
+If extraction quality is still poor after iterating on the prompt, declare a domain schema up front via the *Initialize Knowledge Graph* dialog (paste GSQL, or generate a draft from sample documents) so that extraction populates the types you actually want instead of growing the schema organically from whatever the LLM happens to emit. See `strict_mode` and `retrieval_include_entity` in the Configuration table above for the knobs that control schema-aware behavior.
+
+**Note on LLM faithfulness.** Entity, relationship, and attribute extraction is best-effort and may include occasional errors. This is especially true for well-known entities, where the LLM may fill in details from its own background knowledge instead of from your documents. For high-stakes applications, validate critical extracted values against your source documents before relying on them.
 
 ### 4. Retrieval — match context size to the question
 

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.3.1
+1.4.0
diff --git a/common/config.py b/common/config.py
@@ -166,6 +166,17 @@ def resolve_llm_services(llm_cfg: dict) -> dict:
             if svc_key in cfg and "region_name" not in cfg[svc_key]:
                 cfg[svc_key]["region_name"] = top_region
 
+    # Inject top-level prompt_path into LLM-prompted service configs
+    # if missing. The UI never lets users set per-service prompt_paths;
+    # in practice they are always identical to completion's.
+    # ``embedding_service`` is excluded — embedding models never load
+    # prompt files (their class hierarchy has no prompt-property machinery).
+    top_prompt_path = cfg.get("prompt_path")
+    if top_prompt_path:
+        for svc_key in ["completion_service", "multimodal_service", "chat_service"]:
+            if svc_key in cfg and "prompt_path" not in cfg[svc_key]:
+                cfg[svc_key]["prompt_path"] = top_prompt_path
+
     completion = cfg.get("completion_service", {})
 
     # Resolve embedding: inherit provider-level config from completion
@@ -366,6 +377,15 @@ def get_graphrag_config(graphname=None):
         if svc_key in llm_config and "region_name" not in llm_config[svc_key]:
             llm_config[svc_key]["region_name"] = llm_config["region_name"]
 
+# Inject top-level prompt_path into LLM-prompted service configs if
+# missing. Embedding service is excluded — embedding models never load
+# prompt files. Per-service entries on disk are accepted for backward
+# compat but never written by the UI.
+if "prompt_path" in llm_config:
+    for svc_key in ["completion_service", "multimodal_service", "chat_service"]:
+        if svc_key in llm_config and "prompt_path" not in llm_config[svc_key]:
+            llm_config[svc_key]["prompt_path"] = llm_config["prompt_path"]
+
 _comp = llm_config.get("completion_service")
 if _comp is None:
     raise Exception("completion_service is not found in llm_config")
@@ -414,6 +434,8 @@ def get_graphrag_config(graphname=None):
     graphrag_config["chunker"] = "semantic"
 if "extractor" not in graphrag_config:
     graphrag_config["extractor"] = "llm"
+# ``retrieval_include_entity`` is resolved at install time
+# (see ``common.db.retriever_render.resolve_include_entity``).
 
 reuse_embedding = graphrag_config.get("reuse_embedding", True)
 doc_process_switch = graphrag_config.get("doc_process_switch", True)
@@ -441,6 +463,15 @@ def get_graphrag_config(graphname=None):
 else:
     raise Exception("Embedding service not implemented")
 
+def get_embedding_service():
+    """Return the current embedding service instance.
+
+    Use this instead of importing ``embedding_service`` directly so
+    consumers always read the latest instance after a config reload.
+    """
+    return embedding_service
+
+
 def get_llm_service(service_config: dict) -> LLM_Model:
     """
     Instantiate an LLM provider from a flat service config dict.
@@ -474,25 +505,134 @@ def get_llm_service(service_config: dict) -> LLM_Model:
         raise Exception(f"LLM service '{service_name}' not supported")
 
 
-if os.getenv("INIT_EMBED_STORE", "true") == "true":
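+# Module-level ``embedding_store`` is the back-compat default for direct
+# ``from common.config import embedding_store`` users. It is ``None`` until the background init thread below
+# rebinds it, so such an import only captures the value at import time; prefer ``get_embedding_store()``.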
+#
+# ``_embedding_stores`` is the per-graph cache used by chatbot
+# retrievers via ``get_embedding_store(graphname=...)``. Each entry
+# has its own ``TigerGraphConnection`` bound to that graphname for
+# its lifetime — no in-place ``set_graphname`` mutation — so
+# concurrent chat across different graphs can't race over a shared
+# connection.
+embedding_store = None
+_embedding_store_ready = threading.Event()
+_embedding_stores: dict = {}
+_embedding_stores_lock = threading.Lock()
+service_status["embedding_store"] = {
+    "status": "initializing",
+    "error": "Embedding store is still initializing",
+}
+
+
+def _build_embedding_store(graphname: str = "") -> TigerGraphEmbeddingStore:
+    """Construct a fresh ``TigerGraphEmbeddingStore`` bound to *graphname*.
+
+    Uses the live globals (``db_config`` for the connection and
+    ``embedding_service`` for the model) so the result reflects the
+    current config.
+    """
     conn = TigerGraphConnection(
         host=db_config.get("hostname", "http://tigergraph"),
         username=db_config.get("username", "tigergraph"),
         password=db_config.get("password", "tigergraph"),
         gsPort=db_config.get("gsPort", "14240"),
         restppPort=db_config.get("restppPort", "9000"),
-        graphname=db_config.get("graphname", ""),
+        graphname=graphname or db_config.get("graphname", ""),
         apiToken=db_config.get("apiToken", ""),
     )
     if not db_config.get("apiToken") and db_config.get("getToken"):
         conn.getToken()
 
-    embedding_store = TigerGraphEmbeddingStore(
+    store = TigerGraphEmbeddingStore(
         conn,
         embedding_service,
         support_ai_instance=True,
     )
-    service_status["embedding_store"] = {"status": "ok", "error": None}
+    if graphname:
+        # Runs the GDS check and per-graph vector-query install.
+        store.set_graphname(graphname)
+    return store
+
+
+def _init_embedding_store():
+    """Background thread target. Builds the default embedding store
+    without blocking module import — TigerGraph may be slow on first
+    connect, and we don't want app startup to wait on it.
+    """
+    global embedding_store
+    try:
+        embedding_store = _build_embedding_store()
+        service_status["embedding_store"] = {"status": "ok", "error": None}
+    except Exception as e:
+        service_status["embedding_store"] = {"status": "error", "error": str(e)}
+        logger.error(f"Failed to initialize embedding store: {e}")
+    finally:
+        _embedding_store_ready.set()
+
+
+def get_embedding_store(graphname: str | None = None, timeout: float = 0):
+    """Return an embedding store.
+
+    Args:
+        graphname: When supplied, returns a per-graph instance built
+            and cached on first request (each cache entry has its own
+            connection bound to *graphname* for its lifetime).
+        timeout: Seconds to wait for the default-store init when
+            *graphname* is not supplied. Default 0 (non-blocking —
+            raises immediately if still initializing).
+
+    Raises:
+        RuntimeError: if not yet ready, timed out, or initialization failed.
+    """
+    if graphname:
+        with _embedding_stores_lock:
+            cached = _embedding_stores.get(graphname)
+            if cached is not None:
+                return cached
+        # Build outside the lock so first-time setup for one graph
+        # doesn't serialize first-time setup for another.
+        store = _build_embedding_store(graphname)
+        with _embedding_stores_lock:
+            existing = _embedding_stores.get(graphname)
+            if existing is not None:
+                return existing  # racing thread won
+            _embedding_stores[graphname] = store
+            return store
+
+    if not _embedding_store_ready.wait(timeout=timeout):
+        raise RuntimeError(
+            "Embedding store is still initializing. Please try again shortly."
+        )
+    if embedding_store is None:
+        error = service_status.get("embedding_store", {}).get("error", "Unknown error")
+        raise RuntimeError(f"Embedding store failed to initialize: {error}")
+    return embedding_store
+
+
+def reset_embedding_store() -> None:
+    """Drop the per-graph cache and the default store, then re-run the
+    background init so a config reload picks up the new
+    ``embedding_service`` and ``db_config``. Callers should swap the
+    inputs before calling. No-op when ``INIT_EMBED_STORE`` is disabled
+    (e.g. ECC).
+    """
+    global embedding_store
+    if os.getenv("INIT_EMBED_STORE", "true") != "true":
+        return
+    with _embedding_stores_lock:
+        _embedding_stores.clear()
+    embedding_store = None
+    _embedding_store_ready.clear()
+    service_status["embedding_store"] = {
+        "status": "initializing",
+        "error": "Embedding store is still initializing",
+    }
+    threading.Thread(target=_init_embedding_store, daemon=True).start()
+
+
+if os.getenv("INIT_EMBED_STORE", "true") == "true":
+    threading.Thread(target=_init_embedding_store, daemon=True).start()
 
 
 def reload_llm_config(new_llm_config: dict = None):
@@ -550,6 +690,14 @@ def reload_llm_config(new_llm_config: dict = None):
                 if svc_key in new_llm_config and "region_name" not in new_llm_config[svc_key]:
                     new_llm_config[svc_key]["region_name"] = new_llm_config["region_name"]
 
+        # Inject top-level prompt_path into LLM-prompted service configs
+        # if missing. Embedding service is excluded — embedding models
+        # never load prompt files.
+        if "prompt_path" in new_llm_config:
+            for svc_key in ["completion_service", "multimodal_service", "chat_service"]:
+                if svc_key in new_llm_config and "prompt_path" not in new_llm_config[svc_key]:
+                    new_llm_config[svc_key]["prompt_path"] = new_llm_config["prompt_path"]
+
         new_completion_config = new_llm_config.get("completion_service")
         new_embedding_config = new_llm_config.get("embedding_service")
 
@@ -595,6 +743,10 @@ def reload_llm_config(new_llm_config: dict = None):
         else:
             raise Exception("Embedding service not implemented")
 
+        # Clear the per-graph cache and rebuild the default store so later
+        # lookups stop returning stores built on the old embedding service.
+        reset_embedding_store()
+
         return {
             "status": "success",
             "message": "LLM configuration reloaded successfully"
@@ -645,6 +797,10 @@ def reload_db_config(new_db_config: dict = None):
             del db_config[k]
         db_config.update(new_db_config)
 
+        # Clear the per-graph cache and rebuild the default store so later
+        # lookups stop returning stores whose connections use the old DB config.
+        reset_embedding_store()
+
         return {
             "status": "success",
             "message": "DB configuration reloaded successfully"