diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py index 049ca544a..b3785a197 100644 --- a/src/memos/api/product_models.py +++ b/src/memos/api/product_models.py @@ -1321,6 +1321,38 @@ class ExistMemCubeIdResponse(BaseResponse[dict[str, bool]]): """Response model for checking if mem cube id exists.""" +class CreateCubeRequest(BaseRequest): + """Request model for explicitly creating a mem cube. + + Creating a cube up-front is required for multi-tenant / multi-cube + deployments: without an explicit cube registration, ``/product/add`` + will succeed (writing embeddings to the vector store) but + ``/product/search`` will return empty results because the tree + registry has no entry for the cube. See Issue #1681 for context. + """ + + cube_id: str = Field(..., description="Unique identifier for the new cube") + owner_id: str = Field( + ..., + description=( + "User ID that owns the cube. Currently used as a marker tag on the cube; " + "future versions may use it for access control." + ), + ) + cube_name: str | None = Field( + None, + description="Human-readable name. Defaults to ``cube_id`` if not provided.", + ) + + +class CreateCubeResponse(BaseResponse[dict[str, Any]]): + """Response model for creating a mem cube. + + ``data`` always contains ``cube_id`` and a ``created`` boolean. ``created`` + is ``False`` when the cube already existed (idempotent path). + """ + + class DeleteMemoryByRecordIdRequest(BaseRequest): """Request model for deleting memory by record id.""" diff --git a/src/memos/api/routers/server_router.py b/src/memos/api/routers/server_router.py index fa8a0b396..3de4557e7 100644 --- a/src/memos/api/routers/server_router.py +++ b/src/memos/api/routers/server_router.py @@ -32,6 +32,8 @@ ChatBusinessRequest, ChatPlaygroundRequest, ChatRequest, + CreateCubeRequest, + CreateCubeResponse, DeleteMemoryByRecordIdRequest, DeleteMemoryByRecordIdResponse, DeleteMemoryRequest, @@ -95,6 +97,47 @@ status_tracker = TaskStatusTracker(redis_client=redis_client) graph_db = components["graph_db"] +# Opt-in flag: when enabled, /product/add and /product/search return 404 if an +# explicit ``mem_cube_id`` / ``writable_cube_ids`` / ``readable_cube_ids`` refers +# to a cube that has not been registered via /product/create_cube. Default is +# off to preserve backward compatibility; see Issue #1681. +STRICT_CUBE_VALIDATION = os.getenv("MEMOS_STRICT_CUBE_VALIDATION", "false").lower() == "true" + + +def _cube_exists(cube_id: str) -> bool: + """Return True if the cube has been registered (or had memories written).""" + if not cube_id: + return False + try: + return bool(graph_db.exist_user_name(user_name=cube_id).get(cube_id, False)) + except Exception: + # If the registry is unavailable, fall back to allowing the request + # rather than blocking it. The underlying handler will surface the + # real error. + logger.warning("Cube existence check failed; skipping validation.", exc_info=True) + return True + + +def _validate_cube_or_404(cube_ids: list[str]) -> None: + """Raise HTTP 404 with a clear message if any cube id is missing. + + Only checks explicit cube ids — callers should skip implicit defaults + (e.g. when the request didn't set ``mem_cube_id`` and the handler is + falling back to ``user_id``). + """ + if not STRICT_CUBE_VALIDATION: + return + missing = [cid for cid in cube_ids if cid and not _cube_exists(cid)] + if missing: + raise HTTPException( + status_code=404, + detail=( + f"Cube {missing[0]!r} does not exist. Create it via " + "POST /product/create_cube or omit mem_cube_id to use the " + "default cube." + ), + ) + # ============================================================================= # Search API Endpoints @@ -107,7 +150,21 @@ def search_memories(search_req: APISearchRequest): Search memories for a specific user. This endpoint uses the class-based SearchHandler for better code organization. + + When ``MEMOS_STRICT_CUBE_VALIDATION`` is enabled, an explicit + ``mem_cube_id`` / ``readable_cube_ids`` referring to a cube that has + not been registered will return HTTP 404 instead of an empty result. """ + # Only validate explicit cube ids — when neither mem_cube_id nor + # readable_cube_ids is set, the handler implicitly falls back to user_id, + # which is the legacy default behaviour we must preserve. + explicit_cube_ids: list[str] = [] + if search_req.mem_cube_id: + explicit_cube_ids.append(search_req.mem_cube_id) + if search_req.readable_cube_ids: + explicit_cube_ids.extend(search_req.readable_cube_ids) + _validate_cube_or_404(explicit_cube_ids) + search_results = search_handler.handle_search_memories(search_req) return search_results @@ -123,7 +180,19 @@ def add_memories(add_req: APIADDRequest): Add memories for a specific user. This endpoint uses the class-based AddHandler for better code organization. + + When ``MEMOS_STRICT_CUBE_VALIDATION`` is enabled, an explicit + ``mem_cube_id`` / ``writable_cube_ids`` referring to a cube that has + not been registered will return HTTP 404 instead of silently writing + to an orphan partition. """ + explicit_cube_ids: list[str] = [] + if add_req.mem_cube_id: + explicit_cube_ids.append(add_req.mem_cube_id) + if add_req.writable_cube_ids: + explicit_cube_ids.extend(add_req.writable_cube_ids) + _validate_cube_or_404(explicit_cube_ids) + return add_handler.handle_add_memories(add_req) @@ -392,6 +461,70 @@ def exist_mem_cube_id(request: ExistMemCubeIdRequest): ) +@router.post( + "/create_cube", + summary="Create / register a mem cube", + response_model=CreateCubeResponse, +) +def create_cube(request: CreateCubeRequest): + """Explicitly create / register a mem cube. + + Server-mode HTTP API previously accepted arbitrary ``mem_cube_id`` values + on :http:post:`/product/add` but did not register the cube in the tree + registry, so subsequent :http:post:`/product/search` calls returned + empty results even though data was written to the vector store. See + Issue #1681 for context. + + This endpoint is idempotent — calling it twice with the same + ``cube_id`` returns ``created=False`` on the second call without + raising. + """ + cube_id = request.cube_id + owner_id = request.owner_id + + if not cube_id: + raise HTTPException(status_code=400, detail="cube_id must be a non-empty string") + + if hasattr(graph_db, "create_user_name"): + try: + created = bool(graph_db.create_user_name(user_name=cube_id, owner_id=owner_id)) + except Exception as e: + logger.error("Failed to create cube %s: %s", cube_id, e, exc_info=True) + raise HTTPException( + status_code=500, + detail=f"Failed to create cube {cube_id!r}: {e}", + ) from e + else: + # Backwards-compatibility shim for graph_db backends that have not + # adopted the create_user_name extension yet: treat the cube as + # registered if any memory exists for it; otherwise report + # not-implemented so integrators see a clear signal. + existing = bool(graph_db.exist_user_name(user_name=cube_id).get(cube_id, False)) + if existing: + created = False + else: + raise HTTPException( + status_code=501, + detail=( + "The active graph_db backend does not support explicit cube " + "creation. Upgrade to a backend that implements " + "create_user_name, or add a memory via /product/add to " + "implicitly register the cube." + ), + ) + + return CreateCubeResponse( + code=200, + message="Cube created" if created else "Cube already exists", + data={ + "cube_id": cube_id, + "cube_name": request.cube_name or cube_id, + "owner_id": owner_id, + "created": created, + }, + ) + + @router.post("/chat/stream/business_user", summary="Chat with MemOS for business user") def chat_stream_business_user(chat_req: ChatBusinessRequest): """(inner) Chat with MemOS for a specific business user. Returns SSE stream.""" diff --git a/src/memos/graph_dbs/neo4j.py b/src/memos/graph_dbs/neo4j.py index 56c3e08a0..0b8485773 100644 --- a/src/memos/graph_dbs/neo4j.py +++ b/src/memos/graph_dbs/neo4j.py @@ -2109,6 +2109,62 @@ def exist_user_name(self, user_name: str) -> dict[str, bool]: ) raise + def create_user_name(self, user_name: str, owner_id: str | None = None) -> bool: + """Register a cube (``user_name``) in the graph. + + Creates an idempotent marker :Memory node so that subsequent calls + to :meth:`exist_user_name` return ``True`` and so that the tree + retriever has a registered partition for the cube. The marker has + a deterministic id (``_cube_marker:``) to avoid + duplicates and is excluded from normal search by carrying a + ``node_type`` of ``"cube_marker"``. + + Args: + user_name: The cube id / user_name to register. + owner_id: Optional owner identifier stored as metadata on the + marker node. + + Returns: + bool: ``True`` if a new marker was created, ``False`` if the + cube was already registered. + """ + if not user_name: + raise ValueError("user_name must be a non-empty string") + + # Don't create a marker if any memory already exists for this cube. + if self.exist_user_name(user_name).get(user_name): + logger.info(f"[create_user_name] Cube {user_name} already exists; create is a no-op.") + return False + + marker_id = f"_cube_marker:{user_name}" + query = """ + MERGE (n:Memory {id: $id}) + ON CREATE SET + n.memory = $memory, + n.user_name = $user_name, + n.owner_id = $owner_id, + n.node_type = 'cube_marker', + n.created_at = datetime(), + n.updated_at = datetime() + RETURN n.id AS id + """ + try: + with self.driver.session(database=self.db_name) as session: + session.run( + query, + id=marker_id, + memory="", + user_name=user_name, + owner_id=owner_id or "", + ) + logger.info(f"[create_user_name] Registered cube marker for user_name {user_name}") + return True + except Exception as e: + logger.error( + f"[create_user_name] Failed to register cube {user_name}: {e}", exc_info=True + ) + raise + def delete_node_by_mem_cube_id( self, mem_cube_id: str | None = None, diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 856f94f2a..014075643 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -5044,6 +5044,53 @@ def escape_user_name(un: str) -> str: ) raise + def create_user_name(self, user_name: str, owner_id: str | None = None) -> bool: + """Register a cube (``user_name``) in the graph. + + Creates an idempotent marker Memory node so that subsequent calls to + :meth:`exist_user_name` return ``True``. The marker has a + deterministic id (``_cube_marker:``) and is tagged with + ``node_type='cube_marker'`` so it is not surfaced in regular search + results. + + Args: + user_name: The cube id / user_name to register. + owner_id: Optional owner identifier stored as metadata on the + marker node. + + Returns: + bool: ``True`` if a new marker was created, ``False`` if the + cube was already registered. + """ + if not user_name: + raise ValueError("user_name must be a non-empty string") + + if self.exist_user_name(user_name).get(user_name): + logger.info(f"[create_user_name] Cube {user_name} already exists; create is a no-op.") + return False + + marker_id = f"_cube_marker:{user_name}" + try: + # Use add_node so that connection/auth/escape logic stays in + # one place. The marker node carries ``node_type='cube_marker'`` + # so search/scroll routines can skip it. + self.add_node( + id=marker_id, + memory="", + metadata={ + "node_type": "cube_marker", + "owner_id": owner_id or "", + }, + user_name=user_name, + ) + logger.info(f"[create_user_name] Registered cube marker for user_name {user_name}") + return True + except Exception as e: + logger.error( + f"[create_user_name] Failed to register cube {user_name}: {e}", exc_info=True + ) + raise + @timed def delete_node_by_mem_cube_id( self, diff --git a/tests/api/test_server_router.py b/tests/api/test_server_router.py index 5906697d9..8014e3ed5 100644 --- a/tests/api/test_server_router.py +++ b/tests/api/test_server_router.py @@ -15,6 +15,8 @@ APIADDRequest, APIChatCompleteRequest, APISearchRequest, + CreateCubeRequest, + CreateCubeResponse, MemoryResponse, SearchResponse, SuggestionResponse, @@ -415,3 +417,198 @@ def test_get_all_response_format(self, mock_handlers, client): data = response.json() assert data["message"] == "Memories retrieved successfully" assert isinstance(data["data"], list) + + +class TestServerRouterCreateCube: + """Tests for the explicit ``POST /product/create_cube`` endpoint added + in response to Issue #1681. + + Without explicit cube creation, ``/add`` happily accepts an unknown + ``mem_cube_id`` and writes embeddings to the vector store, but + ``/search`` returns empty results because the tree registry has no + entry for the cube. The new endpoint is intentionally idempotent so + that "register before write" is a no-op for already-existing cubes. + """ + + def test_create_cube_new(self, client): + """A new cube_id reaches graph_db.create_user_name and returns created=True.""" + with patch("memos.api.routers.server_router.graph_db") as mock_graph_db: + mock_graph_db.exist_user_name.return_value = {"my_custom_cube": False} + mock_graph_db.create_user_name.return_value = True + + request_data = { + "cube_id": "my_custom_cube", + "owner_id": "user_alice", + } + response = client.post("/product/create_cube", json=request_data) + + assert response.status_code == 200 + data = response.json() + assert data["code"] == 200 + assert data["message"] == "Cube created" + assert data["data"]["cube_id"] == "my_custom_cube" + assert data["data"]["owner_id"] == "user_alice" + assert data["data"]["created"] is True + # cube_name defaults to cube_id when not provided + assert data["data"]["cube_name"] == "my_custom_cube" + + mock_graph_db.create_user_name.assert_called_once_with( + user_name="my_custom_cube", owner_id="user_alice" + ) + + def test_create_cube_idempotent(self, client): + """Calling create_cube for an existing cube returns created=False, not 409.""" + with patch("memos.api.routers.server_router.graph_db") as mock_graph_db: + mock_graph_db.create_user_name.return_value = False + + request_data = { + "cube_id": "existing_cube", + "owner_id": "user_alice", + "cube_name": "Existing Cube", + } + response = client.post("/product/create_cube", json=request_data) + + assert response.status_code == 200 + data = response.json() + assert data["message"] == "Cube already exists" + assert data["data"]["created"] is False + assert data["data"]["cube_name"] == "Existing Cube" + + def test_create_cube_missing_required_field(self, client): + """cube_id is required; missing it surfaces 422 from FastAPI.""" + response = client.post( + "/product/create_cube", + json={"owner_id": "user_alice"}, + ) + assert response.status_code == 422 + + def test_create_cube_backend_without_create_user_name(self, client): + """When the backend lacks create_user_name and cube doesn't exist, return 501. + + This guards integrators against silently treating a no-op as success + on backends that haven't shipped the new method. + """ + with patch("memos.api.routers.server_router.graph_db") as mock_graph_db: + # Simulate a backend that has exist_user_name but not create_user_name. + mock_graph_db.exist_user_name.return_value = {"new_cube": False} + # Use spec to ensure hasattr(...) returns False for create_user_name. + del mock_graph_db.create_user_name + + request_data = {"cube_id": "new_cube", "owner_id": "user_alice"} + response = client.post("/product/create_cube", json=request_data) + + assert response.status_code == 501 + + def test_create_cube_request_model_defaults(self): + """CreateCubeRequest exposes cube_name as optional and CreateCubeResponse + encodes the standard BaseResponse envelope.""" + req = CreateCubeRequest(cube_id="c1", owner_id="o1") + assert req.cube_name is None + + resp = CreateCubeResponse( + message="ok", + data={"cube_id": "c1", "created": True}, + ) + assert resp.code == 200 + assert resp.data["cube_id"] == "c1" + + +class TestServerRouterStrictCubeValidation: + """Tests for the opt-in ``MEMOS_STRICT_CUBE_VALIDATION`` mode added in + response to Issue #1681. + + By default (flag off) the existing behaviour is preserved — /add and + /search never reject requests on unknown cube_ids. When the flag is + on, explicit cube ids that don't exist in the registry return HTTP + 404 with a clear message instead of silently producing empty results. + """ + + def test_search_returns_404_when_strict_and_cube_missing(self, mock_handlers, client): + with ( + patch("memos.api.routers.server_router.STRICT_CUBE_VALIDATION", True), + patch("memos.api.routers.server_router.graph_db") as mock_graph_db, + ): + mock_graph_db.exist_user_name.return_value = {"ghost_cube": False} + + response = client.post( + "/product/search", + json={ + "query": "q", + "user_id": "u1", + "mem_cube_id": "ghost_cube", + }, + ) + assert response.status_code == 404 + assert "ghost_cube" in response.json()["detail"] + # The handler must not run when validation rejects the request. + mock_handlers["search"].handle_search_memories.assert_not_called() + + def test_add_returns_404_when_strict_and_cube_missing(self, mock_handlers, client): + with ( + patch("memos.api.routers.server_router.STRICT_CUBE_VALIDATION", True), + patch("memos.api.routers.server_router.graph_db") as mock_graph_db, + ): + mock_graph_db.exist_user_name.return_value = {"ghost_cube": False} + + response = client.post( + "/product/add", + json={ + "mem_cube_id": "ghost_cube", + "user_id": "u1", + "memory_content": "hello", + }, + ) + assert response.status_code == 404 + assert "create_cube" in response.json()["detail"] + mock_handlers["add"].handle_add_memories.assert_not_called() + + def test_add_passes_when_strict_and_cube_exists(self, mock_handlers, client): + with ( + patch("memos.api.routers.server_router.STRICT_CUBE_VALIDATION", True), + patch("memos.api.routers.server_router.graph_db") as mock_graph_db, + ): + mock_graph_db.exist_user_name.return_value = {"good_cube": True} + + response = client.post( + "/product/add", + json={ + "mem_cube_id": "good_cube", + "user_id": "u1", + "memory_content": "hello", + }, + ) + assert response.status_code == 200 + mock_handlers["add"].handle_add_memories.assert_called_once() + + def test_add_skips_validation_when_strict_but_no_explicit_cube(self, mock_handlers, client): + """If the caller did not provide an explicit cube_id, the legacy + implicit-default-to-user_id path is preserved even in strict mode.""" + with ( + patch("memos.api.routers.server_router.STRICT_CUBE_VALIDATION", True), + patch("memos.api.routers.server_router.graph_db") as mock_graph_db, + ): + response = client.post( + "/product/add", + json={"user_id": "u1", "memory_content": "hello"}, + ) + assert response.status_code == 200 + # exist_user_name should not even be consulted in this case. + mock_graph_db.exist_user_name.assert_not_called() + mock_handlers["add"].handle_add_memories.assert_called_once() + + def test_validation_disabled_by_default(self, mock_handlers, client): + """Default behaviour preserves backward compatibility — no 404 even + if the cube has never been registered.""" + with patch("memos.api.routers.server_router.graph_db") as mock_graph_db: + mock_graph_db.exist_user_name.return_value = {"ghost_cube": False} + + response = client.post( + "/product/add", + json={ + "mem_cube_id": "ghost_cube", + "user_id": "u1", + "memory_content": "hello", + }, + ) + assert response.status_code == 200 + mock_handlers["add"].handle_add_memories.assert_called_once() diff --git a/tests/graph_dbs/test_create_user_name.py b/tests/graph_dbs/test_create_user_name.py new file mode 100644 index 000000000..2fe80d980 --- /dev/null +++ b/tests/graph_dbs/test_create_user_name.py @@ -0,0 +1,88 @@ +"""Tests for the ``create_user_name`` cube-registration helper added to +Neo4jGraphDB and PolarDBGraphDB in response to Issue #1681. + +These tests use mocked drivers/connections — they intentionally avoid +spinning up a real Neo4j or PolarDB instance. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from memos.configs.graph_db import Neo4jGraphDBConfig + + +@pytest.fixture +def shared_db_config(): + return Neo4jGraphDBConfig( + uri="bolt://localhost:7687", + user="neo4j", + password="test", + db_name="test_db", + auto_create=False, + use_multi_db=False, + user_name="default_user", + embedding_dimension=3, + ) + + +@pytest.fixture +def neo4j_db(shared_db_config): + with patch("neo4j.GraphDatabase") as mock_gd: + mock_driver = MagicMock() + mock_gd.driver.return_value = mock_driver + from memos.graph_dbs.neo4j import Neo4jGraphDB + + db = Neo4jGraphDB(shared_db_config) + db.driver = mock_driver + yield db + + +class TestNeo4jCreateUserName: + """Cube registration is idempotent and skips work when the cube exists.""" + + def test_returns_false_when_cube_already_exists(self, neo4j_db): + """Pre-existing user_name should short-circuit without issuing a CREATE.""" + with patch.object( + neo4j_db, "exist_user_name", return_value={"alice_cube": True} + ) as mock_exist: + created = neo4j_db.create_user_name("alice_cube", owner_id="alice") + + assert created is False + mock_exist.assert_called_once_with("alice_cube") + # The driver should not be touched when the cube already exists. + neo4j_db.driver.session.assert_not_called() + + def test_creates_marker_node_when_cube_missing(self, neo4j_db): + """First-time registration emits a deterministic marker id and reports True.""" + mock_session = MagicMock() + # Make `with self.driver.session(...) as session:` yield mock_session. + neo4j_db.driver.session.return_value.__enter__.return_value = mock_session + + with patch.object(neo4j_db, "exist_user_name", return_value={"alice_cube": False}): + created = neo4j_db.create_user_name("alice_cube", owner_id="alice") + + assert created is True + mock_session.run.assert_called_once() + kwargs = mock_session.run.call_args.kwargs + # The marker id is deterministic so repeated calls MERGE on the same node. + assert kwargs["id"] == "_cube_marker:alice_cube" + assert kwargs["user_name"] == "alice_cube" + assert kwargs["owner_id"] == "alice" + + def test_rejects_empty_user_name(self, neo4j_db): + """An empty string is never a valid cube id — fail loudly.""" + with pytest.raises(ValueError): + neo4j_db.create_user_name("") + + def test_owner_id_defaults_to_empty_string(self, neo4j_db): + """owner_id is optional; missing values become an empty string on the marker.""" + mock_session = MagicMock() + neo4j_db.driver.session.return_value.__enter__.return_value = mock_session + + with patch.object(neo4j_db, "exist_user_name", return_value={"orphan_cube": False}): + created = neo4j_db.create_user_name("orphan_cube") + + assert created is True + kwargs = mock_session.run.call_args.kwargs + assert kwargs["owner_id"] == ""