NVIDIA · mimran-khan · Jun 22, 2026
diff --git a/src/skillspector/nodes/report.py b/src/skillspector/nodes/report.py
@@ -267,6 +267,54 @@ def _build_metadata(has_executable_scripts: bool, use_llm: bool) -> dict[str, ob
     return meta
 
 
+def _build_analysis_completeness(
+    components: list[str],
+    file_cache: dict[str, str],
+    use_llm: bool,
+    findings_pre_filter: list[Finding],
+    findings_post_filter: list[Finding],
+) -> dict[str, object]:
+    """Summarize scan coverage and limitations for report consumers.
+
+    A component counts as scanned when it is a key of ``file_cache``;
+    ``is_complete`` is true only when no limitation was recorded.
+    """
+    total_components = len(components)
+    scanned_components = sum(1 for c in components if c in file_cache)
+    # Probe availability only when the LLM was requested; otherwise the result is unused.
+    llm_available, llm_error = is_llm_available() if use_llm else (False, None)
+
+    limitations: list[str] = []
+    if scanned_components < total_components:
+        skipped = total_components - scanned_components
+        limitations.append(f"{skipped} component(s) had no content in file_cache (skipped)")
+    if not use_llm:
+        limitations.append("LLM meta-analysis was disabled (--no-llm)")
+    elif not llm_available:
+        limitations.append(f"LLM meta-analysis unavailable: {llm_error or 'unknown reason'}")
+    findings_dropped = len(findings_pre_filter) - len(findings_post_filter)
+    if findings_dropped > 0:
+        limitations.append(
+            f"{findings_dropped} finding(s) filtered by meta-analyzer or heuristics"
+        )
+
+    # An empty component list is reported as fully covered (and avoids dividing by zero).
+    if total_components > 0:
+        coverage_percent = round(scanned_components / total_components * 100, 1)
+    else:
+        coverage_percent = 100.0
+    return {
+        "total_components": total_components,
+        "scanned_components": scanned_components,
+        "coverage_percent": coverage_percent,
+        "llm_analysis": "applied" if use_llm and llm_available else "skipped",
+        "findings_before_filtering": len(findings_pre_filter),
+        "findings_after_filtering": len(findings_post_filter),
+        "limitations": limitations or None,
+        "is_complete": not limitations,
+    }
+
+
 def _format_json(
     findings: list[Finding],
     component_metadata: list[dict[str, object]],
@@ -277,6 +325,7 @@ def _format_json(
     risk_recommendation: str,
     has_executable_scripts: bool,
     use_llm: bool = True,
+    analysis_completeness: dict[str, object] | None = None,
 ) -> str:
     """Generate JSON report string."""
     skill_name = (manifest.get("name") or "unknown") if manifest else "unknown"
@@ -304,6 +353,8 @@ def _format_json(
         "issues": [f.to_dict() for f in findings],
         "metadata": _build_metadata(has_executable_scripts, use_llm),
     }
+    if analysis_completeness is not None:
+        data["analysis_completeness"] = analysis_completeness
     return json.dumps(data, indent=2)
 
 
@@ -377,12 +428,14 @@ def _format_markdown(
 def report(state: SkillspectorState) -> dict[str, object]:
     """Generate SARIF, compute risk score, and set report_body from output_format."""
     findings = state.get("filtered_findings", state.get("findings", []))
-    # When use_llm is False, meta_analyzer is skipped; ensure final state has filtered_findings
+    raw_findings = state.get("findings", [])
     if "filtered_findings" not in state:
         filtered_findings = state.get("findings", [])
     else:
         filtered_findings = findings
     component_metadata = state.get("component_metadata") or []
+    components = state.get("components") or []
+    file_cache = state.get("file_cache") or {}
     has_executable_scripts = state.get("has_executable_scripts", False)
     manifest = state.get("manifest") or {}
     skill_path = state.get("skill_path")
@@ -393,6 +446,9 @@ def report(state: SkillspectorState) -> dict[str, object]:
         findings, has_executable_scripts
     )
     sarif_report = _build_sarif(findings)
+    analysis_completeness = _build_analysis_completeness(
+        components, file_cache, use_llm, raw_findings, filtered_findings
+    )
 
     if output_format == "terminal":
         report_body = _format_terminal(
@@ -416,6 +472,7 @@ def report(state: SkillspectorState) -> dict[str, object]:
             risk_recommendation,
             has_executable_scripts,
             use_llm=use_llm,
+            analysis_completeness=analysis_completeness,
         )
     elif output_format == "markdown":
         report_body = _format_markdown(

diff --git a/tests/nodes/test_analysis_completeness.py b/tests/nodes/test_analysis_completeness.py
@@ -0,0 +1,172 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for analysis_completeness field in report output."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import patch
+
+import pytest
+
+from skillspector.models import Finding
+from skillspector.nodes.report import _build_analysis_completeness, report
+
+
+def _make_finding(**kwargs) -> Finding:
+    """Return a Finding populated with baseline values; keyword arguments override them."""
+    baseline = {
+        "rule_id": "PE3",
+        "message": "Credential Access",
+        "severity": "HIGH",
+        "confidence": 0.9,
+        "file": "tool.py",
+        "start_line": 1,
+        "end_line": 1,
+        "remediation": "Remove",
+        "tags": ["test"],
+        "context": "ctx",
+        "matched_text": "match",
+        "category": "priv_esc",
+        "pattern": "PE3",
+        "finding": "snippet",
+        "explanation": "explain",
+        "code_snippet": "code",
+        "intent": None,
+    }
+    return Finding(**{**baseline, **kwargs})
+
+
+class TestBuildAnalysisCompleteness:
+    """Coverage metadata returned by _build_analysis_completeness, case by case."""
+
+    @patch("skillspector.nodes.report.is_llm_available", return_value=(True, None))
+    def test_full_coverage_complete(self, _mock_llm) -> None:
+        paths = ["a.py", "b.py"]
+        cache = {"a.py": "code", "b.py": "code"}
+        found = [_make_finding()]
+        summary = _build_analysis_completeness(
+            paths, cache, use_llm=True,
+            findings_pre_filter=found, findings_post_filter=found,
+        )
+        assert summary["total_components"] == 2
+        assert summary["scanned_components"] == 2
+        assert summary["coverage_percent"] == 100.0
+        assert summary["llm_analysis"] == "applied"
+        assert summary["is_complete"] is True
+        assert summary["limitations"] is None
+
+    @patch("skillspector.nodes.report.is_llm_available", return_value=(True, None))
+    def test_partial_coverage_reports_skipped(self, _mock_llm) -> None:
+        paths = ["a.py", "b.py", "c.py"]
+        cache = {"a.py": "code"}
+        summary = _build_analysis_completeness(
+            paths, cache, use_llm=True,
+            findings_pre_filter=[], findings_post_filter=[],
+        )
+        assert summary["total_components"] == 3
+        assert summary["scanned_components"] == 1
+        assert summary["coverage_percent"] == pytest.approx(33.3, abs=0.1)
+        assert summary["is_complete"] is False
+        assert any("2 component(s)" in note for note in summary["limitations"])
+
+    @patch(
+        "skillspector.nodes.report.is_llm_available",
+        return_value=(False, "OPENAI_API_KEY not set"),
+    )
+    def test_llm_unavailable_noted(self, _mock_llm) -> None:
+        summary = _build_analysis_completeness(
+            ["a.py"], {"a.py": "code"}, use_llm=True,
+            findings_pre_filter=[], findings_post_filter=[],
+        )
+        assert summary["llm_analysis"] == "skipped"
+        assert summary["is_complete"] is False
+        assert any("LLM meta-analysis unavailable" in note for note in summary["limitations"])
+
+    @patch("skillspector.nodes.report.is_llm_available", return_value=(True, None))
+    def test_llm_disabled_noted(self, _mock_llm) -> None:
+        summary = _build_analysis_completeness(
+            ["a.py"], {"a.py": "code"}, use_llm=False,
+            findings_pre_filter=[], findings_post_filter=[],
+        )
+        assert summary["llm_analysis"] == "skipped"
+        assert summary["is_complete"] is False
+        assert any("--no-llm" in note for note in summary["limitations"])
+
+    @patch("skillspector.nodes.report.is_llm_available", return_value=(True, None))
+    def test_findings_filtered_noted(self, _mock_llm) -> None:
+        before = [_make_finding(), _make_finding(), _make_finding()]
+        after = [_make_finding()]
+        summary = _build_analysis_completeness(
+            ["a.py"], {"a.py": "code"}, use_llm=True,
+            findings_pre_filter=before, findings_post_filter=after,
+        )
+        assert summary["findings_before_filtering"] == 3
+        assert summary["findings_after_filtering"] == 1
+        assert any("2 finding(s) filtered" in note for note in summary["limitations"])
+
+    @patch("skillspector.nodes.report.is_llm_available", return_value=(True, None))
+    def test_empty_components_gives_100_coverage(self, _mock_llm) -> None:
+        summary = _build_analysis_completeness(
+            [], {}, use_llm=True,
+            findings_pre_filter=[], findings_post_filter=[],
+        )
+        assert summary["coverage_percent"] == 100.0
+        assert summary["total_components"] == 0
+
+
+class TestCompletenessInJsonReport:
+    """The JSON report body carries analysis_completeness; the SARIF body does not."""
+
+    @patch("skillspector.nodes.report.is_llm_available", return_value=(True, None))
+    def test_json_report_includes_completeness(self, _mock_llm) -> None:
+        scan_state = {
+            "findings": [_make_finding()],
+            "filtered_findings": [_make_finding()],
+            "components": ["tool.py"],
+            "file_cache": {"tool.py": "import os"},
+            "component_metadata": [{"path": "tool.py", "type": "python", "lines": 1}],
+            "has_executable_scripts": False,
+            "manifest": {"name": "test-skill"},
+            "skill_path": "/tmp/skill",
+            "output_format": "json",
+            "use_llm": True,
+        }
+        payload = json.loads(report(scan_state)["report_body"])
+        assert "analysis_completeness" in payload
+        completeness = payload["analysis_completeness"]
+        assert completeness["total_components"] == 1
+        assert completeness["scanned_components"] == 1
+        assert completeness["coverage_percent"] == 100.0
+
+    @patch("skillspector.nodes.report.is_llm_available", return_value=(True, None))
+    def test_sarif_format_does_not_include_completeness(self, _mock_llm) -> None:
+        """The SARIF body exposes ``$schema`` and has no analysis_completeness key."""
+        scan_state = {
+            "findings": [_make_finding()],
+            "filtered_findings": [_make_finding()],
+            "components": ["tool.py"],
+            "file_cache": {"tool.py": "import os"},
+            "component_metadata": [],
+            "has_executable_scripts": False,
+            "manifest": {},
+            "skill_path": None,
+            "output_format": "sarif",
+            "use_llm": True,
+        }
+        payload = json.loads(report(scan_state)["report_body"])
+        assert "analysis_completeness" not in payload
+        assert "$schema" in payload