From d3990fea29dc650fb7cd43f153a357a56bd9f43b Mon Sep 17 00:00:00 2001 From: Lalit Shrotriya Date: Thu, 18 Jun 2026 10:21:21 +0000 Subject: [PATCH] fix(schemas): normalize confidence from 0-100 scale before Pydantic validation LLMFinding and MetaAnalyzerFinding both hard-fail with le=1.0 when Ollama (or other local models) return confidence as an integer on a 0-100 scale. Add a mode="before" field_validator that: converts to float, divides by 100 if the value exceeds 1.0, then clamps to [0.0, 1.0]. This also handles negative values and values above 100 gracefully rather than crashing the meta-analyzer for the entire file. Non-numeric types (such as None) and NaN are rejected with ValueError so that Pydantic reports a ValidationError instead of leaking a raw TypeError or silently turning NaN into 1.0. Closes #89 Signed-off-by: Lalit Shrotriya --- src/skillspector/llm_analyzer_base.py | 16 ++++++++- src/skillspector/nodes/meta_analyzer.py | 15 ++++++++ tests/nodes/test_llm_analyzer_base.py | 61 ++++++++++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 5 deletions(-) diff --git a/src/skillspector/llm_analyzer_base.py b/src/skillspector/llm_analyzer_base.py index aa3e7e9a..b4a99776 100644 --- a/src/skillspector/llm_analyzer_base.py +++ b/src/skillspector/llm_analyzer_base.py @@ -33,7 +33,7 @@ from typing import Literal from langchain_core.messages import BaseMessage -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from skillspector.llm_utils import get_chat_model from skillspector.logging_config import get_logger @@ -68,6 +68,20 @@ class LLMFinding(BaseModel): explanation: str = Field(default="", description="Why this is a finding (2-3 sentences)") remediation: str = Field(default="", description="Actionable steps to fix the issue") + @field_validator("confidence", mode="before") + @classmethod + def _normalize_confidence(cls, v: object) -> float: + """Accept 0-100 scale (e.g. 
from Ollama) and normalize to [0, 1].""" + try: + v = float(v) + except (TypeError, OverflowError) as exc: # Pydantic only converts ValueError/AssertionError into ValidationError + raise ValueError("confidence must be a real number") from exc + if v != v: # NaN compares False everywhere, so min()/max() would turn it into 1.0 + raise ValueError("confidence must not be NaN") + if v > 1.0: # anything above 1.0 is read as a percentage + v = v / 100.0 + return max(0.0, min(1.0, v)) + def to_finding(self, file: str) -> Finding: """Convert to a :class:`Finding` for the graph state.""" return Finding( diff --git a/src/skillspector/nodes/meta_analyzer.py b/src/skillspector/nodes/meta_analyzer.py index 8f2b5410..3397f097 100644 --- a/src/skillspector/nodes/meta_analyzer.py +++ b/src/skillspector/nodes/meta_analyzer.py @@ -64,6 +64,21 @@ class MetaAnalyzerFinding(BaseModel): ) is_vulnerability: bool = Field(description="Whether this is a true vulnerability") confidence: float = Field(ge=0.0, le=1.0, description="Confidence score between 0.0 and 1.0") + + @field_validator("confidence", mode="before") + @classmethod + def _normalize_confidence(cls, v: object) -> float: + """Accept 0-100 scale (e.g. from Ollama) and normalize to [0, 1].""" + try: + v = float(v) + except (TypeError, OverflowError) as exc: # Pydantic only converts ValueError/AssertionError into ValidationError + raise ValueError("confidence must be a real number") from exc + if v != v: # NaN compares False everywhere, so min()/max() would turn it into 1.0 + raise ValueError("confidence must not be NaN") + if v > 1.0: # anything above 1.0 is read as a percentage + v = v / 100.0 + return max(0.0, min(1.0, v)) + intent: Literal["malicious", "negligent", "benign"] = Field( description="Likely intent behind the finding" ) diff --git a/tests/nodes/test_llm_analyzer_base.py b/tests/nodes/test_llm_analyzer_base.py index c1fabca5..4a8f1ea7 100644 --- a/tests/nodes/test_llm_analyzer_base.py +++ b/tests/nodes/test_llm_analyzer_base.py @@ -566,14 +566,37 @@ def test_valid_finding(self) -> None: assert len(result.findings) == 1 assert result.findings[0].confidence == 0.9 + def test_confidence_100_scale_normalized(self) -> None: + """Ollama and some models return confidence on 0-100 scale; must be normalized.""" + f = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=100) + assert f.confidence == pytest.approx(1.0) + + def test_confidence_85_scale_normalized(self) -> None: + f = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=85) + assert f.confidence == pytest.approx(0.85) + + def 
test_confidence_negative_clamped_to_zero(self) -> None: + f = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=-10) + assert f.confidence == pytest.approx(0.0) + + def test_confidence_overlarge_clamped_to_one(self) -> None: + """Values > 100 (e.g. 150) are divided then clamped.""" + f = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=150) + assert f.confidence == pytest.approx(1.0) + + def test_confidence_nan_and_none_rejected(self) -> None: + for bad in (float("nan"), None): + with pytest.raises(ValueError): + LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=bad) + def test_confidence_validation(self) -> None: - with pytest.raises(ValueError): + with pytest.raises((ValueError, TypeError)): LLMFinding( rule_id="X", message="x", severity="LOW", start_line=1, - confidence=1.5, + confidence="not-a-number", ) def test_severity_validation(self) -> None: @@ -660,12 +683,42 @@ def test_valid_finding(self) -> None: assert len(result.findings) == 1 assert result.findings[0].confidence == 0.9 + def test_confidence_100_scale_normalized(self) -> None: + """Ollama-style 0-100 scale must be normalized to 0-1.""" + f = MetaAnalyzerFinding( + pattern_id="E1", is_vulnerability=True, confidence=100, intent="malicious", impact="high" + ) + assert f.confidence == pytest.approx(1.0) + + def test_confidence_75_scale_normalized(self) -> None: + f = MetaAnalyzerFinding( + pattern_id="E1", is_vulnerability=True, confidence=75, intent="malicious", impact="high" + ) + assert f.confidence == pytest.approx(0.75) + + def test_confidence_negative_clamped(self) -> None: + f = MetaAnalyzerFinding( + pattern_id="E1", is_vulnerability=True, confidence=-5, intent="malicious", impact="high" + ) + assert f.confidence == pytest.approx(0.0) + + def test_confidence_nan_and_none_rejected(self) -> None: + for bad in (float("nan"), None): + with pytest.raises(ValueError): + MetaAnalyzerFinding( + pattern_id="E1", + is_vulnerability=True, + confidence=bad, + intent="malicious", + impact="high", + ) + def test_confidence_validation(self) -> None: - with pytest.raises(ValueError): + with pytest.raises((ValueError, TypeError)): MetaAnalyzerFinding( pattern_id="E1", is_vulnerability=True, - confidence=1.5, + confidence="bad", intent="malicious", impact="high", )