Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions src/skillspector/llm_analyzer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,14 @@ def _clamp_start_line(cls, v: int) -> int:
# dropping the finding over an off-by-one.
return v if v >= 1 else 1

@field_validator("confidence")
@field_validator("confidence", mode="before")
@classmethod
def _clamp_confidence(cls, v: float) -> float:
# Clamp into [0.0, 1.0] so a slightly out-of-range model value
# normalises instead of failing the structured-output parse.
return min(1.0, max(0.0, v))
def _normalize_confidence(cls, v: object) -> float:
"""Accept 0-100 scale (e.g. from Ollama) and normalize to [0, 1]."""
v = float(v) # raises TypeError/ValueError for non-numeric inputs
if v > 1.0:
v = v / 100.0
return max(0.0, min(1.0, v))

def to_finding(self, file: str) -> Finding:
"""Convert to a :class:`Finding` for the graph state."""
Expand Down
12 changes: 7 additions & 5 deletions src/skillspector/nodes/meta_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,14 @@ class MetaAnalyzerFinding(BaseModel):
explanation: str = Field(default="", description="Why this is dangerous (2-3 sentences)")
remediation: str = Field(default="", description="How to fix the issue (actionable steps)")

@field_validator("confidence")
@field_validator("confidence", mode="before")
@classmethod
def _clamp_confidence(cls, v: float) -> float:
# Clamp into [0.0, 1.0] so a slightly out-of-range model value
# normalises instead of failing the structured-output parse.
return min(1.0, max(0.0, v))
def _normalize_confidence(cls, v: object) -> float:
"""Accept 0-100 scale (e.g. from Ollama) and normalize to [0, 1]."""
v = float(v) # raises TypeError/ValueError for non-numeric inputs
if v > 1.0:
v = v / 100.0
return max(0.0, min(1.0, v))


class OverallAssessment(BaseModel):
Expand Down
85 changes: 58 additions & 27 deletions tests/nodes/test_llm_analyzer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,13 +601,33 @@ def test_valid_finding(self) -> None:
assert len(result.findings) == 1
assert result.findings[0].confidence == 0.9

def test_confidence_is_clamped(self) -> None:
"""Out-of-range confidence is clamped, not rejected, so a slightly off
model value does not fail the whole structured-output parse."""
hi = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=1.5)
lo = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=-0.3)
assert hi.confidence == 1.0
assert lo.confidence == 0.0
def test_confidence_100_scale_normalized(self) -> None:
    """A confidence of 100 (0-100 scale, as Ollama and some models return) becomes 1.0."""
    finding = LLMFinding(
        rule_id="X",
        message="x",
        severity="LOW",
        start_line=1,
        confidence=100,
    )
    expected = pytest.approx(1.0)
    assert finding.confidence == expected

def test_confidence_85_scale_normalized(self) -> None:
    """A confidence of 85 is read as a 0-100 value and stored as 0.85."""
    finding = LLMFinding(
        rule_id="X",
        message="x",
        severity="LOW",
        start_line=1,
        confidence=85,
    )
    expected = pytest.approx(0.85)
    assert finding.confidence == expected

def test_confidence_negative_clamped_to_zero(self) -> None:
    """A negative confidence (-10) is clamped up to 0.0."""
    finding = LLMFinding(
        rule_id="X",
        message="x",
        severity="LOW",
        start_line=1,
        confidence=-10,
    )
    expected = pytest.approx(0.0)
    assert finding.confidence == expected

def test_confidence_overlarge_clamped_to_one(self) -> None:
    """A value above 100 (here 150) is divided by 100 and then clamped to 1.0."""
    finding = LLMFinding(
        rule_id="X",
        message="x",
        severity="LOW",
        start_line=1,
        confidence=150,
    )
    expected = pytest.approx(1.0)
    assert finding.confidence == expected

def test_confidence_validation(self) -> None:
    """A non-numeric confidence string makes LLMFinding construction raise."""
    bad_kwargs = {
        "rule_id": "X",
        "message": "x",
        "severity": "LOW",
        "start_line": 1,
        "confidence": "not-a-number",
    }
    # Either exception type is accepted for a non-numeric value.
    with pytest.raises((ValueError, TypeError)):
        LLMFinding(**bad_kwargs)

def test_severity_validation(self) -> None:
with pytest.raises(ValueError):
Expand Down Expand Up @@ -693,25 +713,34 @@ def test_valid_finding(self) -> None:
assert len(result.findings) == 1
assert result.findings[0].confidence == 0.9

def test_confidence_is_clamped(self) -> None:
"""Out-of-range confidence is clamped, not rejected, so a slightly off
model value does not fail the whole structured-output parse."""
high = MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=1.5,
intent="malicious",
impact="high",
def test_confidence_100_scale_normalized(self) -> None:
"""Ollama-style 0-100 scale must be normalized to 0-1."""
f = MetaAnalyzerFinding(
pattern_id="E1", is_vulnerability=True, confidence=100, intent="malicious", impact="high"
)
low = MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=-0.2,
intent="malicious",
impact="high",
assert f.confidence == pytest.approx(1.0)

def test_confidence_75_scale_normalized(self) -> None:
    """A confidence of 75 is read as a 0-100 value and stored as 0.75."""
    finding = MetaAnalyzerFinding(
        pattern_id="E1",
        is_vulnerability=True,
        confidence=75,
        intent="malicious",
        impact="high",
    )
    expected = pytest.approx(0.75)
    assert finding.confidence == expected

def test_confidence_negative_clamped(self) -> None:
f = MetaAnalyzerFinding(
pattern_id="E1", is_vulnerability=True, confidence=-5, intent="malicious", impact="high"
)
assert high.confidence == 1.0
assert low.confidence == 0.0
assert f.confidence == pytest.approx(0.0)

def test_confidence_validation(self) -> None:
    """A non-numeric confidence string makes MetaAnalyzerFinding construction raise."""
    bad_kwargs = {
        "pattern_id": "E1",
        "is_vulnerability": True,
        "confidence": "bad",
        "intent": "malicious",
        "impact": "high",
    }
    # Either exception type is accepted for a non-numeric value.
    with pytest.raises((ValueError, TypeError)):
        MetaAnalyzerFinding(**bad_kwargs)

def test_intent_validation(self) -> None:
with pytest.raises(ValueError):
Expand Down Expand Up @@ -793,10 +822,12 @@ def test_llm_finding_schema_has_no_numeric_bounds(self) -> None:
def test_meta_finding_schema_has_no_numeric_bounds(self) -> None:
assert self._numeric_keywords(MetaAnalyzerFinding.model_json_schema()) == set()

def test_llm_finding_clamps_confidence(self) -> None:
hi = LLMFinding(rule_id="R", message="m", severity="LOW", start_line=1, confidence=1.5)
def test_llm_finding_normalizes_confidence(self) -> None:
# Values > 1.0 are treated as 0-100 scale and rescaled: 85 → 0.85
hi = LLMFinding(rule_id="R", message="m", severity="LOW", start_line=1, confidence=85)
# Negative values are clamped to 0.0
lo = LLMFinding(rule_id="R", message="m", severity="LOW", start_line=1, confidence=-0.3)
assert hi.confidence == 1.0
assert hi.confidence == pytest.approx(0.85)
assert lo.confidence == 0.0

def test_llm_finding_clamps_start_line(self) -> None:
Expand Down