From 154c1e857661b9025a512845c205b8d3b5c94cfb Mon Sep 17 00:00:00 2001 From: mimran-khan Date: Tue, 23 Jun 2026 01:22:13 +0530 Subject: [PATCH 1/2] fix(scoring): document confidence scaling, sort by severity within rule bucket Addresses non-blocking review feedback: 1. Docstring now explicitly documents that each finding's contribution is scaled by its confidence value (clamped to [0, 1]), and that findings with confidence <= 0 are skipped entirely while remaining in the reported findings list. Skipped findings do not consume a weight slot. 2. Findings are now sorted by (rule_id, severity DESC) before applying diminishing weights, ensuring the highest-severity occurrence of each rule always receives the full weight regardless of input order. --- src/skillspector/nodes/report.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/skillspector/nodes/report.py b/src/skillspector/nodes/report.py index da32dac..3169c8f 100644 --- a/src/skillspector/nodes/report.py +++ b/src/skillspector/nodes/report.py @@ -94,13 +94,26 @@ def _compute_risk_score( a quarter. Occurrences beyond the third are ignored for scoring purposes. This prevents repeated pattern matches from inflating the score unboundedly. + Each finding's contribution is also scaled by its confidence value (clamped + to [0, 1]). Findings with confidence <= 0 are skipped entirely — they do not + contribute to the score but remain in the reported findings list. + + Within each rule_id bucket, findings are processed in severity-descending + order so that the highest-severity occurrence always receives the full weight. + Base points per severity: CRITICAL=50, HIGH=25, MEDIUM=10, LOW=5. Multiplier: 1.3x if has_executable_scripts. 
""" + severity_rank = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3} + sorted_findings = sorted( + findings, + key=lambda f: (f.rule_id or "UNKNOWN", severity_rank.get((f.severity or "LOW").upper(), 4)), + ) + rule_occurrence_count: dict[str, int] = {} score = 0.0 - for f in findings: + for f in sorted_findings: confidence = max(0.0, min(1.0, f.confidence)) if confidence <= 0.0: continue From 4d50828b7a6dd074da39012885e9c98363621da3 Mon Sep 17 00:00:00 2001 From: mimran-khan Date: Tue, 23 Jun 2026 15:05:11 +0530 Subject: [PATCH 2/2] test(scoring): add regression test for input-order-dependent severity sort Add test_same_rule_low_before_critical_sorted_correctly that feeds [LOW, CRITICAL] for the same rule_id. Without the severity sort the score is 30 (LOW gets full weight); with it the score is 52 (CRITICAL gets full weight). This locks in the fix and guards against regression. Addresses review feedback from @keshprad and @rng1995. Signed-off-by: mimran-khan --- tests/nodes/test_report.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/nodes/test_report.py b/tests/nodes/test_report.py index 4f45443..1cf3e0e 100644 --- a/tests/nodes/test_report.py +++ b/tests/nodes/test_report.py @@ -205,6 +205,21 @@ def test_same_rule_mixed_severities(self) -> None: # First TM1: 50*1.0, second TM1: 5*0.5 = 2.5 -> total 52.5 -> 52 assert score == 52 + def test_same_rule_low_before_critical_sorted_correctly(self) -> None: + """LOW before CRITICAL in input order must still score as if CRITICAL came first. + + Without severity sorting, LOW gets the full weight (5*1.0=5) and CRITICAL + gets the diminished weight (50*0.5=25), yielding 30. With sorting, CRITICAL + gets full weight (50*1.0=50) and LOW gets diminished (5*0.5=2.5), yielding 52. 
+ """ + findings = [ + _finding("TM1", "LOW", confidence=1.0), + _finding("TM1", "CRITICAL", confidence=1.0), + ] + score, _, _ = _compute_risk_score(findings, False) + # Sorted: CRITICAL first (50*1.0) + LOW second (5*0.5=2.5) = 52.5 -> 52 + assert score == 52 + def test_exact_band_boundary_21_is_medium(self) -> None: findings = [ _finding("R1", "MEDIUM", confidence=1.0),