diff --git a/src/skillspector/nodes/report.py b/src/skillspector/nodes/report.py
index da32dac..3169c8f 100644
--- a/src/skillspector/nodes/report.py
+++ b/src/skillspector/nodes/report.py
@@ -94,13 +94,26 @@ def _compute_risk_score(
     a quarter. Occurrences beyond the third are ignored for scoring purposes.
     This prevents repeated pattern matches from inflating the score unboundedly.
 
+    Each finding's contribution is also scaled by its confidence value (clamped
+    to [0, 1]). Findings with confidence <= 0 are skipped entirely — they do not
+    contribute to the score but remain in the reported findings list.
+
+    Within each rule_id bucket, findings are processed in severity-descending
+    order so that the highest-severity occurrence always receives the full weight.
+
     Base points per severity: CRITICAL=50, HIGH=25, MEDIUM=10, LOW=5.
     Multiplier: 1.3x if has_executable_scripts.
     """
+    severity_rank = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
+    sorted_findings = sorted(
+        findings,
+        key=lambda f: (f.rule_id or "UNKNOWN", severity_rank.get((f.severity or "LOW").upper(), 4)),
+    )
+
     rule_occurrence_count: dict[str, int] = {}
     score = 0.0
 
-    for f in findings:
+    for f in sorted_findings:
         confidence = max(0.0, min(1.0, f.confidence))
         if confidence <= 0.0:
             continue
diff --git a/tests/nodes/test_report.py b/tests/nodes/test_report.py
index 4f45443..1cf3e0e 100644
--- a/tests/nodes/test_report.py
+++ b/tests/nodes/test_report.py
@@ -205,6 +205,21 @@ def test_same_rule_mixed_severities(self) -> None:
         # First TM1: 50*1.0, second TM1: 5*0.5 = 2.5 -> total 52.5 -> 52
         assert score == 52
 
+    def test_same_rule_low_before_critical_sorted_correctly(self) -> None:
+        """Score a same-rule LOW listed before a CRITICAL as if CRITICAL came first.
+
+        Without severity sorting, LOW gets the full weight (5*1.0=5) and CRITICAL
+        gets the diminished weight (50*0.5=25), yielding 30. With sorting, CRITICAL
+        gets full weight (50*1.0=50) and LOW is halved (5*0.5=2.5): 52.5 -> 52.
+        """
+        findings = [
+            _finding("TM1", "LOW", confidence=1.0),  # lower severity listed first
+            _finding("TM1", "CRITICAL", confidence=1.0),  # same rule, higher severity
+        ]
+        score, _, _ = _compute_risk_score(findings, False)
+        # Sorted: CRITICAL first (50*1.0) + LOW second (5*0.5=2.5) = 52.5 -> 52
+        assert score == 52
+
     def test_exact_band_boundary_21_is_medium(self) -> None:
         findings = [
             _finding("R1", "MEDIUM", confidence=1.0),