NVIDIA · keshprad · Jun 24, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 23, 2026
diff --git a/src/skillspector/nodes/analyzers/static_patterns_memory_poisoning.py b/src/skillspector/nodes/analyzers/static_patterns_memory_poisoning.py
@@ -81,7 +81,7 @@
 
 # MP2: Context Window Stuffing — filling context to displace content
 MP2_PATTERNS = [
-    (r"((\S)(?!\2).{1,19}?)\1{20,}", 0.8),
+    (r"(.{2,20}?)\1{20,}", 0.8),
     (
         r"(?:repeat|duplicate|echo)\s+(?:this|the\s+following)\s+(?:\d{3,}|many|hundreds?|thousands?)\s+times?",
         0.85,
@@ -182,6 +182,10 @@ def ctx(start: int) -> str:
             )
     for pattern, confidence in MP2_PATTERNS:
         for match in re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE):
+            captured = match.group(1) if match.lastindex else match.group(0)
+            non_ws_chars = set(captured) - {" ", "\t", "\n", "\r"}
+            if len(non_ws_chars) <= 1 and not any(c in captured for c in (" ", "\t")):
+                continue
             line_num = get_line_number(content, match.start())
             findings.append(
                 AnalyzerFinding(

diff --git a/tests/nodes/analyzers/test_mp2_regex_backtracking.py b/tests/nodes/analyzers/test_mp2_regex_backtracking.py
@@ -0,0 +1,106 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for MP2 regex pattern — catastrophic backtracking prevention."""
+
+from __future__ import annotations
+
+import time
+
+import pytest
+
+from skillspector.nodes.analyzers import static_patterns_memory_poisoning as mp_module
+
+
+class TestMP2DetectsStuffing:
+    """MP2 flags repeated content (context window stuffing) and ignores benign text."""
+
+    def test_repeated_phrase_detected(self) -> None:
+        """A 4-char phrase repeated 30 times triggers MP2."""
+        content = "ABCD" * 30
+        findings = mp_module.analyze(content, "test.md", "markdown")
+        mp2 = [f for f in findings if f.rule_id == "MP2"]
+        assert len(mp2) >= 1
+
+    def test_repeated_short_phrase_detected(self) -> None:
+        """A 16-char token (within the 20-char unit limit) repeated 25 times triggers MP2."""
+        content = "DEADBEEF_PAYLOAD" * 25
+        findings = mp_module.analyze(content, "payload.md", "markdown")
+        mp2 = [f for f in findings if f.rule_id == "MP2"]
+        assert len(mp2) >= 1
+
+    def test_short_repetition_not_detected(self) -> None:
+        """Five copies of a phrase (the pattern needs a unit plus 20 more) yield no stuffing finding."""
+        content = "hello world. " * 5
+        findings = mp_module.analyze(content, "normal.md", "markdown")
+        mp2_repetition = [
+            f for f in findings
+            if f.rule_id == "MP2" and "Context Window Stuffing" in f.message
+        ]
+        assert len(mp2_repetition) == 0
+
+    def test_separator_line_not_detected(self) -> None:
+        """Single-char separators like '=' * 80 should be suppressed."""
+        content = "=" * 80
+        findings = mp_module.analyze(content, "readme.md", "markdown")
+        mp2 = [f for f in findings if f.rule_id == "MP2"]
+        assert len(mp2) == 0
+
+    def test_whitespace_bearing_stuffing_detected(self) -> None:
+        """Repeated tokens containing whitespace (e.g. 'x ' * 30) must not be suppressed."""
+        content = "x " * 30
+        findings = mp_module.analyze(content, "payload.md", "markdown")
+        mp2 = [f for f in findings if f.rule_id == "MP2"]
+        assert len(mp2) >= 1, "Whitespace-bearing stuffing should be detected, not suppressed"
+
+
+class TestMP2NoBacktracking:
+    """MP2 regex completes in bounded time on adversarial inputs."""
+
+    @pytest.mark.timeout(5)
+    def test_non_matching_random_input_completes_fast(self) -> None:
+        """A-Z cycled to 2000 chars (period 26, above the 20-char unit cap) finishes within 5 seconds.
+
+        The old regex (nested lazy quantifier plus backreference) reportedly hung
+        on non-matching inputs due to catastrophic backtracking.
+        """
+        content = "".join(chr(65 + (i % 26)) for i in range(2000))
+        start = time.monotonic()
+        mp_module.analyze(content, "adversarial.txt", "text")
+        elapsed = time.monotonic() - start
+        assert elapsed < 5.0, f"MP2 regex took {elapsed:.1f}s — possible backtracking"
+
+    @pytest.mark.timeout(5)
+    def test_near_miss_pattern_completes_fast(self) -> None:
+        """Input with almost-repeating but not-quite structure completes quickly.
+
+        Nineteen copies of a 10-char unit, one corrupted copy, then five more:
+        a near-match of the kind that can force heavy regex backtracking.
+        """
+        content = ("abcdefghij" * 19) + "abcdefghiX" + ("abcdefghij" * 5)
+        start = time.monotonic()
+        mp_module.analyze(content, "nearmiss.txt", "text")
+        elapsed = time.monotonic() - start
+        assert elapsed < 5.0, f"MP2 regex took {elapsed:.1f}s — possible backtracking"
+
+    @pytest.mark.timeout(5)
+    def test_large_non_repeating_content(self) -> None:
+        """100 unique lines (a few KB) of non-repeating text should not cause the regex to hang."""
+        lines = [f"Line {i}: This is unique content number {i * 7 + 3}." for i in range(100)]
+        content = "\n".join(lines)
+        start = time.monotonic()
+        mp_module.analyze(content, "large.md", "markdown")
+        elapsed = time.monotonic() - start
+        assert elapsed < 5.0, f"MP2 regex took {elapsed:.1f}s on 5KB input"