From b0755a34f296d9427b760f2dca7e97559320519c Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 18:35:15 -0400
Subject: [PATCH 1/9] feat: extract vuln-assessment-methodology into standalone
 capability

Generic vulnerability assessment guidance (source-to-sink tracing,
disprove-first analysis, severity matrix, quality checklist, anti-patterns)
was embedded in the dotnet-reversing skill. Extract it into a new
vuln-assessment-methodology capability so any security capability can
load it. dotnet-reversing and mcr-analysis now reference it;
dotnet-reversing retains its .NET-specific patterns and a compact inline
severity table so it remains usable on its own.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../skills/dotnet-reversing/SKILL.md          | 155 ++++++++++++---
 .../skills/mcr-analysis/SKILL.md              |  77 +++++++-
 .../capability.yaml                           |  21 ++
 .../vuln-assessment-methodology/SKILL.md      | 181 ++++++++++++++++++
 4 files changed, 403 insertions(+), 31 deletions(-)
 create mode 100644 capabilities/vuln-assessment-methodology/capability.yaml
 create mode 100644 capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md

diff --git a/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md b/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
index 208de2e..7369e19 100644
--- a/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
+++ b/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
@@ -5,6 +5,9 @@ description: Use when reverse engineering .NET assemblies, decompiling DLLs/EXEs
 
 # .NET Reverse Engineering
 
+Load the `vuln-assessment-methodology` skill alongside this one for severity
+calibration, disprove-first discipline, and the quality checklist.
+
 ## Quick Start
 
 ```
@@ -61,17 +64,27 @@ Use `dotnet_search_by_name` for name-based searches and `dotnet_search_reference
 - **XML (XXE):** `XmlReader`, `XmlDocument`, `XDocument`, `XmlTextReader`
 - **LDAP:** `DirectorySearcher`, `DirectoryEntry`, `System.DirectoryServices`
 
-### Phase 3: Decompile Suspicious Code
+### Phase 3: Decompile and Verify
 ```
 dotnet_decompile_type(path="App.dll", type_name="App.Services.AuthenticationService")
 ```
-Read the actual C# source. Look for:
+Read the actual C# source. When you find a dangerous pattern, **read the full
+function and its callers** before drawing conclusions. Check for:
 - Hardcoded credentials
 - Weak crypto (MD5, SHA1, DES, static IVs/keys)
-- SQL string concatenation
+- SQL string concatenation — **but check if the concatenated value comes from
+  user input (HTTP param) vs config/env var. Only HTTP-sourced values are high severity.**
 - Unsanitized user input in file paths
 - Dangerous deserialization
-- Command injection
+- Command injection — **but check if the calling function has validation/filtering.
+  If validation exists, look for bypasses rather than reporting "no sanitization."**
+
+#### .NET-specific: JWT ReadToken is not always a finding
+
+`ReadToken`/`ReadJwtToken` without `ValidateToken` is NOT a vulnerability when
+the token is validated by a downstream service (Azure AD, ARM) or used only for
+metadata extraction (expiry, caching). Only report it when unvalidated claims
+drive authorization decisions.
 
 ### Phase 4: Trace Attack Paths
 ```
@@ -83,7 +96,26 @@ dotnet_get_call_flows(
 ```
 Find how vulnerable methods are reached from entry points (controllers, handlers, public APIs).
 
-### Phase 5: Report Findings
+### Phase 5: Assess Severity and Report
+
+Assign severity based on actual exploitability — not the vulnerability class
+name. The `vuln-assessment-methodology` skill has the full guidance; the
+essentials:
+
+| Source of dangerous input | Access required | Severity |
+|---|---|---|
+| HTTP request parameter | Unauthenticated, internet-facing | Critical/High |
+| HTTP request parameter | Authenticated user | High/Medium |
+| HTTP request parameter | Internal network only | Medium |
+| Config file / env var | Container or host access | Low |
+| Hardcoded value (as sink input) | N/A | Not a finding (but hardcoded credentials are — see methodology skill) |
+
+**Before reporting every finding:**
+- Trace the data flow from attacker-controlled source to sink
+- Actively try to disprove it — look for validation, encoding, authorization
+- If defensive code exists, demonstrate a specific bypass or retract
+- Verify severity reflects exploitability, not vulnerability class name
+
 ```
 report_finding(
     file="App.dll",
@@ -100,53 +132,123 @@ finish_task(success=True, markdown_summary="Found 2 high-severity issues...")
 ```
 Always report findings to persist them to the Dreadnode platform.
 
-## Common Vulnerability Patterns
+## .NET Vulnerability Patterns: Vulnerable vs Safe
+
+For each pattern, both vulnerable AND safe versions are shown. You must
+check which one the code matches before reporting.
 
 ### Hardcoded Credentials
 ```csharp
-// Look for string literals in auth code
+// VULNERABLE — real secret in source code
 private static string ApiKey = "sk-1234567890abcdef";
 connectionString = "Server=db;User=admin;Password=P@ssw0rd";
+
+// NOT A FINDING — loaded from config/env
+var apiKey = Configuration["ApiKey"];
+var connStr = Environment.GetEnvironmentVariable("DB_CONNECTION");
+
+// NOT A FINDING — misleading error message (not a real credential)
+throw new Exception("Api Key is invalid. Subscription validation failed.");
 ```
 
 ### Insecure Deserialization
 ```csharp
-// BinaryFormatter = RCE
+// VULNERABLE — BinaryFormatter with untrusted input
 BinaryFormatter formatter = new BinaryFormatter();
-object obj = formatter.Deserialize(stream);  // VULNERABLE
+object obj = formatter.Deserialize(untrustedStream);
 
-// Type-controlling JSON deserialization
+// VULNERABLE — TypeNameHandling enables type control
 JsonConvert.DeserializeObject(json, new JsonSerializerSettings {
-    TypeNameHandling = TypeNameHandling.All  // VULNERABLE
+    TypeNameHandling = TypeNameHandling.All
 });
+
+// SAFE — System.Text.Json (no type handling by default)
+var obj = JsonSerializer.Deserialize<MyType>(json);
+
+// SAFE — TypeNameHandling.None (default)
+JsonConvert.DeserializeObject<MyType>(json);
 ```
 
 ### Command Injection
 ```csharp
-// User input in process arguments
-Process.Start("cmd.exe", "/c " + userInput);  // VULNERABLE
+// VULNERABLE — direct interpolation
+Process.Start("cmd.exe", "/c " + userInput);
+Arguments = $"-c \"{command} {string.Join(" ", args)}\"";
+
+// PARTIALLY SAFE — has validation, but check for bypasses
+var error = ValidateCommand(command); // blocks ; && || | etc.
+if (error != null) return error;
+// If ValidateCommand misses characters like " or ${ }, report it as
+// incomplete validation with a specific bypass (Medium), not "no sanitization" (High)
+
+// SAFE — no shell, direct exec with argument array
+Process.Start("myapp", new[] { "--flag", sanitizedValue });
 ```
 
-### Path Traversal
+### SQL Injection
 ```csharp
-// Unsanitized path concatenation
-string path = Path.Combine(baseDir, userFileName);  // VULNERABLE if userFileName = "../../../etc/passwd"
-File.ReadAllText(path);
+// VULNERABLE — user input concatenated into SQL
+string query = "SELECT * FROM users WHERE id = " + request.UserId;
+
+// LOW RISK — env var / config value concatenated (defense-in-depth issue)
+// Attacker needs container access to control env var
+string proc = "[" + schemaFromEnvVar + "].[MyProcedure]";
+
+// SAFE — parameterized query
+cmd.CommandText = "SELECT * FROM users WHERE id = @id";
+cmd.Parameters.AddWithValue("@id", userId);
 ```
 
-### SQL Injection
+### Blazor XSS (MarkupString)
+```csharp
+// VULNERABLE — user input directly cast to MarkupString
+builder.AddContent(0, (MarkupString)userInput);
+
+// SAFE — HtmlEncoded BEFORE MarkupString cast
+var encoded = WebUtility.HtmlEncode(userInput);
+var colored = AnsiParser.ConvertToHtml(encoded, state); // adds <span> tags
+builder.AddContent(0, (MarkupString)colored); // MarkupString needed for spans
+
+// SAFE — Markdown pipeline with HTML disabled
+pipeline.DisableHtml();
+var html = Markdown.ToHtml(input, pipeline);
+builder.AddContent(0, (MarkupString)html);
+```
+
+### JWT Validation
 ```csharp
-// String concatenation in queries
-string query = "SELECT * FROM users WHERE id = " + userId;  // VULNERABLE
-cmd.CommandText = query;
+// VULNERABLE — claims trusted for local authorization
+var token = new JwtSecurityTokenHandler().ReadJwtToken(jwt);
+if (token.Claims.First(c => c.Type == "role").Value == "admin")
+    GrantAdminAccess(); // No signature verification!
+
+// SAFE — token read for metadata, validated by downstream service
+var token = handler.ReadJwtToken(jwt);
+var expiry = token.ValidTo; // Just extracting expiry for caching
+return DelegatedTokenCredential.Create(jwt); // Azure AD validates the sig
+```
+
+### Path Traversal
+```csharp
+// VULNERABLE — user input in path without validation
+string path = Path.Combine(baseDir, userFileName);
+File.ReadAllText(path);
+
+// SAFE — canonicalization check
+string full = Path.GetFullPath(Path.Combine(baseDir, userFileName));
+if (!full.StartsWith(baseDir)) throw new SecurityException();
 ```
 
 ### Weak Cryptography
 ```csharp
-// Deprecated algorithms
+// VULNERABLE
 MD5.Create().ComputeHash(data);  // Weak hash
 DES.Create();  // Weak cipher
 new RijndaelManaged { Mode = CipherMode.ECB };  // Weak mode
+
+// SAFE
+SHA256.Create().ComputeHash(data);
+Aes.Create();  // AES with CBC/GCM
 ```
 
 ## Critical Rules
@@ -154,15 +256,14 @@ new RijndaelManaged { Mode = CipherMode.ECB };  // Weak mode
 **DO:**
 - Always start with `dotnet_scan_binaries` to find targets
 - Use `dotnet_decompile_type` for targeted analysis (not `dotnet_decompile_module`)
-- Report ALL findings with `report_finding` — even low-severity ones
-- Use `report_auth` immediately when you find credentials
+- Report all verified findings with `report_finding` — even low-severity ones
+- Use `report_auth` only for real credentials, not error messages or placeholders
 - Call `finish_task` when analysis is complete
 
 **DO NOT:**
+- Report `ReadToken`/`ReadJwtToken` as "JWT bypass" when the token is validated server-side
+- Report `MarkupString` as XSS when the content is `HtmlEncode`d upstream
 - Use `dotnet_decompile_module` on large assemblies — it will overflow context
-- Skip reporting — findings must be persisted to the platform
-- Analyze only one assembly — check ALL binaries in the target directory
-- Ignore Microsoft/System assemblies completely — they can have vulnerabilities too
 
 ## Tips
 
diff --git a/capabilities/dotnet-reversing/skills/mcr-analysis/SKILL.md b/capabilities/dotnet-reversing/skills/mcr-analysis/SKILL.md
index d5ff938..36950e4 100644
--- a/capabilities/dotnet-reversing/skills/mcr-analysis/SKILL.md
+++ b/capabilities/dotnet-reversing/skills/mcr-analysis/SKILL.md
@@ -5,7 +5,11 @@ description: Use when analyzing .NET applications from Microsoft Container Regis
 
 # MCR Container Image Analysis
 
-Extract and analyze .NET assemblies from Microsoft Container Registry images without executing any container code. Uses pure HTTP—no Docker required.
+Extract and analyze .NET assemblies from Microsoft Container Registry images
+without executing any container code. Uses pure HTTP—no Docker required.
+
+Load the `vuln-assessment-methodology` skill alongside this one for severity
+calibration, disprove-first discipline, and the quality checklist.
 
 ## When to Use MCR Tools
 
@@ -44,6 +48,20 @@ dotnet_scan_binaries(path="~/workspace/mcr/dotnet_aspnet_8.0")  # analyze
 | `azure-functions/*` | Azure Functions runtime |
 | `appsvc/*` | Azure App Service images |
 
+Use `mcr_search_repositories` to discover repos beyond these — the catalog has
+~3,200 entries across Azure services, infrastructure, and tooling.
+
+## Not All MCR Images Are .NET
+
+Many MCR images use Go, Python, TypeScript, or Rust. Extraction will return
+"No .NET assemblies found" for these. This is common for infrastructure and
+networking components (CNI plugins, proxies, tunnels, AI/ML runtimes).
+
+If extraction fails:
+1. Try `dll_only=false` — some images use AOT compilation or non-standard layouts
+2. Try a different platform (`linux/arm64` vs `linux/amd64`)
+3. Accept that the image may not contain .NET code and move on
+
 ## Workflow
 
 ### 1. Find the Target Image
@@ -78,21 +96,72 @@ dotnet_search_references(path="~/workspace/mcr/.../TargetAssembly.dll", search="
 
 For app images (`appsvc/*`, `azure-functions/*`), prioritize assemblies under `/app/` over runtime DLLs. For runtime images (`dotnet/runtime`, `dotnet/aspnet`), target `System.Private.CoreLib.dll` or `Microsoft.AspNetCore.dll` directly.
 
+## MCR-Specific Attack Surface
+
+When analyzing assemblies extracted from MCR images, look for these in addition
+to standard .NET vulnerability patterns:
+
+1. **ONNX/ML model loading** — Path traversal in model file paths
+2. **ANSI/terminal parsers** — Escape sequence injection breaking HTML context
+3. **Protobuf/gRPC handling** — Oversized message DoS, recursive depth bombs
+4. **URL parsers** — Scheme bypass, authority confusion, attribute breakout
+
+## Prioritizing MCR Repos for Security Analysis
+
+Not all MCR repos are equally interesting. Prioritize:
+
+**Highest value:**
+- New products/services (few tags, v0.x/v1.x — less mature, less audited)
+- API gateways and reverse proxies (parse untrusted HTTP — smuggling, injection)
+- Auth/identity services (JWT, certificate, token handling)
+- Database access layers (SQL injection, query injection)
+- AI/ML services (model loading, prompt handling, inference pipelines)
+
+**Medium value:**
+- Emulators (often have weaker auth than production counterparts)
+- Internal/SRE tools (may rely on network isolation instead of auth)
+- Monitoring/observability dashboards (render untrusted telemetry data)
+
+**Lower value:**
+- Mature Microsoft runtime images (dotnet/runtime, dotnet/aspnet — heavily audited)
+- Helm charts and Bicep modules (infrastructure-as-code, not runtime code)
+- Build tools and SDKs (not typically internet-facing)
+
+## Delegating Analysis to Subagents
+
+When dispatching subagents to analyze extracted assemblies:
+
+1. **Load the analysis guidance** — ensure subagents have both the
+   `dotnet-reversing` and `vuln-assessment-methodology` skills loaded
+2. **Tell them what NOT to report** — share known false-positive patterns
+   from previous analysis of similar codebases
+3. **Specify the application assemblies** — list the non-framework DLLs
+   explicitly so they don't waste time on Microsoft.AspNetCore.* etc.
+4. **Set threat model context** — tell them if the target is public-facing,
+   internal, or a dev tool so they assign severity appropriately
+5. **Require disproof attempts** — instruct subagents to try to disprove
+   each finding before reporting it
+
 ## Critical Rules
 
 **DO:**
 - Always use `mcr_list_tags` before `mcr_pull_and_extract` to pick the right version
 - Use specific version tags (e.g., `8.0.25`) not floating tags (`8.0`, `latest`)
 - After extraction, immediately run `dotnet_scan_binaries` on the output directory
-- Prioritize `/app/` assemblies over runtime assemblies when analyzing app images
+- Prioritize `/app/` or `/emulator/` assemblies over runtime assemblies
+- Try `dll_only=false` if default extraction finds nothing
+- Check tag counts and version numbers to gauge maturity (few tags = newer = less audited)
 
 **DO NOT:**
-- Skip the extraction step and try to analyze MCR URLs directly — you must extract first
+- Skip the extraction step and try to analyze MCR URLs directly
 - Use `latest` tag for security analysis — it changes over time
-- Forget to note the output directory path from `mcr_pull_and_extract`
+- Assume extraction failure means the image is empty — it may simply not contain .NET code
+- Dispatch subagents without the analysis guidance loaded
 
 ## Tips
 
 - **Version pinning**: Use specific tags like `8.0.25` instead of `8.0` or `latest` for reproducibility
 - **Cache reuse**: Repeated extractions of the same image skip the download
 - **Large images**: SDK images are huge (~800MB); prefer runtime/aspnet images when possible
+- **Parallel extraction**: Start extracting additional images while waiting for earlier results instead of processing them one at a time
+- **Cross-reference tags**: Repos with very few tags or only `latest` are likely new — potentially less audited
diff --git a/capabilities/vuln-assessment-methodology/capability.yaml b/capabilities/vuln-assessment-methodology/capability.yaml
new file mode 100644
index 0000000..c5e34af
--- /dev/null
+++ b/capabilities/vuln-assessment-methodology/capability.yaml
@@ -0,0 +1,21 @@
+schema: 1
+name: vuln-assessment-methodology
+version: "1.0.0"
+description: >
+  Cross-cutting methodology for vulnerability assessment. Provides
+  source-to-sink tracing discipline, threat-model-aware severity
+  assignment, disprove-first analysis, and anti-pattern guidance.
+  Load alongside any domain-specific security capability to reduce
+  false positives and severity inflation.
+
+author:
+  name: Dreadnode
+  url: https://dreadnode.io
+license: MIT
+repository: https://github.com/dreadnode/capabilities
+keywords:
+  - vulnerability-assessment
+  - methodology
+  - security
+  - false-positive-prevention
+  - severity-calibration
diff --git a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
new file mode 100644
index 0000000..36ddef5
--- /dev/null
+++ b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
@@ -0,0 +1,181 @@
+---
+name: vuln-assessment-methodology
+description: "Load when performing vulnerability assessment in any domain. Enforces source-to-sink tracing, disprove-first analysis, threat-model-aware severity, and finding quality standards. Prevents false positives and severity inflation."
+---
+
+# Vulnerability Assessment Methodology
+
+**The goal is accurate, honest findings — not volume.** One correctly-assessed
+finding is worth more than ten inflated ones. A false positive or overstated
+severity damages credibility and wastes human reviewer time.
+
+## Hard Rules
+
+### 1. NEVER report a sink without tracing the full data flow
+
+Seeing a dangerous function is NOT a finding. You MUST trace the data from
+**attacker-controlled source** through every transformation to the sink.
+
+If sanitization or validation exists anywhere in the chain, the finding is
+invalid unless you can demonstrate a specific bypass of that defense.
+
+**Before reporting, answer these questions:**
+- What is the attacker-controlled input? (HTTP request body, OTLP payload, env var, etc.)
+- What transformations does it undergo? (encoding, parameterization, validation, etc.)
+- Does any transformation neutralize the attack?
+- Can you construct a concrete input that survives all transformations and triggers at the sink?
+
+### 2. Try to DISPROVE your finding before reporting it
+
+Once you suspect a vulnerability, actively look for evidence that it is NOT
+exploitable. Read the FULL function, not just the dangerous line. Look for:
+- Validation/sanitization functions called earlier in the same method
+- Input filtering in the caller
+- Authorization checks on the controller or route
+- Type constraints that limit the input domain
+- Configuration that must be explicitly set to enable the dangerous path
+
+**If you find defensive code, your job is to either demonstrate a bypass
+or downgrade/retract the finding.** Not ignore it.
+
+### 3. Severity must reflect the ACTUAL threat model, not the vulnerability class name
+
+"SQL injection" is not automatically HIGH. "No authentication" is not
+automatically CRITICAL. Severity depends on:
+
+**Source controllability:**
+- HTTP request parameter from untrusted user → High source risk
+- Configuration file set at deployment → Low source risk
+- Environment variable set by platform operator → Low source risk
+- Hardcoded constant → Not attacker-controlled at all
+
+**Access prerequisites:**
+- Unauthenticated from the internet → Highest risk
+- Authenticated user → Lower risk (depends on user trust level)
+- Requires network access to internal service → Lower risk (lateral movement required)
+- Requires container/host access → Lowest risk (attacker already has code exec)
+
+**Deployment context:**
+- Public-facing API → Full severity
+- Internal tool behind network isolation → Note the dependency on network controls
+- Development/emulator tool → Consider intended use case
+- Infrastructure component in a managed platform → Consider platform's security boundary
+
+**Apply this matrix before assigning severity:**
+
+| Source of dangerous input | Access required | Severity |
+|---|---|---|
+| HTTP request param | Unauthenticated, internet-facing | Critical/High |
+| HTTP request param | Authenticated user | High/Medium |
+| HTTP request param | Internal network only | Medium (note network dependency) |
+| Config/env var | Container-level access | Low (defense-in-depth) |
+| Hardcoded value (as sink input) | N/A | Not a finding (but hardcoded credentials are — see Severity Guide) |
+
+### 4. Read the COMPLETE defensive code, not just the vulnerable line
+
+Common mistake: seeing `bash -c "{command}"` and reporting "command injection,
+no sanitization" — while the calling function has a `ValidateCommand()` method
+that blocks `;`, `&&`, `||`, `|`, `>`, `<`, backtick, `$(`, etc.
+
+If validation exists but is incomplete (e.g., blocks `;` but not `"`), report
+the **specific bypass**, not "no sanitization." The severity should reflect the
+narrowness of the bypass, not the theoretical impact of unrestricted injection.
+
+**Example of correct reporting:**
+- BAD: "Command injection via bash -c with no sanitization" (HIGH)
+- GOOD: "Incomplete command validation in ValidateCommand() — blocks common
+  injection chars but misses `\"`, allowing quote-escape breakout from bash -c
+  wrapping. Exploitable via RunCommandAsync if API is reachable." (MEDIUM)
+
+### 5. Configuration options are not vulnerabilities
+
+A "dev mode" or "unsecured mode" that disables authentication is a **design
+decision**, not a vulnerability — unless it can be enabled by an attacker or
+is accidentally deployed in production with no safeguards.
+
+### 6. Internal tools have different threat models
+
+Internal/infrastructure tools (SRE agents, platform operators, admin dashboards)
+often intentionally omit authentication because they rely on network isolation
+(Kubernetes network policies, service mesh, private VNet). This IS an attack
+surface if network isolation fails, but:
+
+- Report it as a **dependency on network controls**, not as "missing security"
+- Note what an attacker needs BEFORE they can reach the service
+- Don't call it "CRITICAL" unless you have evidence it's internet-reachable
+
+### 7. AI prompt injection is a design concern, not a code bug
+
+Any AI feature that processes user-generated content inherently allows prompt
+injection. This is a product design tradeoff, not an exploitable code
+vulnerability — unless the AI output is used dangerously (fed into `eval()`,
+used to construct SQL, rendered as HTML without encoding).
+
+### 8. Distinguish application code from framework code
+
+Focus on application-specific code. Don't report known framework behaviors
+as vulnerabilities. Framework defaults are by definition expected behavior.
+
+## Severity Assignment Guide
+
+- **Critical**: Unauth RCE on internet-facing service, hardcoded prod credentials,
+  authentication bypass allowing full admin access.
+  You must demonstrate: attacker has no access prerequisites beyond a network connection,
+  and impact is code execution or full data access.
+- **High**: Authed RCE, SQL injection via HTTP params, stored XSS on sensitive pages,
+  SSRF to internal services from internet-facing endpoints.
+  You must demonstrate: attacker-controlled input reaches a dangerous sink with
+  exploitable impact, even if some access is required.
+- **Medium**: Incomplete validation bypass, internal-only exposure, defense-in-depth
+  gaps, or issues with moderate impact (information disclosure, limited injection).
+- **Low**: Defense-in-depth improvements (env var in SQL query, missing but
+  non-exploitable sanitization), code quality issues that could become
+  vulnerabilities if assumptions change.
+- **Not a finding**: Dangerous function with complete upstream sanitization,
+  configuration option working as documented, token parsed for metadata only,
+  framework default behavior, theoretical attack requiring attacker to already
+  have higher privileges.
+
+## Reporting Standards
+
+**Only use finding-report tools for:**
+- Issues where you traced the data flow end-to-end
+- Issues where you actively tried to disprove the finding and couldn't
+- Issues with code evidence showing both the vulnerable path AND
+  the absence (or bypass) of defensive code
+
+**Severity in the report must:**
+- Reflect the actual threat model, not the vulnerability class name
+- State the access prerequisites explicitly
+- Note any existing defensive code and why it's insufficient
+- Be defensible under peer review by a senior security engineer
+
+**Credential reports are for:**
+- Actual hardcoded credentials (connection strings, API keys, passwords)
+- NOT for misleading error messages, placeholder strings, or example values
+
+## Quality Checklist (ALL must pass before reporting)
+
+- [ ] Traced data flow from source to sink
+- [ ] Checked for sanitization/encoding at every step
+- [ ] Read the FULL function containing the dangerous code, not just the dangerous line
+- [ ] Actively tried to disprove this finding
+- [ ] Verified this is application code, not framework behavior
+- [ ] Considered deployment context and threat model
+- [ ] Can construct a concrete exploit input
+- [ ] Severity reflects actual exploitability, not vulnerability class name
+- [ ] Finding would survive review by a skeptical senior security engineer
+
+## Anti-patterns
+
+| Anti-pattern | Example | Why it's wrong |
+|---|---|---|
+| Sink-only analysis | "Dangerous function found → vuln" | Didn't check for upstream defenses |
+| Ignoring defensive code | "Injection, no sanitization" when validation exists | Didn't read the full function |
+| Class-name severity | "SQL injection → HIGH" regardless of source | Env var source ≠ HTTP param source |
+| Feature-as-vulnerability | "Unsecured mode exists" | Documented design decision |
+| Framework noise | "Framework uses cookies" | Expected framework behavior |
+| Theoretical-only | "If an attacker could modify env vars..." | Attacker already has code exec |
+| Quantity over quality | Reporting 10 low-confidence findings | 1 verified finding > 10 guesses |
+| Context-free severity | "No auth → CRITICAL" on internal tool | Deployment model matters |
+| Confirmation bias | Finding a sink, then rationalizing why mitigations don't count | Try to disprove first |

From 9818bab246e01225c11d0846da6725a1ff64f188 Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 19:49:39 -0400
Subject: [PATCH 2/9] =?UTF-8?q?chore:=20tighten=20skill=20files=20?=
 =?UTF-8?q?=E2=80=94=20cut=20redundancy,=20fix=20stale=20refs,=20correct?=
 =?UTF-8?q?=20severity=20example?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- vuln-assessment-methodology: consolidate severity guidance (drop
  redundant prose lists, merge severity guide into rule 3, remove
  quality checklist that restated the rules, trim reporting standards)
- dotnet-reversing: fix report_finding example to use criticality
  "critical" for hardcoded credential, remove low-signal weak crypto
  pattern, fix stale "quality checklist" reference
- mcr-analysis: fix stale "quality checklist" reference, deduplicate
  dll_only fallback between sections

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../skills/dotnet-reversing/SKILL.md          |  18 +-
 .../skills/mcr-analysis/SKILL.md              |   3 +-
 .../vuln-assessment-methodology/SKILL.md      | 165 +++++-------------
 3 files changed, 46 insertions(+), 140 deletions(-)

diff --git a/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md b/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
index 7369e19..5acef6d 100644
--- a/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
+++ b/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
@@ -6,7 +6,7 @@ description: Use when reverse engineering .NET assemblies, decompiling DLLs/EXEs
 # .NET Reverse Engineering
 
 Load the `vuln-assessment-methodology` skill alongside this one for severity
-calibration, disprove-first discipline, and the quality checklist.
+calibration, disprove-first discipline, and reporting standards.
 
 ## Quick Start
 
@@ -120,8 +120,8 @@ essentials:
 report_finding(
     file="App.dll",
     method="AuthService.ValidateToken",
-    criticality="high",
-    content="Hardcoded JWT secret found:\n```csharp\nprivate static string Secret = \"supersecret123\";\n```"
+    criticality="critical",
+    content="Hardcoded JWT signing secret in source code:\n```csharp\nprivate static string Secret = \"supersecret123\";\n```"
 )
 
 report_auth(auth_material="API key in config: `sk-1234567890abcdef`")
@@ -239,18 +239,6 @@ string full = Path.GetFullPath(Path.Combine(baseDir, userFileName));
 if (!full.StartsWith(baseDir)) throw new SecurityException();
 ```
 
-### Weak Cryptography
-```csharp
-// VULNERABLE
-MD5.Create().ComputeHash(data);  // Weak hash
-DES.Create();  // Weak cipher
-new RijndaelManaged { Mode = CipherMode.ECB };  // Weak mode
-
-// SAFE
-SHA256.Create().ComputeHash(data);
-Aes.Create();  // AES with CBC/GCM
-```
-
 ## Critical Rules
 
 **DO:**
diff --git a/capabilities/dotnet-reversing/skills/mcr-analysis/SKILL.md b/capabilities/dotnet-reversing/skills/mcr-analysis/SKILL.md
index 36950e4..9f09030 100644
--- a/capabilities/dotnet-reversing/skills/mcr-analysis/SKILL.md
+++ b/capabilities/dotnet-reversing/skills/mcr-analysis/SKILL.md
@@ -9,7 +9,7 @@ Extract and analyze .NET assemblies from Microsoft Container Registry images
 without executing any container code. Uses pure HTTP—no Docker required.
 
 Load the `vuln-assessment-methodology` skill alongside this one for severity
-calibration, disprove-first discipline, and the quality checklist.
+calibration, disprove-first discipline, and reporting standards.
 
 ## When to Use MCR Tools
 
@@ -149,7 +149,6 @@ When dispatching subagents to analyze extracted assemblies:
 - Use specific version tags (e.g., `8.0.25`) not floating tags (`8.0`, `latest`)
 - After extraction, immediately run `dotnet_scan_binaries` on the output directory
 - Prioritize `/app/` or `/emulator/` assemblies over runtime assemblies
-- Try `dll_only=false` if default extraction finds nothing
 - Check tag counts and version numbers to gauge maturity (few tags = newer = less audited)
 
 **DO NOT:**
diff --git a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
index 36ddef5..4ccc0e8 100644
--- a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
+++ b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
@@ -6,176 +6,95 @@ description: "Load when performing vulnerability assessment in any domain. Enfor
 # Vulnerability Assessment Methodology
 
 **The goal is accurate, honest findings — not volume.** One correctly-assessed
-finding is worth more than ten inflated ones. A false positive or overstated
-severity damages credibility and wastes human reviewer time.
+finding is worth more than ten inflated ones.
 
 ## Hard Rules
 
 ### 1. NEVER report a sink without tracing the full data flow
 
-Seeing a dangerous function is NOT a finding. You MUST trace the data from
-**attacker-controlled source** through every transformation to the sink.
+Seeing a dangerous function is NOT a finding. Trace from **attacker-controlled
+source** through every transformation to the sink. If sanitization exists, the
+finding is invalid unless you demonstrate a specific bypass.
 
-If sanitization or validation exists anywhere in the chain, the finding is
-invalid unless you can demonstrate a specific bypass of that defense.
-
-**Before reporting, answer these questions:**
-- What is the attacker-controlled input? (HTTP request body, OTLP payload, env var, etc.)
-- What transformations does it undergo? (encoding, parameterization, validation, etc.)
+**Before reporting, answer:**
+- What is the attacker-controlled input?
+- What transformations does it undergo?
 - Does any transformation neutralize the attack?
-- Can you construct a concrete input that survives all transformations and triggers at the sink?
+- Can you construct a concrete input that reaches the sink?
 
 ### 2. Try to DISPROVE your finding before reporting it
 
-Once you suspect a vulnerability, actively look for evidence that it is NOT
-exploitable. Read the FULL function, not just the dangerous line. Look for:
-- Validation/sanitization functions called earlier in the same method
-- Input filtering in the caller
-- Authorization checks on the controller or route
-- Type constraints that limit the input domain
-- Configuration that must be explicitly set to enable the dangerous path
-
-**If you find defensive code, your job is to either demonstrate a bypass
-or downgrade/retract the finding.** Not ignore it.
-
-### 3. Severity must reflect the ACTUAL threat model, not the vulnerability class name
-
-"SQL injection" is not automatically HIGH. "No authentication" is not
-automatically CRITICAL. Severity depends on:
-
-**Source controllability:**
-- HTTP request parameter from untrusted user → High source risk
-- Configuration file set at deployment → Low source risk
-- Environment variable set by platform operator → Low source risk
-- Hardcoded constant → Not attacker-controlled at all
+Actively look for evidence it is NOT exploitable. Read the FULL function. Look
+for validation, input filtering, authorization checks, type constraints,
+config gates. If you find defensive code, demonstrate a bypass or retract.
 
-**Access prerequisites:**
-- Unauthenticated from the internet → Highest risk
-- Authenticated user → Lower risk (depends on user trust level)
-- Requires network access to internal service → Lower risk (lateral movement required)
-- Requires container/host access → Lowest risk (attacker already has code exec)
+### 3. Severity must reflect the ACTUAL threat model
 
-**Deployment context:**
-- Public-facing API → Full severity
-- Internal tool behind network isolation → Note the dependency on network controls
-- Development/emulator tool → Consider intended use case
-- Infrastructure component in a managed platform → Consider platform's security boundary
-
-**Apply this matrix before assigning severity:**
+Assign severity by source, access, and context — not vulnerability class name.
 
 | Source of dangerous input | Access required | Severity |
 |---|---|---|
 | HTTP request param | Unauthenticated, internet-facing | Critical/High |
 | HTTP request param | Authenticated user | High/Medium |
-| HTTP request param | Internal network only | Medium (note network dependency) |
-| Config/env var | Container-level access | Low (defense-in-depth) |
-| Hardcoded value (as sink input) | N/A | Not a finding (but hardcoded credentials are — see Severity Guide) |
+| HTTP request param | Internal network only | Medium |
+| Config/env var | Container-level access | Low |
+| Hardcoded value (as sink input) | N/A | Not a finding (but hardcoded credentials are — see below) |
 
-### 4. Read the COMPLETE defensive code, not just the vulnerable line
+**What each level requires:**
+- **Critical**: Unauth RCE, hardcoded prod credentials, full auth bypass. Attacker needs nothing beyond a network connection.
+- **High**: Authed RCE, SQL injection via HTTP params, stored XSS, SSRF from internet-facing endpoint.
+- **Medium**: Incomplete validation bypass, internal-only exposure, defense-in-depth gaps.
+- **Low**: Defense-in-depth issues (env var in SQL), code quality that could become exploitable.
+- **Not a finding**: Complete upstream sanitization, config-as-designed, token parsed for metadata only, framework defaults, attacker already needs higher privileges.
 
-Common mistake: seeing `bash -c "{command}"` and reporting "command injection,
-no sanitization" — while the calling function has a `ValidateCommand()` method
-that blocks `;`, `&&`, `||`, `|`, `>`, `<`, backtick, `$(`, etc.
+### 4. Read the COMPLETE defensive code
 
 If validation exists but is incomplete (e.g., blocks `;` but not `"`), report
-the **specific bypass**, not "no sanitization." The severity should reflect the
-narrowness of the bypass, not the theoretical impact of unrestricted injection.
+the **specific bypass**, not "no sanitization." Severity reflects the bypass
+narrowness, not unrestricted injection impact.
 
-**Example of correct reporting:**
 - BAD: "Command injection via bash -c with no sanitization" (HIGH)
-- GOOD: "Incomplete command validation in ValidateCommand() — blocks common
-  injection chars but misses `\"`, allowing quote-escape breakout from bash -c
-  wrapping. Exploitable via RunCommandAsync if API is reachable." (MEDIUM)
+- GOOD: "Incomplete validation in ValidateCommand() — misses `\"`, allowing
+  quote-escape from bash -c wrapping" (MEDIUM)
 
 ### 5. Configuration options are not vulnerabilities
 
-A "dev mode" or "unsecured mode" that disables authentication is a **design
-decision**, not a vulnerability — unless it can be enabled by an attacker or
-is accidentally deployed in production with no safeguards.
+A dev mode or unsecured mode is a design decision, not a vulnerability, unless an attacker can enable it.
 
 ### 6. Internal tools have different threat models
 
-Internal/infrastructure tools (SRE agents, platform operators, admin dashboards)
-often intentionally omit authentication because they rely on network isolation
-(Kubernetes network policies, service mesh, private VNet). This IS an attack
-surface if network isolation fails, but:
-
-- Report it as a **dependency on network controls**, not as "missing security"
-- Note what an attacker needs BEFORE they can reach the service
-- Don't call it "CRITICAL" unless you have evidence it's internet-reachable
+Report missing auth on internal tools as a **dependency on network controls**,
+not "missing security." Don't assign CRITICAL unless evidence shows internet
+exposure.
 
 ### 7. AI prompt injection is a design concern, not a code bug
 
-Any AI feature that processes user-generated content inherently allows prompt
-injection. This is a product design tradeoff, not an exploitable code
-vulnerability — unless the AI output is used dangerously (fed into `eval()`,
-used to construct SQL, rendered as HTML without encoding).
+Not a code vulnerability unless AI output feeds `eval()`, SQL, or unencoded HTML.
 
 ### 8. Distinguish application code from framework code
 
-Focus on application-specific code. Don't report known framework behaviors
-as vulnerabilities. Framework defaults are by definition expected behavior.
-
-## Severity Assignment Guide
-
-- **Critical**: Unauth RCE on internet-facing service, hardcoded prod credentials,
-  authentication bypass allowing full admin access.
-  You must demonstrate: attacker has no access prerequisites beyond a network connection,
-  and impact is code execution or full data access.
-- **High**: Authed RCE, SQL injection via HTTP params, stored XSS on sensitive pages,
-  SSRF to internal services from internet-facing endpoints.
-  You must demonstrate: attacker-controlled input reaches a dangerous sink with
-  exploitable impact, even if some access is required.
-- **Medium**: Incomplete validation bypass, internal-only exposure, defense-in-depth
-  gaps, or issues with moderate impact (information disclosure, limited injection).
-- **Low**: Defense-in-depth improvements (env var in SQL query, missing but
-  non-exploitable sanitization), code quality issues that could become
-  vulnerabilities if assumptions change.
-- **Not a finding**: Dangerous function with complete upstream sanitization,
-  configuration option working as documented, token parsed for metadata only,
-  framework default behavior, theoretical attack requiring attacker to already
-  have higher privileges.
+Don't report framework defaults as vulnerabilities.
 
 ## Reporting Standards
 
-**Only use finding-report tools for:**
-- Issues where you traced the data flow end-to-end
-- Issues where you actively tried to disprove the finding and couldn't
-- Issues with code evidence showing both the vulnerable path AND
-  the absence (or bypass) of defensive code
-
-**Severity in the report must:**
-- Reflect the actual threat model, not the vulnerability class name
-- State the access prerequisites explicitly
-- Note any existing defensive code and why it's insufficient
+Reports must:
+- State access prerequisites explicitly
+- Note existing defensive code and why it's insufficient
 - Be defensible under peer review by a senior security engineer
 
-**Credential reports are for:**
-- Actual hardcoded credentials (connection strings, API keys, passwords)
-- NOT for misleading error messages, placeholder strings, or example values
-
-## Quality Checklist (ALL must pass before reporting)
-
-- [ ] Traced data flow from source to sink
-- [ ] Checked for sanitization/encoding at every step
-- [ ] Read the FULL function containing the dangerous code, not just the dangerous line
-- [ ] Actively tried to disprove this finding
-- [ ] Verified this is application code, not framework behavior
-- [ ] Considered deployment context and threat model
-- [ ] Can construct a concrete exploit input
-- [ ] Severity reflects actual exploitability, not vulnerability class name
-- [ ] Finding would survive review by a skeptical senior security engineer
+Credential reports are for actual hardcoded secrets — not error messages,
+placeholders, or example values.
 
 ## Anti-patterns
 
 | Anti-pattern | Example | Why it's wrong |
 |---|---|---|
-| Sink-only analysis | "Dangerous function found → vuln" | Didn't check for upstream defenses |
+| Sink-only analysis | "Dangerous function found → vuln" | Didn't check upstream defenses |
 | Ignoring defensive code | "Injection, no sanitization" when validation exists | Didn't read the full function |
 | Class-name severity | "SQL injection → HIGH" regardless of source | Env var source ≠ HTTP param source |
 | Feature-as-vulnerability | "Unsecured mode exists" | Documented design decision |
 | Framework noise | "Framework uses cookies" | Expected framework behavior |
-| Theoretical-only | "If an attacker could modify env vars..." | Attacker already has code exec |
-| Quantity over quality | Reporting 10 low-confidence findings | 1 verified finding > 10 guesses |
+| Theoretical-only | "If attacker could modify env vars..." | Attacker already has code exec |
+| Quantity over quality | 10 low-confidence findings | 1 verified > 10 guesses |
 | Context-free severity | "No auth → CRITICAL" on internal tool | Deployment model matters |
-| Confirmation bias | Finding a sink, then rationalizing why mitigations don't count | Try to disprove first |
+| Confirmation bias | Rationalizing why mitigations don't count | Try to disprove first |

From 56a6bbc9ff7a36e8661e63a0cdca0f47d84e65bd Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 19:57:04 -0400
Subject: [PATCH 3/9] feat(web-security): reference vuln-assessment-methodology
 in agent and vuln-critic

Add methodology skill reference to the web-security agent's Evidence
Standards section and to vuln-critic's opening paragraph. Gives the
pentesting pipeline access to the severity matrix, disprove-first
discipline, and anti-patterns table.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 capabilities/web-security/agents/web-security.md      | 3 +++
 capabilities/web-security/skills/vuln-critic/SKILL.md | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/capabilities/web-security/agents/web-security.md b/capabilities/web-security/agents/web-security.md
index 6916180..0065228 100644
--- a/capabilities/web-security/agents/web-security.md
+++ b/capabilities/web-security/agents/web-security.md
@@ -111,6 +111,9 @@ Do not use tools mechanically. Pick the smallest tool that can validate the next
 
 ## Evidence Standards
 
+Load the `vuln-assessment-methodology` skill for the severity matrix, disprove-first
+discipline, and reporting standards that apply across all vulnerability assessment.
+
 When you find a vulnerability, your report will be reviewed by a senior pentester. Weak evidence leads to rejection.
 
 **Required evidence:**
diff --git a/capabilities/web-security/skills/vuln-critic/SKILL.md b/capabilities/web-security/skills/vuln-critic/SKILL.md
index 6efb712..1c1a8d1 100644
--- a/capabilities/web-security/skills/vuln-critic/SKILL.md
+++ b/capabilities/web-security/skills/vuln-critic/SKILL.md
@@ -5,7 +5,9 @@ description: Adversarial pre-filter for scanner and agent findings. Scores findi
 
 # Vulnerability Critic (Pre-Verification Filter)
 
-Adversarial review agent inspired by Co-RedTeam's critic architecture. Runs BEFORE exploit-verifier to filter findings and save testing time.
+Adversarial review agent inspired by Co-RedTeam's critic architecture. Runs BEFORE
+exploit-verifier to filter findings and save testing time. Load the
+`vuln-assessment-methodology` skill for the severity matrix and anti-patterns table.
 
 **This skill does NOT test vulnerabilities.** It evaluates the QUALITY and PLAUSIBILITY of reported findings from pentesting workflows, scanner results, and agent output, and produces a prioritized, filtered finding list for exploit-verifier to consume.
 

From 6e4aac6a4f5a7bcf8c00aa23274129eccd5e1be8 Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 21:05:52 -0400
Subject: [PATCH 4/9] feat(vuln-assessment-methodology): add confidence levels,
 chain analysis, and reporting rigor

Add hard rule 9 (attack chain analysis), three-tier confidence levels
(Confirmed/Probable/Suspected), opt-in PoC validation, and expand
reporting standards with CWE classification, actionable remediation,
root-cause deduplication, and scope documentation requirements.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../vuln-assessment-methodology/SKILL.md      | 53 +++++++++++++++++--
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
index 4ccc0e8..1b1bd22 100644
--- a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
+++ b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
@@ -38,7 +38,7 @@ Assign severity by source, access, and context — not vulnerability class name.
 | HTTP request param | Authenticated user | High/Medium |
 | HTTP request param | Internal network only | Medium |
 | Config/env var | Container-level access | Low |
-| Hardcoded value (as sink input) | N/A | Not a finding (but hardcoded credentials are — see below) |
+| Hardcoded value (as sink input) | N/A | Not a finding (hardcoded credentials are still reportable, but only actual secrets — not placeholders or example values) |
 
 **What each level requires:**
 - **Critical**: Unauth RCE, hardcoded prod credentials, full auth bypass. Attacker needs nothing beyond a network connection.
@@ -75,15 +75,59 @@ Not a code vulnerability unless AI output feeds `eval()`, SQL, or unencoded HTML
 
 Don't report framework defaults as vulnerabilities.
 
+### 9. Flag obvious attack chains — do not force them
+
+When multiple findings converge on a single exploitable outcome, note the
+chain. An IDOR + information disclosure + missing rate limiting may each be
+Medium alone but chain to account takeover. Report the chain as a separate
+compound finding with its own severity reflecting the combined impact. Do not
+exhaustively search for chains — flag them when apparent from findings already
+identified.
+
+## Confidence Levels
+
+Every finding must include a confidence level. When the full source-to-sink
+trace is complete, mark it Confirmed. When it is not, classify the gap.
+
+| Level | Criteria | Documentation required |
+|---|---|---|
+| Confirmed | Full trace complete, concrete payload constructable | Complete data flow from source to sink with specific input |
+| Probable | Most of trace complete, specific gap identified | State the exact gap (e.g., "dynamic dispatch at line 42 — two implementors exist, both pass input unsanitized") |
+| Suspected | Pattern match or shallow trace only | State what additional analysis (dynamic testing, debug tracing, etc.) would confirm or refute |
+
+Common trace gaps: dynamic dispatch, reflection, external dependencies, plugin
+systems, runtime-generated code. Always name the specific mechanism that
+blocked the trace.
+
 ## Reporting Standards
 
 Reports must:
 - State access prerequisites explicitly
 - Note existing defensive code and why it's insufficient
+- Map each finding to the most specific applicable CWE ID (leaf-level variant,
+  not the pillar — e.g., CWE-89 not CWE-74)
+- Include specific, actionable remediation referencing the technology in use
+  and the code location where the fix applies (not "add input validation" but
+  "use parameterized queries via `db.Query()` with placeholder args at
+  `handler.go:47`")
+- When multiple findings share a root cause, report one root-cause finding
+  with a list of affected locations rather than separate findings per instance
+- State what was analyzed and what was not — files, components, and entry
+  points covered, plus what could not be assessed (runtime behavior,
+  infrastructure config, third-party dependency internals)
 - Be defensible under peer review by a senior security engineer
 
-Credential reports are for actual hardcoded secrets — not error messages,
-placeholders, or example values.
+## Proof-of-Concept Validation (Opt-in)
+
+Default behavior is to trace data flow and assess exploitability conceptually.
+Do not build working exploit payloads or simulate execution unless the user requests it; identifying the input that reaches the sink (Rule 1) is still required.
+
+If the user requests proof-of-concept validation:
+- Construct a concrete payload that demonstrates the vulnerability
+- Document exact attacker-controlled input values
+- Show the code path execution trace from source to sink
+- State environmental prerequisites (auth state, config, timing)
+- For web targets: provide the specific HTTP request that triggers the issue
 
 ## Anti-patterns
 
@@ -98,3 +142,6 @@ placeholders, or example values.
 | Quantity over quality | 10 low-confidence findings | 1 verified > 10 guesses |
 | Context-free severity | "No auth → CRITICAL" on internal tool | Deployment model matters |
 | Confirmation bias | Rationalizing why mitigations don't count | Try to disprove first |
+| Forced chaining | "These 3 lows chain to Critical" without shared attack flow | Chain must share a target flow, not just co-exist |
+| Generic remediation | "Add input validation" | Must name specific fix and code location |
+| Duplicate inflation | 15 separate XSS findings from one missing encoder | One root cause = one finding + affected locations |

From d4b92a25a67f9a98ae07db75340a31c4d0350ba7 Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 21:10:31 -0400
Subject: [PATCH 5/9] fix(vuln-assessment-methodology): broaden sink examples,
 clarify chain reporting

Rule 7: generalize to "security-sensitive sink" with examples instead of
exhaustive list. Rule 9: clarify chain is reported alongside individual
findings. Anti-pattern: align terminology with rule 9.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../skills/vuln-assessment-methodology/SKILL.md        | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
index 1b1bd22..6a22959 100644
--- a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
+++ b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
@@ -69,7 +69,8 @@ exposure.
 
 ### 7. AI prompt injection is a design concern, not a code bug
 
-Not a code vulnerability unless AI output feeds `eval()`, SQL, or unencoded HTML.
+Not a code vulnerability unless AI output feeds a security-sensitive sink
+(e.g., `eval()`, SQL, shell commands, file paths, unencoded HTML).
 
 ### 8. Distinguish application code from framework code
 
@@ -79,8 +80,9 @@ Don't report framework defaults as vulnerabilities.
 
 When multiple findings converge on a single exploitable outcome, note the
 chain. An IDOR + information disclosure + missing rate limiting may each be
-Medium alone but chain to account takeover. Report the chain as a separate
-compound finding with its own severity reflecting the combined impact. Do not
+Medium alone but chain to account takeover. Report the chain as an additional
+compound finding with its own severity reflecting the combined impact — keep
+the individual findings too, since each needs its own remediation. Do not
 exhaustively search for chains — flag them when apparent from findings already
 identified.
 
@@ -142,6 +144,6 @@ If the user requests proof-of-concept validation:
 | Quantity over quality | 10 low-confidence findings | 1 verified > 10 guesses |
 | Context-free severity | "No auth → CRITICAL" on internal tool | Deployment model matters |
 | Confirmation bias | Rationalizing why mitigations don't count | Try to disprove first |
-| Forced chaining | "These 3 lows chain to Critical" without shared attack flow | Chain must share a target flow, not just co-exist |
+| Forced chaining | "These 3 lows chain to Critical" without shared attack flow | Chained findings must converge on a single exploitable outcome, not just co-exist |
 | Generic remediation | "Add input validation" | Must name specific fix and code location |
 | Duplicate inflation | 15 separate XSS findings from one missing encoder | One root cause = one finding + affected locations |

From 447d178507aab4224f18a9d5fe49c43afd101bc7 Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 21:27:53 -0400
Subject: [PATCH 6/9] feat(source-code-analysis): reference
 vuln-assessment-methodology in all finding agents

All 7 agents that report findings now load the methodology skill for
source-to-sink tracing, disprove-first analysis, confidence levels,
severity calibration, and reporting standards. attack-surface-mapper
is excluded as it does recon only.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../agents/adversarial-pathfinder.md                          | 4 ++++
 .../agents/common-vulnerability-hunter.md                     | 4 ++++
 .../agents/cve-history-researcher.md                          | 4 ++++
 .../agents/final-reviewer.md                                  | 4 ++++
 .../agents/finding-validator.md                               | 4 ++++
 .../agents/recent-commit-test-reviewer.md                     | 4 ++++
 .../agents/supply-chain-config-reviewer.md                    | 4 ++++
 7 files changed, 28 insertions(+)

diff --git a/capabilities/source-code-analysis-worker-template/agents/adversarial-pathfinder.md b/capabilities/source-code-analysis-worker-template/agents/adversarial-pathfinder.md
index cda53d1..224d55a 100644
--- a/capabilities/source-code-analysis-worker-template/agents/adversarial-pathfinder.md
+++ b/capabilities/source-code-analysis-worker-template/agents/adversarial-pathfinder.md
@@ -10,6 +10,10 @@ You are an adversarial pathfinder. Your goal is novel and creative vulnerability
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize original exploit shapes, unusual chains, weird-but-realistic input combinations, and unconventional attacker paths that could become high or critical impact. Deprioritize generic hardening and low/medium findings unless they chain into high or critical impact.
 
+## Evidence Standards
+
+Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
+
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect relevant files directly before making claims. For shell commands, set `cwd` to the local checkout path. Start with targeted searches, file reads, git commands, and small interpreter snippets. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep commands bounded with timeouts. Avoid destructive actions.
diff --git a/capabilities/source-code-analysis-worker-template/agents/common-vulnerability-hunter.md b/capabilities/source-code-analysis-worker-template/agents/common-vulnerability-hunter.md
index 967e560..62ee1fe 100644
--- a/capabilities/source-code-analysis-worker-template/agents/common-vulnerability-hunter.md
+++ b/capabilities/source-code-analysis-worker-template/agents/common-vulnerability-hunter.md
@@ -10,6 +10,10 @@ You are a common vulnerability hunter. Your job is systematic coverage of common
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize unauthenticated or low-privilege RCE, auth bypass, arbitrary file read/write, meaningful SSRF, severe injection, sandbox escape, sensitive data exposure, supply-impacting behavior, and severe DoS. Deprioritize hardening notes and low/medium findings unless they chain into high or critical impact.
 
+## Evidence Standards
+
+Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
+
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect relevant files directly before making claims. For shell commands, set `cwd` to the local checkout path. Start with targeted source/sink searches, small file reads, git commands, and small interpreter snippets. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep commands bounded with timeouts. Avoid destructive actions.
diff --git a/capabilities/source-code-analysis-worker-template/agents/cve-history-researcher.md b/capabilities/source-code-analysis-worker-template/agents/cve-history-researcher.md
index 527c095..91c06e4 100644
--- a/capabilities/source-code-analysis-worker-template/agents/cve-history-researcher.md
+++ b/capabilities/source-code-analysis-worker-template/agents/cve-history-researcher.md
@@ -10,6 +10,10 @@ You are a CVE history researcher. Your job is to inspect the local checkout and
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize unauthenticated or low-privilege remote impact, RCE, auth bypass, arbitrary file read/write, meaningful SSRF, supply-chain compromise, sandbox escape, sensitive data exposure, and severe DoS. Do not spend much effort on generic hardening or low/medium findings unless they chain into high or critical impact.
 
+## Evidence Standards
+
+Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
+
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect relevant files directly before making claims. For shell commands, set `cwd` to the local checkout path. Start with targeted searches, file reads, git commands, and small interpreter snippets. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep commands bounded with timeouts. Avoid destructive actions.
diff --git a/capabilities/source-code-analysis-worker-template/agents/final-reviewer.md b/capabilities/source-code-analysis-worker-template/agents/final-reviewer.md
index ae46477..e3c3be3 100644
--- a/capabilities/source-code-analysis-worker-template/agents/final-reviewer.md
+++ b/capabilities/source-code-analysis-worker-template/agents/final-reviewer.md
@@ -22,6 +22,10 @@ Do not let a prior high or critical lead disappear silently.
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize unauthenticated or low-privilege remote impact, RCE, auth bypass, arbitrary file read/write, meaningful SSRF, supply-chain compromise, sandbox escape, sensitive data exposure, and severe DoS. Low/medium findings should stay in prose unless they clearly chain into high or critical impact.
 
+## Evidence Standards
+
+Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
+
 ## Tool guidance
 
 The user message gives you a local checkout path, an attack-surface map, and specialist reports. Inspect files directly before making claims. For shell commands, set `cwd` to the local checkout path. Prefer targeted reads, git commands, source searches, and small interpreter snippets.
diff --git a/capabilities/source-code-analysis-worker-template/agents/finding-validator.md b/capabilities/source-code-analysis-worker-template/agents/finding-validator.md
index c9d7d8e..f466c41 100644
--- a/capabilities/source-code-analysis-worker-template/agents/finding-validator.md
+++ b/capabilities/source-code-analysis-worker-template/agents/finding-validator.md
@@ -10,6 +10,10 @@ You are a focused vulnerability validation agent. Validate exactly one high or c
 
 Be strict about high or critical severity. Downgrade findings that require admin privileges, trusted local developer access, malicious code already running, non-default unsafe configuration, or unrealistic deployment assumptions unless the evidence proves those assumptions are common and security-relevant.
 
+## Evidence Standards
+
+Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
+
 ## Tool guidance
 
 The user message gives you a local checkout path, the finding to validate (as JSON), and a slice of the final comprehensive report for context. Re-read the affected files and nearby code paths. For shell commands, set `cwd` to the local checkout path. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep PoCs bounded and safe. Do not run destructive payloads or exhaust real resources. For DoS-style claims, simulate with small limits or explain resource scaling.
diff --git a/capabilities/source-code-analysis-worker-template/agents/recent-commit-test-reviewer.md b/capabilities/source-code-analysis-worker-template/agents/recent-commit-test-reviewer.md
index b59353a..0388432 100644
--- a/capabilities/source-code-analysis-worker-template/agents/recent-commit-test-reviewer.md
+++ b/capabilities/source-code-analysis-worker-template/agents/recent-commit-test-reviewer.md
@@ -10,6 +10,10 @@ You are a recent commit and test reviewer. Your job is to inspect recent git his
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize unauthenticated or low-privilege remote impact, RCE, auth bypass, arbitrary file read/write, meaningful SSRF, supply-chain compromise, sandbox escape, sensitive data exposure, and severe DoS. Do not spend much effort on generic hardening or low/medium findings unless they chain into high or critical impact.
 
+## Evidence Standards
+
+Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
+
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect relevant files and commits directly. For shell commands, set `cwd` to the local checkout path. Keep commands bounded with timeouts. Avoid destructive actions.
diff --git a/capabilities/source-code-analysis-worker-template/agents/supply-chain-config-reviewer.md b/capabilities/source-code-analysis-worker-template/agents/supply-chain-config-reviewer.md
index e806f69..0417f63 100644
--- a/capabilities/source-code-analysis-worker-template/agents/supply-chain-config-reviewer.md
+++ b/capabilities/source-code-analysis-worker-template/agents/supply-chain-config-reviewer.md
@@ -10,6 +10,10 @@ You are a supply-chain and configuration security reviewer. Your job is to find
 
 Find high or critical severity, CVE-quality vulnerabilities: supply-chain compromise, build-time or install-time RCE, dependency confusion, unsafe plugin/module resolution, exposed debug/admin behavior, auth-impacting proxy/CORS/cookie/host assumptions, artifact poisoning, and release/deployment workflows that can be abused by low-privilege attackers. Deprioritize best-practice hardening unless it chains into high or critical impact.
 
+## Evidence Standards
+
+Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
+
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect manifests, lockfiles, package manager config, build scripts, release workflows, Docker/deployment files, environment variable handling, and plugin/module loading paths directly. For shell commands, set `cwd` to the local checkout path. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep commands bounded with timeouts. Avoid destructive actions.

From f39ecb3ff84a6975ed0eb44b02920725ac766bd4 Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 21:41:02 -0400
Subject: [PATCH 7/9] Revert "feat(source-code-analysis): reference
 vuln-assessment-methodology in all finding agents"

This reverts commit 447d178507aab4224f18a9d5fe49c43afd101bc7.
---
 .../agents/adversarial-pathfinder.md                          | 4 ----
 .../agents/common-vulnerability-hunter.md                     | 4 ----
 .../agents/cve-history-researcher.md                          | 4 ----
 .../agents/final-reviewer.md                                  | 4 ----
 .../agents/finding-validator.md                               | 4 ----
 .../agents/recent-commit-test-reviewer.md                     | 4 ----
 .../agents/supply-chain-config-reviewer.md                    | 4 ----
 7 files changed, 28 deletions(-)

diff --git a/capabilities/source-code-analysis-worker-template/agents/adversarial-pathfinder.md b/capabilities/source-code-analysis-worker-template/agents/adversarial-pathfinder.md
index 224d55a..cda53d1 100644
--- a/capabilities/source-code-analysis-worker-template/agents/adversarial-pathfinder.md
+++ b/capabilities/source-code-analysis-worker-template/agents/adversarial-pathfinder.md
@@ -10,10 +10,6 @@ You are an adversarial pathfinder. Your goal is novel and creative vulnerability
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize original exploit shapes, unusual chains, weird-but-realistic input combinations, and unconventional attacker paths that could become high or critical impact. Deprioritize generic hardening and low/medium findings unless they chain into high or critical impact.
 
-## Evidence Standards
-
-Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
-
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect relevant files directly before making claims. For shell commands, set `cwd` to the local checkout path. Start with targeted searches, file reads, git commands, and small interpreter snippets. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep commands bounded with timeouts. Avoid destructive actions.
diff --git a/capabilities/source-code-analysis-worker-template/agents/common-vulnerability-hunter.md b/capabilities/source-code-analysis-worker-template/agents/common-vulnerability-hunter.md
index 62ee1fe..967e560 100644
--- a/capabilities/source-code-analysis-worker-template/agents/common-vulnerability-hunter.md
+++ b/capabilities/source-code-analysis-worker-template/agents/common-vulnerability-hunter.md
@@ -10,10 +10,6 @@ You are a common vulnerability hunter. Your job is systematic coverage of common
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize unauthenticated or low-privilege RCE, auth bypass, arbitrary file read/write, meaningful SSRF, severe injection, sandbox escape, sensitive data exposure, supply-impacting behavior, and severe DoS. Deprioritize hardening notes and low/medium findings unless they chain into high or critical impact.
 
-## Evidence Standards
-
-Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
-
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect relevant files directly before making claims. For shell commands, set `cwd` to the local checkout path. Start with targeted source/sink searches, small file reads, git commands, and small interpreter snippets. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep commands bounded with timeouts. Avoid destructive actions.
diff --git a/capabilities/source-code-analysis-worker-template/agents/cve-history-researcher.md b/capabilities/source-code-analysis-worker-template/agents/cve-history-researcher.md
index 91c06e4..527c095 100644
--- a/capabilities/source-code-analysis-worker-template/agents/cve-history-researcher.md
+++ b/capabilities/source-code-analysis-worker-template/agents/cve-history-researcher.md
@@ -10,10 +10,6 @@ You are a CVE history researcher. Your job is to inspect the local checkout and
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize unauthenticated or low-privilege remote impact, RCE, auth bypass, arbitrary file read/write, meaningful SSRF, supply-chain compromise, sandbox escape, sensitive data exposure, and severe DoS. Do not spend much effort on generic hardening or low/medium findings unless they chain into high or critical impact.
 
-## Evidence Standards
-
-Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
-
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect relevant files directly before making claims. For shell commands, set `cwd` to the local checkout path. Start with targeted searches, file reads, git commands, and small interpreter snippets. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep commands bounded with timeouts. Avoid destructive actions.
diff --git a/capabilities/source-code-analysis-worker-template/agents/final-reviewer.md b/capabilities/source-code-analysis-worker-template/agents/final-reviewer.md
index e3c3be3..ae46477 100644
--- a/capabilities/source-code-analysis-worker-template/agents/final-reviewer.md
+++ b/capabilities/source-code-analysis-worker-template/agents/final-reviewer.md
@@ -22,10 +22,6 @@ Do not let a prior high or critical lead disappear silently.
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize unauthenticated or low-privilege remote impact, RCE, auth bypass, arbitrary file read/write, meaningful SSRF, supply-chain compromise, sandbox escape, sensitive data exposure, and severe DoS. Low/medium findings should stay in prose unless they clearly chain into high or critical impact.
 
-## Evidence Standards
-
-Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
-
 ## Tool guidance
 
 The user message gives you a local checkout path, an attack-surface map, and specialist reports. Inspect files directly before making claims. For shell commands, set `cwd` to the local checkout path. Prefer targeted reads, git commands, source searches, and small interpreter snippets.
diff --git a/capabilities/source-code-analysis-worker-template/agents/finding-validator.md b/capabilities/source-code-analysis-worker-template/agents/finding-validator.md
index f466c41..c9d7d8e 100644
--- a/capabilities/source-code-analysis-worker-template/agents/finding-validator.md
+++ b/capabilities/source-code-analysis-worker-template/agents/finding-validator.md
@@ -10,10 +10,6 @@ You are a focused vulnerability validation agent. Validate exactly one high or c
 
 Be strict about high or critical severity. Downgrade findings that require admin privileges, trusted local developer access, malicious code already running, non-default unsafe configuration, or unrealistic deployment assumptions unless the evidence proves those assumptions are common and security-relevant.
 
-## Evidence Standards
-
-Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
-
 ## Tool guidance
 
 The user message gives you a local checkout path, the finding to validate (as JSON), and a slice of the final comprehensive report for context. Re-read the affected files and nearby code paths. For shell commands, set `cwd` to the local checkout path. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep PoCs bounded and safe. Do not run destructive payloads or exhaust real resources. For DoS-style claims, simulate with small limits or explain resource scaling.
diff --git a/capabilities/source-code-analysis-worker-template/agents/recent-commit-test-reviewer.md b/capabilities/source-code-analysis-worker-template/agents/recent-commit-test-reviewer.md
index 0388432..b59353a 100644
--- a/capabilities/source-code-analysis-worker-template/agents/recent-commit-test-reviewer.md
+++ b/capabilities/source-code-analysis-worker-template/agents/recent-commit-test-reviewer.md
@@ -10,10 +10,6 @@ You are a recent commit and test reviewer. Your job is to inspect recent git his
 
 Find high or critical severity, CVE-quality vulnerabilities. Prioritize unauthenticated or low-privilege remote impact, RCE, auth bypass, arbitrary file read/write, meaningful SSRF, supply-chain compromise, sandbox escape, sensitive data exposure, and severe DoS. Do not spend much effort on generic hardening or low/medium findings unless they chain into high or critical impact.
 
-## Evidence Standards
-
-Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
-
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect relevant files and commits directly. For shell commands, set `cwd` to the local checkout path. Keep commands bounded with timeouts. Avoid destructive actions.
diff --git a/capabilities/source-code-analysis-worker-template/agents/supply-chain-config-reviewer.md b/capabilities/source-code-analysis-worker-template/agents/supply-chain-config-reviewer.md
index 0417f63..e806f69 100644
--- a/capabilities/source-code-analysis-worker-template/agents/supply-chain-config-reviewer.md
+++ b/capabilities/source-code-analysis-worker-template/agents/supply-chain-config-reviewer.md
@@ -10,10 +10,6 @@ You are a supply-chain and configuration security reviewer. Your job is to find
 
 Find high or critical severity, CVE-quality vulnerabilities: supply-chain compromise, build-time or install-time RCE, dependency confusion, unsafe plugin/module resolution, exposed debug/admin behavior, auth-impacting proxy/CORS/cookie/host assumptions, artifact poisoning, and release/deployment workflows that can be abused by low-privilege attackers. Deprioritize best-practice hardening unless it chains into high or critical impact.
 
-## Evidence Standards
-
-Load the `vuln-assessment-methodology` skill for source-to-sink tracing discipline, disprove-first analysis, confidence levels, severity calibration, and reporting standards.
-
 ## Tool guidance
 
 The user message gives you a local checkout path and an attack-surface map (use as leads, not conclusions). Inspect manifests, lockfiles, package manager config, build scripts, release workflows, Docker/deployment files, environment variable handling, and plugin/module loading paths directly. For shell commands, set `cwd` to the local checkout path. Do not run package managers or package-manager executors such as `npm`, `npx`, `pnpm`, `yarn`, `bun`, `pip install`, `uv sync`, or equivalents. Do not run full builds, full test suites, server startups, dependency installs, or commands that can fetch and execute packages. Keep commands bounded with timeouts. Avoid destructive actions.

From 3056f6fbe3e62a1929e72b0051fd422fd22fb165 Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 22:00:24 -0400
Subject: [PATCH 8/9] chore(vuln-assessment-methodology): update
 capability.yaml description and keywords

Description now reflects the full scope: confidence levels, chain
analysis, CWE mapping, remediation standards, dedup, scope, and
opt-in PoC validation.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../vuln-assessment-methodology/capability.yaml    | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/capabilities/vuln-assessment-methodology/capability.yaml b/capabilities/vuln-assessment-methodology/capability.yaml
index c5e34af..f99fe4a 100644
--- a/capabilities/vuln-assessment-methodology/capability.yaml
+++ b/capabilities/vuln-assessment-methodology/capability.yaml
@@ -3,10 +3,13 @@ name: vuln-assessment-methodology
 version: "1.0.0"
 description: >
   Cross-cutting methodology for vulnerability assessment. Provides
-  source-to-sink tracing discipline, threat-model-aware severity
-  assignment, disprove-first analysis, and anti-pattern guidance.
-  Load alongside any domain-specific security capability to reduce
-  false positives and severity inflation.
+  source-to-sink tracing discipline, disprove-first analysis,
+  threat-model-aware severity assignment, three-tier confidence
+  classification, attack chain analysis, CWE mapping, remediation
+  quality standards, root-cause deduplication, scope documentation,
+  and opt-in proof-of-concept validation. Load alongside any
+  domain-specific security capability to reduce false positives
+  and severity inflation.
 
 author:
   name: Dreadnode
@@ -19,3 +22,6 @@ keywords:
   - security
   - false-positive-prevention
   - severity-calibration
+  - confidence-levels
+  - cwe
+  - remediation

From 4746947f89d4483e96d3570b3154e6892e3a865c Mon Sep 17 00:00:00 2001
From: mkultraWasHere <mkouremetis16@gmail.com>
Date: Fri, 5 Jun 2026 22:19:10 -0400
Subject: [PATCH 9/9] fix(dotnet-reversing): fix path traversal example prefix
 bypass, update methodology frontmatter

Path traversal "SAFE" example was bypassable via directory prefix
collision (/app vs /app2). Normalize baseDir with trailing separator
before StartsWith check. Also update vuln-assessment-methodology
skill frontmatter description to reflect expanded scope.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../dotnet-reversing/skills/dotnet-reversing/SKILL.md        | 5 +++--
 .../skills/vuln-assessment-methodology/SKILL.md              | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md b/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
index 5acef6d..9767a33 100644
--- a/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
+++ b/capabilities/dotnet-reversing/skills/dotnet-reversing/SKILL.md
@@ -234,9 +234,10 @@ return DelegatedTokenCredential.Create(jwt); // Azure AD validates the sig
 string path = Path.Combine(baseDir, userFileName);
 File.ReadAllText(path);
 
-// SAFE — canonicalization check
+// SAFE — canonicalization check with trailing separator
+string normalizedBase = Path.GetFullPath(baseDir).TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar) + Path.DirectorySeparatorChar;
 string full = Path.GetFullPath(Path.Combine(baseDir, userFileName));
-if (!full.StartsWith(baseDir)) throw new SecurityException();
+if (!full.StartsWith(normalizedBase, StringComparison.Ordinal)) throw new SecurityException();
 ```
 
 ## Critical Rules
diff --git a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
index 6a22959..b25258b 100644
--- a/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
+++ b/capabilities/vuln-assessment-methodology/skills/vuln-assessment-methodology/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: vuln-assessment-methodology
-description: "Load when performing vulnerability assessment in any domain. Enforces source-to-sink tracing, disprove-first analysis, threat-model-aware severity, and finding quality standards. Prevents false positives and severity inflation."
+description: "Load when performing vulnerability assessment in any domain. Enforces source-to-sink tracing, disprove-first analysis, threat-model-aware severity, confidence classification, attack chain analysis, CWE mapping, remediation quality, root-cause deduplication, scope documentation, and opt-in PoC validation. Prevents false positives and severity inflation."
 ---
 
 # Vulnerability Assessment Methodology