/** Matches an ATX heading: 1-6 `#` characters at the start of a line, then whitespace and text. */
const MD_HEADING = /^#{1,6}\s+\S/m;
/** Matches an inline link of the form `[text](target)`. */
const MD_LINK = /\[[^\]]+\]\([^)]+\)/;
/** Matches a line that starts with three backticks. */
const MD_CODE_FENCE = /^```/m;

/** Number of leading characters of a body that `looksLikeHtml` inspects. */
const HTML_SAMPLE_LENGTH = 2000;

/**
 * Candidate opening fence: up to three spaces of indentation, a run of three or
 * more backticks or tildes (group 1), then the info string (group 2).
 */
const FENCE_OPEN = /^ {0,3}(`{3,}|~{3,})([^\n]*)/;

/**
 * Candidate closing fence: same shape as an opener but with nothing after the
 * marker except spaces/tabs. A trailing `\r` is tolerated so CRLF input works.
 */
const FENCE_CLOSE = /^ {0,3}(`{3,}|~{3,})[ \t]*\r?$/;

/** A single-line inline code span delimited by single backticks. */
const INLINE_CODE_SPAN = /`[^`\n]+`/g;

/**
 * Strip fenced code blocks and inline code spans so that HTML tags mentioned
 * inside code (an inline span or a fenced HTML snippet) don't produce false
 * positives when checking for HTML patterns.
 *
 * Fences are recognised line by line:
 * - a block opens on a line starting (after at most three spaces) with three
 *   or more backticks or tildes; a backtick opener whose info string contains
 *   a backtick is treated as inline code, not a fence;
 * - it closes on a line made of the same character, at least as long as the
 *   opener;
 * - a block with no closer extends to the end of the input. This matters
 *   because callers may pass a truncated sample that cuts a block in half.
 *
 * @param text - Markdown (or other) text to sanitise.
 * @returns The text with each fenced block collapsed to an empty line and each
 *   inline code span replaced by an empty span (two backticks).
 */
function stripCode(text: string): string {
  const kept: string[] = [];
  // Marker of the fence that is currently open, or undefined outside a block.
  let openMarker: string | undefined;

  for (const line of text.split('\n')) {
    if (openMarker === undefined) {
      const opener = FENCE_OPEN.exec(line);
      const marker = opener?.[1];
      const info = opener?.[2] ?? '';
      const isBacktickSpan = marker !== undefined && marker.startsWith('`') && info.includes('`');

      if (marker !== undefined && !isBacktickSpan) {
        openMarker = marker;
        // Keep one empty line where the block was so surrounding lines stay separated.
        kept.push('');
      } else {
        kept.push(line);
      }
      continue;
    }

    // Inside a block: drop the line, and leave the block on a matching closer.
    // Both markers are runs of a single character, so `startsWith` checks
    // "same character and at least as long" in one step.
    const closer = FENCE_CLOSE.exec(line)?.[1];
    if (closer !== undefined && closer.startsWith(openMarker)) {
      openMarker = undefined;
    }
  }

  return kept.join('\n').replace(INLINE_CODE_SPAN, '``');
}

/**
 * Returns true if the body looks like HTML, i.e. its leading sample matches at
 * least one entry of `HTML_PATTERNS` (declared earlier in this module).
 *
 * Only the first `HTML_SAMPLE_LENGTH` characters are inspected. Fenced code
 * blocks and inline code spans are stripped from that sample first to avoid
 * false positives from markdown that mentions HTML tags in code examples.
 *
 * @param body - Raw response/document body to classify.
 * @returns `true` when any HTML pattern matches the code-stripped sample.
 */
export function looksLikeHtml(body: string): boolean {
  const sample = stripCode(body.slice(0, HTML_SAMPLE_LENGTH));
  return HTML_PATTERNS.some((p) => p.test(sample));
}