Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion src/helpers/detect-markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,26 @@ const MD_HEADING = /^#{1,6}\s+\S/m;
const MD_LINK = /\[[^\]]+\]\([^)]+\)/;
const MD_CODE_FENCE = /^```/m;

/**
 * Remove Markdown code from `text` so that HTML tags which are merely
 * *mentioned* in code (e.g. `<body>` or a fenced HTML snippet) are not seen by
 * later HTML pattern checks.
 *
 * Fenced code blocks are removed entirely. A fence is a run of three or more
 * backticks or tildes at the start of a line, optionally indented (as happens
 * inside list items) and optionally followed by an info string such as `html`.
 * The block ends at the first later line that holds only a fence of the same
 * character that is at least as long as the opening one. If no such line
 * exists, the block runs to the end of the input; this covers input that was
 * truncated in the middle of a code block.
 *
 * Inline code spans (single-line, backtick-delimited) are replaced by an empty
 * pair of backticks, so the surrounding text keeps a marker where code stood.
 *
 * @param text - Markdown (or arbitrary) text to clean.
 * @returns `text` with fenced code blocks removed and inline code emptied.
 */
function stripCode(text: string): string {
  // Piece by piece:
  //   ^[ \t]*                 fence may be indented
  //   (?!`+[^`\n]+`)          a backtick "fence" whose line contains another
  //                           backtick is really an inline span, not a fence
  //   ((`|~)\2{2,})           \1 = the whole opening fence, \2 = its character
  //   [^\n]*                  info string (rest of the opening line)
  //   [\s\S]*?                block content, as little as possible
  //   \n[ \t]*\1\2*[ \t]*$    closing fence: same char, at least as long
  //   (?![\s\S])              ...or end of input when the block is never closed
  const fencedBlock =
    /^[ \t]*(?!`+[^`\n]+`)((`|~)\2{2,})[^\n]*[\s\S]*?(?:\n[ \t]*\1\2*[ \t]*$|(?![\s\S]))/gm;
  // A single-line span delimited by backticks with at least one character inside.
  const inlineSpan = /`[^`\n]+`/g;

  return text.replace(fencedBlock, '').replace(inlineSpan, '``');
}

/**
 * Returns true if the body looks like HTML (contains DOCTYPE, <html>, <head>, or <body> tags).
 * Fenced code blocks and inline code spans are stripped first to avoid false positives
 * from markdown that mentions HTML tags in code examples.
 *
 * @param body - Raw document text to classify.
 * @returns `true` when any entry of `HTML_PATTERNS` matches the inspected sample.
 */
export function looksLikeHtml(body: string): boolean {
  // Only the first 2000 characters are inspected. `sample` is declared exactly
  // once: the earlier un-stripped declaration was superseded by this one.
  const sample = stripCode(body.slice(0, 2000));
  return HTML_PATTERNS.some((p) => p.test(sample));
}

Expand Down
26 changes: 26 additions & 0 deletions test/unit/helpers/detect-markdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,26 @@ describe('looksLikeHtml', () => {
// Plain markdown with no HTML markers at all.
it('returns false for markdown', () => {
  const plainMarkdown = '# Hello\n\nThis is **markdown**.';
  expect(looksLikeHtml(plainMarkdown)).toBe(false);
});

// A complete HTML document quoted inside a backtick fence is only an example.
it('ignores HTML tags inside fenced code blocks', () => {
  const detected = looksLikeHtml(
    '# Example\n\n```html\n<!DOCTYPE html>\n<html>\n<body>Hello</body>\n</html>\n```\n',
  );
  expect(detected).toBe(false);
});

// A tag mentioned in an inline code span is prose about HTML, not HTML.
it('ignores HTML tags inside inline code spans', () => {
  const detected = looksLikeHtml('# Setup\n\nAdd the script before the closing `</body>` tag.\n');
  expect(detected).toBe(false);
});

// Same as the backtick-fence case, but using a tilde fence.
it('ignores HTML tags inside tilde fenced code blocks', () => {
  const detected = looksLikeHtml(
    '# Example\n\n~~~html\n<html>\n<head><title>Test</title></head>\n</html>\n~~~\n',
  );
  expect(detected).toBe(false);
});

// Real markup that appears outside any code must still be recognised.
it('still detects real HTML outside of code blocks', () => {
  const realHtml = '<!DOCTYPE html>\n<html>\n```not a code block\n```\n</html>';
  const detected = looksLikeHtml(realHtml);
  expect(detected).toBe(true);
});
});

describe('looksLikeMarkdown', () => {
Expand All @@ -48,4 +68,10 @@ describe('looksLikeMarkdown', () => {
// Text without headings, links or fences carries no markdown signal.
it('returns false for plain text with no markdown signals', () => {
  const plainText = 'Just some plain text without any formatting.';
  expect(looksLikeMarkdown(plainText)).toBe(false);
});

// HTML that only appears in code examples must not stop the document
// from being classified as markdown.
it('returns true for markdown containing HTML examples in code', () => {
  const detected = looksLikeMarkdown(
    '# Web API\n\nAdd the script before `</body>`.\n\n```html\n<html><body>Hello</body></html>\n```\n',
  );
  expect(detected).toBe(true);
});
});