Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions src/generators/metadata/constants.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,5 @@ export const DOC_API_HEADING_TYPES = [
},
];

// This regex is used to match basic TypeScript generic types (e.g., Promise<string>)
// Capture group 1 is everything before the first `<` (the base type) and
// capture group 2 is the text between the angle brackets.
// Because group 2 may not contain `>`, nested generics such as
// `Promise<Map<string, number>>` do not match this pattern.
export const TYPE_GENERIC_REGEX = /^([^<]+)<([^>]+)>$/;

// This is the base URL of the Man7 documentation
// It is only a prefix: it ends in `/man` with no trailing slash, so the
// consumer appends the rest of the page path to it.
export const DOC_MAN_BASE_URL = 'http://man7.org/linux/man-pages/man';
23 changes: 23 additions & 0 deletions src/generators/metadata/utils/__tests__/transformers.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,27 @@ describe('transformTypeToReferenceLink', () => {
'[`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)&gt; & [`<Array>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)&gt;'
);
});

it('should transform a function returning a Generic type', () => {
strictEqual(
transformTypeToReferenceLink('(err: Error) => Promise<boolean>', {}),
'(err: Error) =&gt; [`<Promise>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)&lt;[`<boolean>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)&gt;'
);
});

it('should respect precedence: Unions (|) are weaker than Intersections (&)', () => {
strictEqual(
transformTypeToReferenceLink('string | number & boolean', {}),
'[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [`<boolean>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)'
);
});

it('should handle extreme nested combinations of functions, generics, unions, and intersections', () => {
const input =
'(str: MyType) => Promise<Map<string, number & string>, Map<string | number>>';
const expected =
'(str: MyType) =&gt; [`<Promise>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)&lt;[`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)&gt;, [`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)&gt;&gt;';

strictEqual(transformTypeToReferenceLink(input, {}), expected);
});
});
111 changes: 3 additions & 108 deletions src/generators/metadata/utils/transformers.mjs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import {
DOC_MAN_BASE_URL,
DOC_API_HEADING_TYPES,
TYPE_GENERIC_REGEX,
} from '../constants.mjs';
import { DOC_MAN_BASE_URL, DOC_API_HEADING_TYPES } from '../constants.mjs';
import { slug } from './slugger.mjs';
import { parseType } from './typeParser.mjs';
import { transformNodesToString } from '../../../utils/unist.mjs';
import BUILTIN_TYPE_MAP from '../maps/builtin.json' with { type: 'json' };
import MDN_TYPE_MAP from '../maps/mdn.json' with { type: 'json' };
Expand All @@ -22,84 +19,7 @@ export const transformUnixManualToLink = (
) => {
return `[\`${text}\`](${DOC_MAN_BASE_URL}${sectionNumber}/${command}.${sectionNumber}${sectionLetter}.html)`;
};
/**
 * Breaks a type string apart at every `|` or `&` found outside of angle
 * brackets. Operators nested inside `< >` are left untouched.
 *
 * The pieces are returned exactly as they appear in the input (they are NOT
 * trimmed). Only the first top-level operator encountered determines the
 * returned separator, so a string mixing `|` and `&` reports whichever of the
 * two came first. When no top-level operator exists, `separator` is `undefined`.
 *
 * @param {string} str The type string to split
 * @returns {{ pieces: string[], separator: string }} The split pieces and the separator string used to join them (` | ` or ` & `)
 */
const splitByOuterSeparator = str => {
  const segments = [];
  let buffer = '';
  let nesting = 0;
  let joiner;

  for (const ch of str) {
    // A top-level operator closes the current segment and is itself dropped
    if (nesting === 0 && (ch === '|' || ch === '&')) {
      segments.push(buffer);
      buffer = '';

      // Remember only the first operator seen
      if (joiner === undefined) {
        joiner = ` ${ch} `;
      }

      continue;
    }

    // Angle brackets adjust the nesting level but stay part of the segment
    if (ch === '<') {
      nesting += 1;
    } else if (ch === '>') {
      nesting -= 1;
    }

    buffer += ch;
  }

  // Whatever is left after the last operator is the final segment
  segments.push(buffer);

  return { pieces: segments, separator: joiner };
};

/**
 * Formats a single-level generic type (e.g., `Promise<string>`) as Markdown.
 *
 * The base type and each type argument are resolved through `transformType`;
 * a name that resolves to a truthy URL becomes a Markdown link, otherwise it
 * is rendered as inline code. `|` and `,` between the type arguments are kept
 * and normalised to ` | ` and `, `.
 *
 * @param {string} typePiece The plain type piece to be evaluated
 * @param {Function} transformType The function used to resolve individual types into links
 * @returns {string|null} The formatted Markdown link, or null if no match is found
 */
const formatBasicGeneric = (typePiece, transformType) => {
  const match = typePiece.match(TYPE_GENERIC_REGEX);

  // Not a generic that the regex recognises: let the caller fall back
  if (!match) {
    return null;
  }

  const [, rawBase, rawInner] = match;
  const baseType = rawBase.trim();
  const innerType = rawInner.trim();

  // Resolves a name (ignoring one trailing `[]` for the lookup only) and
  // renders it either as a link or as plain inline code
  const toMarkdown = name => {
    const url = transformType(name.replace(/\[\]$/, ''));

    return url ? `[\`<${name}>\`](${url})` : `\`<${name}>\``;
  };

  // The base type is resolved before any of the type arguments
  const baseFormatted = toMarkdown(baseType);

  // The capturing group keeps the delimiters in the resulting token list
  let innerFormatted = '';

  for (const token of innerType.split(/([|,])/)) {
    const text = token.trim();

    if (text === '|') {
      innerFormatted += ' | ';
    } else if (text === ',') {
      innerFormatted += ', ';
    } else {
      innerFormatted += toMarkdown(text);
    }
  }

  return `${baseFormatted}&lt;${innerFormatted}&gt;`;
};
/**
* This method replaces plain text Types within the Markdown content into Markdown links
* that link to the actual relevant reference for such type (either internal or external link)
Expand Down Expand Up @@ -150,32 +70,7 @@ export const transformTypeToReferenceLink = (type, record) => {
return '';
};

const { pieces: outerPieces, separator } = splitByOuterSeparator(typeInput);

const typePieces = outerPieces.map(piece => {
// This is the content to render as the text of the Markdown link
const trimmedPiece = piece.trim();

// 1. Attempt to format as a basic Generic type first
const genericMarkdown = formatBasicGeneric(trimmedPiece, transformType);
if (genericMarkdown) {
return genericMarkdown;
}

// 2. Fallback to the logic for plain types
// This is what we will compare against the API types mappings
// The ReGeX below is used to remove `[]` from the end of the type
const result = transformType(trimmedPiece.replace(/\[\]$/, ''));

// If we have a valid result and the piece is not empty, we return the Markdown link
if (trimmedPiece.length && result.length) {
return `[\`<${trimmedPiece}>\`](${result})`;
}
});

// Filter out pieces that we failed to map and then join the valid ones
// using the same separator that appeared in the original type string
const markdownLinks = typePieces.filter(Boolean).join(separator);
const markdownLinks = parseType(typeInput, transformType);

// Return the replaced links or the original content if they all failed to be replaced
// Note that if some failed to get replaced, only the valid ones will be returned
Expand Down
170 changes: 170 additions & 0 deletions src/generators/metadata/utils/typeParser.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/**
 * Splits a type string on ONE separator, but only where that separator occurs
 * at nesting depth 0. Text nested inside `< >`, `( )`, `[ ]` or `{ }` is never
 * split, so generic arguments, parameter lists, tuple types and object-literal
 * types stay intact.
 *
 * Every returned piece is trimmed. When the separator never occurs at depth 0
 * the result is a single-element array holding the trimmed input.
 *
 * @param {string} str The string to split
 * @param {string} separator The separator to split by (e.g., '|', '&', ',', '=>')
 * @returns {string[]} The split pieces
 */
const splitByOuterSeparator = (str, separator) => {
  const pieces = [];
  let current = '';
  let depth = 0;

  for (let i = 0; i < str.length; i++) {
    const char = str[i];

    // Track depth using every bracket pair that can enclose a nested type.
    // A '>' that directly follows '=' belongs to an arrow ('=>') and is
    // therefore not a closing angle bracket.
    if (char === '<' || char === '(' || char === '[' || char === '{') {
      depth++;
    } else if (
      (char === '>' && str[i - 1] !== '=') ||
      char === ')' ||
      char === ']' ||
      char === '}'
    ) {
      depth--;
    }

    // Check for the multi-character separator '=>'
    const isArrow = separator === '=>' && char === '=' && str[i + 1] === '>';
    // Check for single-character separators
    const isCharSeparator = separator === char;

    if (depth === 0 && (isCharSeparator || isArrow)) {
      pieces.push(current.trim());
      current = '';

      if (isArrow) {
        // Skip the '>' half of '=>' so it is not added to the next piece
        i++;
      }

      continue;
    }

    current += char;
  }

  pieces.push(current.trim());

  return pieces;
};
/**
 * Safely removes outer parentheses from a type string when they wrap the
 * entire string. Redundant layers are peeled repeatedly, so both
 * `(string | number)` and `((string | number))` become `string | number`,
 * while disconnected groups such as `(A) | (B)` are returned unchanged.
 *
 * The input is always trimmed, and so is the content left after each layer
 * is removed.
 *
 * @param {string} typeString The type string to evaluate and potentially unwrap.
 * @returns {string} The unwrapped type string, or the trimmed original string if not fully wrapped.
 */
export const stripOuterParentheses = typeString => {
  // True only when the first '(' is closed by the very last character
  const isFullyWrapped = text => {
    if (!text.startsWith('(') || !text.endsWith(')')) {
      return false;
    }

    let depth = 0;

    // Walk every character except the final ')'
    for (let i = 0; i < text.length - 1; i++) {
      if (text[i] === '(') {
        depth++;
      } else if (text[i] === ')') {
        depth--;
      }

      // Depth returning to 0 early means the opening parenthesis was closed
      // before the end, so the parentheses do not wrap the whole string
      if (depth === 0) {
        return false;
      }
    }

    return true;
  };

  let result = typeString.trim();

  // Each pass shortens the string by at least two characters, so this ends
  while (isFullyWrapped(result)) {
    result = result.slice(1, -1).trim();
  }

  return result;
};
/**
* Recursively parses advanced TypeScript types, including Unions, Intersections, Functions, and Nested Generics.
*
* The type string is examined in a fixed order: function arrow (`=>`), union (`|`),
* intersection (`&`), generic (`Base<...>`), and finally a plain type. The first
* construct found at the top level decides how the string is split; each resulting
* piece is parsed again by this same function.
*
* Pieces that cannot be resolved are rendered as inline code (`<piece>`) by the
* calling branch instead of being dropped.
*
* @param {string} typeString The plain type string to evaluate
* @param {Function} transformType The function used to resolve individual types into links
* @returns {string|null} The formatted Markdown link(s), or null if the input is empty or a plain type doesn't map
*/
export const parseType = (typeString, transformType) => {
// Clean the string and strip unnecessary outer parentheses to prevent depth blindness (e.g., "(string | number)" -> "string | number")
const trimmed = stripOuterParentheses(typeString);
if (!trimmed) {
return null;
}

// Handle Functions (=>)
// NOTE(review): this check runs before the union/intersection checks, so for an
// unparenthesized top-level arrow everything to the left of the first `=>`
// (including any `|` or `&`) ends up in `params` — confirm this is intended.
if (trimmed.includes('=>')) {
const parts = splitByOuterSeparator(trimmed, '=>');
if (parts.length > 1) {
// The parameter list is emitted verbatim; none of its types are linked
const params = parts[0];

// Join the rest back together to handle higher-order functions
const returnType = parts.slice(1).join(' => ');

// Preserve the function signature, just link the return type for now
// (Mapping param types inside the signature string is complex and often unnecessary for simple docs)
const parsedReturn =
parseType(returnType, transformType) || `\`<${returnType}>\``;
// The arrow is written as `=&gt;` (HTML-escaped `>`)
return `${params} =&gt; ${parsedReturn}`;
}
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arrow function checked first causes wrong operator precedence

Medium Severity

The => handler runs before | and &, giving => the lowest precedence in this top-down recursive parser. For a type like Promise<string> | (err: Error) => void, splitByOuterSeparator splits on => at depth 0, producing params Promise<string> | (err: Error) and return void — treating the whole expression as a single function type. The correct parse is a union of Promise<string> and (err: Error) => void. Any union or intersection appearing before a function arrow at the same nesting level will be swallowed into the parameter side.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit bb43bc8. Configure here.


// Handle Unions (|)
// Unions are split before intersections, so `A | B & C` is grouped as `A | (B & C)`
if (trimmed.includes('|')) {
const parts = splitByOuterSeparator(trimmed, '|');
if (parts.length > 1) {
// Re-evaluate each part recursively and join with ' | '
const resolvedParts = parts.map(
p => parseType(p, transformType) || `\`<${p}>\``
);
return resolvedParts.join(' | ');
}
}

// Handle Intersections (&)
if (trimmed.includes('&')) {
const parts = splitByOuterSeparator(trimmed, '&');
if (parts.length > 1) {
// Re-evaluate each part recursively and join with ' & '
const resolvedParts = parts.map(
p => parseType(p, transformType) || `\`<${p}>\``
);
return resolvedParts.join(' & ');
}
}

// Handle Generics (Base<Inner, Inner>)
// Check if it's a generic wrapped in an array (e.g., Promise<string>[])
const isGenericArray = trimmed.endsWith('[]');
// The trailing `[]` is set aside here and re-appended to the final output
const genericTarget = isGenericArray ? trimmed.slice(0, -2).trim() : trimmed;

if (genericTarget.includes('<') && genericTarget.endsWith('>')) {
// Base type is the text before the first `<`; the arguments are everything
// between that `<` and the final `>`
const firstBracketIndex = genericTarget.indexOf('<');
const baseType = genericTarget.slice(0, firstBracketIndex).trim();
const innerType = genericTarget.slice(firstBracketIndex + 1, -1).trim();

const cleanBaseType = baseType.replace(/\[\]$/, ''); // Just in case of Base[]<Inner>
const baseResult = transformType(cleanBaseType);

// Link the base type when it resolves, otherwise show it as inline code
const baseFormatted = baseResult
? `[\`<${cleanBaseType}>\`](${baseResult})`
: `\`<${cleanBaseType}>\``;

// Split the type arguments on top-level commas and parse each one recursively
const innerArgs = splitByOuterSeparator(innerType, ',');
const innerFormatted = innerArgs
.map(arg => parseType(arg, transformType) || `\`<${arg}>\``)
.join(', ');

// Angle brackets are emitted HTML-escaped (`&lt;` / `&gt;`)
return `${baseFormatted}&lt;${innerFormatted}&gt;${isGenericArray ? '[]' : ''}`;
}

// Base Case: Plain Type (e.g., string, Buffer, Function)
// Preserve array notation for base types
const isArray = trimmed.endsWith('[]');
const cleanType = trimmed.replace(/\[\]$/, '');

// The lookup uses the name without `[]`; the `[]` is placed after the link
const result = transformType(cleanType);
if (cleanType.length && result) {
return `[\`<${cleanType}>\`](${result})${isArray ? '[]' : ''}`;
}

// Unresolved plain type: the caller decides how to render it
return null;
};
Loading