diff --git a/src/generators/metadata/constants.mjs b/src/generators/metadata/constants.mjs index e8ac140b..aa2edf27 100644 --- a/src/generators/metadata/constants.mjs +++ b/src/generators/metadata/constants.mjs @@ -56,8 +56,5 @@ export const DOC_API_HEADING_TYPES = [ }, ]; -// This regex is used to match basic TypeScript generic types (e.g., Promise) -export const TYPE_GENERIC_REGEX = /^([^<]+)<([^>]+)>$/; - // This is the base URL of the Man7 documentation export const DOC_MAN_BASE_URL = 'http://man7.org/linux/man-pages/man'; diff --git a/src/generators/metadata/utils/__tests__/transformers.test.mjs b/src/generators/metadata/utils/__tests__/transformers.test.mjs index ef625d4c..d84d2b21 100644 --- a/src/generators/metadata/utils/__tests__/transformers.test.mjs +++ b/src/generators/metadata/utils/__tests__/transformers.test.mjs @@ -75,4 +75,27 @@ describe('transformTypeToReferenceLink', () => { '[``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)> & [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)>' ); }); + + it('should transform a function returning a Generic type', () => { + strictEqual( + transformTypeToReferenceLink('(err: Error) => Promise', {}), + '(err: Error) => [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)>' + ); + }); + + it('should respect precedence: Unions (|) are weaker than Intersections (&)', () => { + strictEqual( + transformTypeToReferenceLink('string | number & boolean', {}), + '[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | 
[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)' + ); + }); + + it('should handle extreme nested combinations of functions, generics, unions, and intersections', () => { + const input = + '(str: MyType) => Promise, Map>'; + const expected = + '(str: MyType) => [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)>, [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)>>'; + + strictEqual(transformTypeToReferenceLink(input, {}), expected); + }); }); diff --git a/src/generators/metadata/utils/transformers.mjs b/src/generators/metadata/utils/transformers.mjs index 7fac4e85..be5ee983 100644 --- a/src/generators/metadata/utils/transformers.mjs +++ b/src/generators/metadata/utils/transformers.mjs @@ -1,9 +1,6 @@ -import { - DOC_MAN_BASE_URL, - DOC_API_HEADING_TYPES, - TYPE_GENERIC_REGEX, -} from '../constants.mjs'; +import { DOC_MAN_BASE_URL, DOC_API_HEADING_TYPES } from '../constants.mjs'; import { slug } from './slugger.mjs'; +import { parseType } from './typeParser.mjs'; import { transformNodesToString } from '../../../utils/unist.mjs'; import BUILTIN_TYPE_MAP from '../maps/builtin.json' with { type: 'json' }; import MDN_TYPE_MAP from '../maps/mdn.json' with { type: 'json' }; @@ -22,84 +19,7 @@ export const transformUnixManualToLink = ( ) => { return 
`[\`${text}\`](${DOC_MAN_BASE_URL}${sectionNumber}/${command}.${sectionNumber}${sectionLetter}.html)`; }; -/** - * Safely splits the string by `|` or `&` at the top level (ignoring those - * inside `< >`), and returns both the pieces and the separator used. - * - * @param {string} str The type string to split - * @returns {{ pieces: string[], separator: string }} The split pieces and the separator string used to join them (` | ` or ` & `) - */ -const splitByOuterSeparator = str => { - const pieces = []; - let current = ''; - let depth = 0; - let separator; - - for (const char of str) { - if (char === '<') { - depth++; - } else if (char === '>') { - depth--; - } else if ((char === '|' || char === '&') && depth === 0) { - pieces.push(current); - current = ''; - separator ??= ` ${char} `; - continue; - } - current += char; - } - - pieces.push(current); - return { pieces, separator }; -}; - -/** - * Attempts to parse and format a basic Generic type (e.g., Promise). - * It also supports union and multi-parameter types within the generic brackets. - * - * @param {string} typePiece The plain type piece to be evaluated - * @param {Function} transformType The function used to resolve individual types into links - * @returns {string|null} The formatted Markdown link, or null if no match is found - */ -const formatBasicGeneric = (typePiece, transformType) => { - const genericMatch = typePiece.match(TYPE_GENERIC_REGEX); - - if (genericMatch) { - const baseType = genericMatch[1].trim(); - const innerType = genericMatch[2].trim(); - - const baseResult = transformType(baseType.replace(/\[\]$/, '')); - const baseFormatted = baseResult - ? 
`[\`<${baseType}>\`](${baseResult})` - : `\`<${baseType}>\``; - // Split while capturing delimiters (| or ,) to preserve original syntax - const parts = innerType.split(/([|,])/); - - const innerFormatted = parts - .map(part => { - const trimmed = part.trim(); - // If it is a delimiter, return it as is - if (trimmed === '|') { - return ' | '; - } - - if (trimmed === ',') { - return ', '; - } - - const innerRes = transformType(trimmed.replace(/\[\]$/, '')); - return innerRes - ? `[\`<${trimmed}>\`](${innerRes})` - : `\`<${trimmed}>\``; - }) - .join(''); - - return `${baseFormatted}<${innerFormatted}>`; - } - - return null; -}; /** * This method replaces plain text Types within the Markdown content into Markdown links * that link to the actual relevant reference for such type (either internal or external link) @@ -150,32 +70,7 @@ export const transformTypeToReferenceLink = (type, record) => { return ''; }; - const { pieces: outerPieces, separator } = splitByOuterSeparator(typeInput); - - const typePieces = outerPieces.map(piece => { - // This is the content to render as the text of the Markdown link - const trimmedPiece = piece.trim(); - - // 1. Attempt to format as a basic Generic type first - const genericMarkdown = formatBasicGeneric(trimmedPiece, transformType); - if (genericMarkdown) { - return genericMarkdown; - } - - // 2. 
Fallback to the logic for plain types - // This is what we will compare against the API types mappings - // The ReGeX below is used to remove `[]` from the end of the type - const result = transformType(trimmedPiece.replace(/\[\]$/, '')); - - // If we have a valid result and the piece is not empty, we return the Markdown link - if (trimmedPiece.length && result.length) { - return `[\`<${trimmedPiece}>\`](${result})`; - } - }); - - // Filter out pieces that we failed to map and then join the valid ones - // using the same separator that appeared in the original type string - const markdownLinks = typePieces.filter(Boolean).join(separator); + const markdownLinks = parseType(typeInput, transformType); // Return the replaced links or the original content if they all failed to be replaced // Note that if some failed to get replaced, only the valid ones will be returned diff --git a/src/generators/metadata/utils/typeParser.mjs b/src/generators/metadata/utils/typeParser.mjs new file mode 100644 index 00000000..69ae7cac --- /dev/null +++ b/src/generators/metadata/utils/typeParser.mjs @@ -0,0 +1,170 @@ +/** + * Safely splits a string by a given set of separators at depth 0 (ignoring those inside < > or ( )). 
/**
 * Splits a type string on a separator, but only where the separator appears
 * at nesting depth 0. Text nested inside `< >`, `( )`, `{ }` or `[ ]` is never
 * split, so generic arguments, parameter lists, object-literal types and
 * tuples stay in one piece.
 *
 * @param {string} str The string to split
 * @param {string} separator The separator to split by (e.g., '|', '&', ',', '=>')
 * @returns {string[]} The trimmed pieces (always at least one element)
 */
const splitByOuterSeparator = (str, separator) => {
  const pieces = [];
  let current = '';
  let depth = 0;

  for (let i = 0; i < str.length; i++) {
    const char = str[i];

    // A '>' that directly follows '=' is the tail of an arrow (`=>`), not the
    // end of a generic argument list, so it must not change the depth.
    const closesGeneric = char === '>' && str[i - 1] !== '=';

    if (OPENING_DELIMITERS.has(char)) {
      depth++;
    } else if (closesGeneric || CLOSING_DELIMITERS.has(char)) {
      // Clamp at 0 so a stray closer in malformed input cannot push the depth
      // negative and silently disable every later split.
      depth = Math.max(0, depth - 1);
    }

    // '=>' is the only multi-character separator supported
    const isArrow = separator === '=>' && char === '=' && str[i + 1] === '>';
    const isCharSeparator = separator === char;

    if (depth === 0 && (isCharSeparator || isArrow)) {
      pieces.push(current.trim());
      current = '';

      if (isArrow) {
        // Skip the '>' half of '=>'
        i++;
      }

      continue;
    }

    current += char;
  }

  pieces.push(current.trim());
  return pieces;
};

// Characters that open a nested region
const OPENING_DELIMITERS = new Set(['<', '(', '{', '[']);

// Characters that close a nested region ('>' is handled separately because
// it also appears in the arrow token '=>')
const CLOSING_DELIMITERS = new Set([')', '}', ']']);

// Matches one or more trailing array markers (e.g., `[]` or `[][]`)
const ARRAY_SUFFIX_REGEX = /(?:\[\])+$/;

/**
 * Removes ONE pair of outer parentheses from a type string, but only if that
 * pair wraps the entire string. `(string | number)` becomes `string | number`,
 * while disconnected groups such as `(A) | (B)` are returned untouched.
 *
 * @param {string} typeString The type string to evaluate and potentially unwrap.
 * @returns {string} The unwrapped (and trimmed) type string, or the trimmed original if it is not fully wrapped.
 */
export const stripOuterParentheses = typeString => {
  const trimmed = typeString.trim();

  if (!trimmed.startsWith('(') || !trimmed.endsWith(')')) {
    return trimmed;
  }

  let depth = 0;

  // Walk every character except the final ')'. If the depth returns to 0
  // before that point, the first '(' was closed early and therefore does not
  // pair with the last ')'.
  for (let i = 0; i < trimmed.length - 1; i++) {
    if (trimmed[i] === '(') {
      depth++;
    } else if (trimmed[i] === ')') {
      depth--;
    }

    if (depth === 0) {
      return trimmed;
    }
  }

  return trimmed.slice(1, -1).trim();
};

/**
 * Strips every redundant layer of wrapping parentheses,
 * e.g. `((string | number))` becomes `string | number`.
 *
 * @param {string} typeString The type string to unwrap
 * @returns {string} The fully unwrapped, trimmed type string
 */
const unwrapAllParentheses = typeString => {
  let current = stripOuterParentheses(typeString);
  let next = stripOuterParentheses(current);

  // Each successful strip shortens the string, so this always terminates
  while (next !== current) {
    current = next;
    next = stripOuterParentheses(current);
  }

  return current;
};

/**
 * Resolves a single piece into Markdown, falling back to an inline-code
 * literal (`<piece>`) when the piece cannot be mapped to a link.
 *
 * @param {string} part The type piece to resolve
 * @param {Function} transformType The function used to resolve individual types into links
 * @returns {string} The Markdown for the piece
 */
const linkOrLiteral = (part, transformType) =>
  parseType(part, transformType) || `\`<${part}>\``;

/**
 * Resolves every piece and joins the results with the given joiner.
 *
 * @param {string[]} parts The type pieces to resolve
 * @param {string} joiner The text placed between resolved pieces
 * @param {Function} transformType The function used to resolve individual types into links
 * @returns {string} The joined Markdown
 */
const formatParts = (parts, joiner, transformType) =>
  parts.map(part => linkOrLiteral(part, transformType)).join(joiner);

/**
 * Recursively parses advanced TypeScript types, including Unions,
 * Intersections, Functions, and Nested Generics, turning every type that
 * `transformType` can resolve into a Markdown link.
 *
 * Operators are split from weakest to strongest binding: `=>`, then `|`,
 * then `&`, then generic arguments.
 *
 * @param {string} typeString The plain type string to evaluate
 * @param {Function} transformType The function used to resolve individual types into links (returns a URL, or a falsy value when unknown)
 * @returns {string|null} The formatted Markdown link(s), or null if the input is empty or a plain type that doesn't map
 */
export const parseType = (typeString, transformType) => {
  // Strip all redundant outer parentheses so the separators inside them are
  // visible at depth 0 (e.g., "((string | number))" -> "string | number")
  const trimmed = unwrapAllParentheses(typeString);

  if (!trimmed) {
    return null;
  }

  // Handle Functions (=>)
  if (trimmed.includes('=>')) {
    const [params, ...rest] = splitByOuterSeparator(trimmed, '=>');

    if (rest.length > 0) {
      // Join the rest back together to handle higher-order functions
      const returnType = rest.join(' => ');

      // The parameter list is kept verbatim; only the return type is linked
      return `${params} => ${linkOrLiteral(returnType, transformType)}`;
    }
  }

  // Handle Unions (|) first and Intersections (&) second, because `&` binds
  // tighter than `|`
  for (const operator of ['|', '&']) {
    if (!trimmed.includes(operator)) {
      continue;
    }

    const rawParts = splitByOuterSeparator(trimmed, operator);

    if (rawParts.length < 2) {
      // The operator only occurs inside nested brackets
      continue;
    }

    // Drop empty pieces produced by a leading/trailing/doubled separator
    // (e.g., "| string | number") instead of rendering them as `<>`
    const parts = rawParts.filter(Boolean);

    if (parts.length === 0) {
      return null;
    }

    if (parts.length === 1) {
      return parseType(parts[0], transformType);
    }

    return formatParts(parts, ` ${operator} `, transformType);
  }

  // Peel off any trailing array markers (`[]`, `[][]`, ...) and re-append
  // them after the link so the link text is the bare type name
  const arraySuffix = trimmed.match(ARRAY_SUFFIX_REGEX)?.[0] ?? '';
  const target = arraySuffix
    ? trimmed.slice(0, -arraySuffix.length).trim()
    : trimmed;

  // Handle Generics (e.g., Promise<string>, Map<string, number>[])
  if (target.includes('<') && target.endsWith('>')) {
    const firstBracketIndex = target.indexOf('<');

    // Just in case of Base[]<...>
    const baseType = target
      .slice(0, firstBracketIndex)
      .trim()
      .replace(/\[\]$/, '');
    const innerType = target.slice(firstBracketIndex + 1, -1).trim();

    const baseResult = transformType(baseType);
    const baseFormatted = baseResult
      ? `[\`<${baseType}>\`](${baseResult})`
      : `\`<${baseType}>\``;

    const innerArgs = splitByOuterSeparator(innerType, ',').filter(Boolean);
    const innerFormatted = formatParts(innerArgs, ', ', transformType);

    return `${baseFormatted}<${innerFormatted}>${arraySuffix}`;
  }

  // Base Case: Plain Type (e.g., string, Buffer, Function)
  const result = transformType(target);

  if (target.length && result) {
    return `[\`<${target}>\`](${result})${arraySuffix}`;
  }

  return null;
};