import unescape from 'lodash/unescape';
import marked from 'marked';

export enum TokenType {
  HEADING = 'heading',
  LIST = 'list',
  LIST_ITEM = 'listItem',
  PARAGRAPH = 'paragraph',
  TEXT = 'text',
  BLOCKQUOTE = 'blockquote',
  SPACE = 'space',
  CODE = 'code',
}

type SimpleToken = { type: TokenType; raw?: string };

export type TextToken = SimpleToken & { text: string };

export type HeadingToken = TextToken & {
  depth: number;
  tokens: Token[];
};

export type ListToken = SimpleToken & {
  depth: number;
  items: ListItemToken[];
  ordered?: boolean;
  start?: string;
  loose?: boolean;
};

export type ListItemToken = TextToken & {
  depth: number;
  tokens: Token[];
  task?: boolean;
  checked?: boolean;
  loose?: boolean;
};

export type ParagraphToken = TextToken;

export type SpaceToken = SimpleToken;

export type CodeToken = TextToken & {
  lang: string;
};

export type BlockquoteToken = TextToken & {
  tokens: Token[];
};

export type Token =
  | HeadingToken
  | ListToken
  | ListItemToken
  | ParagraphToken
  | TextToken
  | SpaceToken
  | CodeToken
  | BlockquoteToken;

export interface Tokens extends Array<Token> {
  links?: any;
}

export interface Renderer {
  render(tokens: Token[]): string;
}

/**
 * Receives markdown text and returns an array of tokens.
 */
export function lexify(text: string): Tokens {
  // Cast is needed because `marked` declares its own token types, which are structurally
  // close enough to ours once `recursivelyFixTokens` has run.
  const tokens = marked.lexer(text) as unknown as Tokens;
  recursivelyFixTokens(tokens);
  return tokens;
}

/**
 * Receives an array of tokens and renders them back to markdown.
 */
export function render(tokens: Tokens, renderer: Renderer = new MarkdownRenderer()): string {
  // The `marked` module is good enough at lexing, but its main purpose is converting
  // markdown to HTML, so we need to write our own renderer for changelogs.
  return unescape(renderer.render(tokens).trim() + EOL);
}

/**
 * Creates a heading token with the given text and depth.
 */
export function createHeadingToken(text: string, depth: number = 1): HeadingToken {
  return {
    type: TokenType.HEADING,
    depth,
    text,
    tokens: [createTextToken(text)],
  };
}

/**
 * Creates a text token from the given text.
 */
export function createTextToken(text: string): TextToken {
  return {
    type: TokenType.TEXT,
    text,
  };
}

/**
 * Creates an empty list token at the given depth.
 */
export function createListToken(depth: number = 1): ListToken {
  return {
    type: TokenType.LIST,
    depth,
    items: [],
  };
}

/**
 * Creates a list item token with the given text and depth.
 */
export function createListItemToken(text: string, depth: number = 0): ListItemToken {
  return {
    type: TokenType.LIST_ITEM,
    depth,
    text,
    tokens: [createTextToken(text)],
  };
}

/**
 * Type guard for tokens extending TextToken.
 */
export function isTextToken(token: Token): token is TextToken {
  return token?.type === TokenType.TEXT;
}

/**
 * Type guard for HeadingToken type.
 */
export function isHeadingToken(token: Token): token is HeadingToken {
  return token?.type === TokenType.HEADING;
}

/**
 * Type guard for ListToken type.
 */
export function isListToken(token: Token): token is ListToken {
  return token?.type === TokenType.LIST;
}

/**
 * Type guard for ListItemToken type.
 */
export function isListItemToken(token: Token): token is ListItemToken {
  return token?.type === TokenType.LIST_ITEM;
}

/**
 * Indents all lines after the first one in the given string.
 */
export function indentMultilineString(str: string, depth: number = 0, indent: string = '  ') {
  return str.replace(/\n/g, '\n' + indent.repeat(depth));
}
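/**
 * Illustrative sketch (not used by the module itself): building a small token tree by hand
 * with the factory helpers and narrowing it with the type guards above. The changelog-style
 * texts are made up for the example.
 *
 *   const tokens: Token[] = [createHeadingToken('Unreleased', 2), createListToken()];
 *
 *   for (const token of tokens) {
 *     if (isListToken(token)) {
 *       token.items.push(createListItemToken('Fixed a crash on startup.'));
 *     }
 *   }
 */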
/**
 * Fixes the given tokens in place. We need to know the depth of each list,
 * so it's assigned here while recursing into nested lists.
 */
function recursivelyFixTokens(tokens: Token[], listDepth: number = 0): void {
  for (const token of tokens) {
    if (isListToken(token)) {
      token.depth = listDepth;

      for (const item of token.items) {
        item.type = TokenType.LIST_ITEM;
        item.depth = listDepth;
        recursivelyFixTokens(item.tokens, listDepth + 1);
      }
    }
  }
}

const EOL = '\n';

export type RenderingContext = Partial<{
  indent: number;
  orderedList: boolean;
  itemIndex: number;
}>;

export class MarkdownRenderer implements Renderer {
  render(tokens: Token[]): string {
    let output = '';

    for (const token of tokens) {
      output += this.renderToken(token, { indent: 0 });
    }
    return output;
  }

  /* helpers */

  renderToken(token: Token, ctx: RenderingContext): string {
    // The casts below are safe: the switch on `type` guarantees the runtime shape of the token.
    switch (token.type) {
      case TokenType.HEADING:
        return this.heading(token as HeadingToken);
      case TokenType.LIST:
        return this.list(token as ListToken, ctx);
      case TokenType.LIST_ITEM:
        return this.listItem(token as ListItemToken, ctx);
      case TokenType.PARAGRAPH:
        return this.paragraph(token as ParagraphToken, ctx);
      case TokenType.TEXT:
        return this.text(token as TextToken, ctx);
      case TokenType.SPACE:
        return this.space(token);
      case TokenType.CODE:
        return this.code(token as CodeToken, ctx);
      case TokenType.BLOCKQUOTE:
        return this.blockquote(token as BlockquoteToken, ctx);
      default:
        // `marked` provides many more token types, but we don't need to go so deep;
        // so far we've only needed the types handled above.
        throw new Error(`Cannot parse token: ${JSON.stringify(token)}`);
    }
  }

  indent(depth?: number, indentStr: string = '  '): string {
    return depth ? indentStr.repeat(depth) : '';
  }

  /* tokens */

  heading(token: HeadingToken): string {
    return this.indent(token.depth, '#') + ' ' + token.text + EOL.repeat(2);
  }

  list(token: ListToken, ctx: { indent?: number }): string {
    let output = '';

    for (let i = 0; i < token.items.length; i++) {
      output += this.listItem(token.items[i], {
        ...ctx,
        orderedList: token.ordered,
        itemIndex: i + 1,
      });
    }
    return output + EOL;
  }

  listItem(token: ListItemToken, ctx: RenderingContext): string {
    const indent = ctx.indent ?? 0;
    const bullet = ctx.orderedList ? `${ctx.itemIndex ?? 1}.` : '-';

    let output = this.indent(indent) + bullet + ' ';

    if (token.tokens[0]) {
      // The `renderToken` result is indented by default (e.g. for a TextToken), but for list
      // items the indentation is already handled by the code above.
      output += this.renderToken(token.tokens[0], ctx).trim() + EOL;
    }
    for (const child of token.tokens.slice(1)) {
      output += this.renderToken(child, { ...ctx, indent: indent + 1 }).trimRight() + EOL;
    }
    return output.trimRight() + EOL;
  }

  paragraph(token: ParagraphToken, ctx: RenderingContext): string {
    return this.indent(ctx.indent) + token.text + EOL;
  }

  text(token: TextToken, ctx: RenderingContext): string {
    // A TextToken may have children which we don't really need; they would render to `text` either way.
    return this.indent(ctx.indent) + token.text;
  }

  space(token: SpaceToken): string {
    // The formatting of the other tokens is good enough that we don't need to render additional newlines.
    return EOL;
  }

  code(token: CodeToken, ctx: RenderingContext): string {
    const lines = token.text.split(EOL);
    const indentStr = this.indent(ctx.indent);

    lines.unshift('```' + (token.lang ?? ''));
    lines.push('```');

    return indentStr + lines.join(EOL + indentStr);
  }

  blockquote(token: BlockquoteToken, ctx: RenderingContext): string {
    const indentStr = this.indent(ctx.indent);

    return (
      indentStr +
      token.tokens
        .map((child) => '> ' + this.renderToken(child, { ...ctx, indent: 0 }).trimRight())
        .join(EOL + indentStr)
    );
  }
}
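/**
 * Illustrative usage sketch (not part of the module): round-tripping a changelog-like
 * document through `lexify` and `render`. The markdown content below is hypothetical.
 *
 *   const tokens = lexify('## Unreleased\n\n- Fixed a bug in the renderer.\n');
 *
 *   // Insert a new entry at the top of the first list found in the document.
 *   const list = tokens.find(isListToken);
 *   list?.items.unshift(createListItemToken('Added a new feature.'));
 *
 *   // Renders back to markdown using the default MarkdownRenderer.
 *   const markdown = render(tokens);
 */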