xref: /expo/tools/src/Markdown.ts (revision a272999e)
1import unescape from 'lodash/unescape';
2import marked from 'marked';
3
/**
 * Token types that this module knows how to create and render.
 * `MarkdownRenderer.renderToken` throws for any token whose type is not listed here.
 */
export enum TokenType {
  HEADING = 'heading',
  LIST = 'list',
  // This value is assigned to every list item by `recursivelyFixTokens` after lexing.
  LIST_ITEM = 'listItem',
  PARAGRAPH = 'paragraph',
  TEXT = 'text',
  BLOCKQUOTE = 'blockquote',
  SPACE = 'space',
  CODE = 'code',
}
14
/**
 * Base shape shared by all tokens: the discriminating `type` and, optionally, the `raw` source.
 * NOTE(review): `raw` is presumably the original markdown fragment provided by the lexer — confirm.
 */
type SimpleToken<Type> = { type: Type; raw?: string };

/**
 * Token carrying a `text` property. Without a type argument it describes a plain text token.
 */
export type TextToken<Type = TokenType.TEXT> = SimpleToken<Type> & { text: string };

/**
 * Heading token. `depth` is the number of `#` characters emitted by the renderer.
 */
export type HeadingToken = TextToken<TokenType.HEADING> & {
  depth: number;
  tokens: Token[];
};

/**
 * List token. `depth` is the nesting level assigned by `recursivelyFixTokens` (0 for top-level lists).
 */
export type ListToken = SimpleToken<TokenType.LIST> & {
  depth: number;
  items: ListItemToken[];
  // When truthy, items are rendered with `1.`, `2.`, ... instead of `-`.
  ordered?: boolean;
  // NOTE(review): not read by `MarkdownRenderer.list`, which always numbers items from 1.
  start?: string;
  loose?: boolean;
};

/**
 * Single item of a list. Its first child token is rendered on the bullet line,
 * the remaining ones are rendered below it with one more level of indentation.
 */
export type ListItemToken = TextToken<TokenType.LIST_ITEM> & {
  depth: number;
  tokens: Token[];
  task?: boolean;
  checked?: boolean;
  loose?: boolean;
};

/**
 * Paragraph token, rendered as its `text` followed by a newline.
 */
export type ParagraphToken = TextToken<TokenType.PARAGRAPH>;

/**
 * Token standing for blank space between other tokens; it carries no text.
 */
export type SpaceToken = SimpleToken<TokenType.SPACE>;

/**
 * Code block token. `lang` is appended to the opening code fence by the renderer.
 */
export type CodeToken = TextToken<TokenType.CODE> & {
  lang: string;
};

/**
 * Blockquote token. Each child token is rendered on its own line prefixed with `> `.
 */
export type BlockquoteToken = TextToken<TokenType.BLOCKQUOTE> & {
  tokens: Token[];
};

/**
 * Union of all token shapes supported by this module.
 */
export type Token =
  | HeadingToken
  | ListToken
  | ListItemToken
  | ParagraphToken
  | TextToken
  | SpaceToken
  | CodeToken
  | BlockquoteToken;
61
/**
 * Array of tokens as returned by `lexify`.
 * NOTE(review): `links` is presumably the link-reference map attached by marked's lexer — confirm.
 */
export interface Tokens extends Array<Token> {
  links?: any;
}
65
/**
 * Contract for objects that can turn tokens into a string.
 * `render` (the module-level function) accepts any implementation and
 * falls back to `MarkdownRenderer` when none is given.
 */
export interface Renderer {
  render(tokens: Token[]): string;
}
69
/**
 * Tokenizes markdown text.
 *
 * The text is lexed by `marked` and the resulting tokens are then fixed in place
 * by `recursivelyFixTokens` (list depths and list item types) before being returned.
 */
export function lexify(text: string): Tokens {
  const lexed = marked.lexer(text);

  // Mutates `lexed` in place, so the same array instance is returned below.
  recursivelyFixTokens(lexed);

  return lexed;
}
78
/**
 * Renders given tokens to a string using the provided renderer (markdown by default).
 *
 * The rendered output is trimmed, terminated with exactly one newline
 * and finally passed through lodash's `unescape`.
 */
export function render(tokens: Tokens, renderer: Renderer = new MarkdownRenderer()): string {
  // `marked` module is good enough in terms of lexifying, but its main purpose is to
  // convert markdown to html, so we need to write our own renderer for changelogs.
  const rendered = renderer.render(tokens);
  const withSingleTrailingNewline = rendered.trim() + EOL;

  return unescape(withSingleTrailingNewline);
}
87
/**
 * Builds a heading token.
 *
 * @param text Heading text; it is also wrapped into a single child text token.
 * @param depth Heading level, defaults to 1.
 */
export function createHeadingToken(text: string, depth: number = 1): HeadingToken {
  const children = [createTextToken(text)];
  const heading: HeadingToken = {
    type: TokenType.HEADING,
    depth,
    text,
    tokens: children,
  };
  return heading;
}
99
/**
 * Builds a plain text token holding the given text.
 */
export function createTextToken(text: string): TextToken {
  const token: TextToken = { type: TokenType.TEXT, text };
  return token;
}
109
/**
 * Builds an empty list token.
 *
 * @param depth Value stored in the token's `depth` property, defaults to 1.
 */
export function createListToken(depth: number = 1): ListToken {
  const list: ListToken = { type: TokenType.LIST, depth, items: [] };
  return list;
}
117
/**
 * Builds a list item token.
 *
 * @param text Item text; it is also wrapped into a single child text token.
 * @param depth Value stored in the token's `depth` property, defaults to 0.
 */
export function createListItemToken(text: string, depth: number = 0): ListItemToken {
  const children = [createTextToken(text)];
  const item: ListItemToken = {
    type: TokenType.LIST_ITEM,
    depth,
    text,
    tokens: children,
  };
  return item;
}
126
/**
 * Type guard telling whether the given token is a plain text token,
 * i.e. its `type` equals `TokenType.TEXT`. Nullish values yield `false`.
 */
export function isTextToken(token: Token): token is TextToken {
  const kind = token?.type;
  return kind === TokenType.TEXT;
}
133
/**
 * Type guard telling whether the given token is a heading,
 * i.e. its `type` equals `TokenType.HEADING`. Nullish values yield `false`.
 */
export function isHeadingToken(token: Token): token is HeadingToken {
  const kind = token?.type;
  return kind === TokenType.HEADING;
}
140
/**
 * Type guard telling whether the given token is a list,
 * i.e. its `type` equals `TokenType.LIST`. Nullish values yield `false`.
 */
export function isListToken(token: Token): token is ListToken {
  const kind = token?.type;
  return kind === TokenType.LIST;
}
147
/**
 * Type guard telling whether the given token is a list item,
 * i.e. its `type` equals `TokenType.LIST_ITEM`. Nullish values yield `false`.
 */
export function isListItemToken(token: Token): token is ListItemToken {
  const kind = token?.type;
  return kind === TokenType.LIST_ITEM;
}
154
/**
 * Indents subsequent lines in given string. The first line is left untouched.
 *
 * @param str String whose lines (separated by `\n`) should be indented.
 * @param depth How many times `indent` is repeated in front of each subsequent line.
 * @param indent String used for a single level of indentation.
 * @returns The string with `indent` repeated `depth` times inserted after every newline.
 */
export function indentMultilineString(
  str: string,
  depth: number = 0,
  indent: string = '  '
): string {
  // Split/join inserts the indent literally. Passing it to `String.prototype.replace` as
  // a replacement string would interpret sequences such as `$&` or `$$` as special patterns.
  return str.split('\n').join('\n' + indent.repeat(depth));
}
161
/**
 * Fixes given tokens in place so that lists know how deeply they are nested.
 *
 * Every list token gets its `depth` set to the current nesting level. Each of its items
 * gets the same `depth` and its `type` set to `TokenType.LIST_ITEM`, and the item's
 * child tokens are processed recursively with the nesting level increased by one.
 * Tokens other than lists are left untouched.
 */
function recursivelyFixTokens(tokens: Token[], listDepth: number = 0): void {
  for (let tokenIndex = 0; tokenIndex < tokens.length; tokenIndex++) {
    const current = tokens[tokenIndex];

    if (current.type !== TokenType.LIST) {
      continue;
    }
    current.depth = listDepth;

    const listItems = current.items;
    for (let itemIndex = 0; itemIndex < listItems.length; itemIndex++) {
      const listItem = listItems[itemIndex];

      listItem.type = TokenType.LIST_ITEM;
      listItem.depth = listDepth;
      recursivelyFixTokens(listItem.tokens, listDepth + 1);
    }
  }
}
178
// Line terminator used throughout rendering; always `\n`, independent of the platform.
const EOL = '\n';
180
/**
 * State passed down while rendering nested tokens. All properties are optional.
 */
export type RenderingContext = Partial<{
  // Indentation level of the token being rendered (one level is two spaces by default).
  indent: number;
  // Whether the list item being rendered belongs to an ordered list.
  orderedList: boolean;
  // 1-based position of the list item within its list, used as the number of ordered items.
  itemIndex: number;
}>;
186
/**
 * Default renderer that turns tokens back into markdown text.
 * Each supported token type is rendered by a dedicated method.
 */
export class MarkdownRenderer implements Renderer {
  /**
   * Renders all given tokens in order, each of them at indentation level 0,
   * and returns the concatenated output.
   */
  render(tokens: Token[]): string {
    let output = '';
    for (const token of tokens) {
      output += this.renderToken(token, { indent: 0 });
    }
    return output;
  }

  /* helpers */

  /**
   * Dispatches the token to the method rendering its type.
   *
   * @throws Error when the token type is not supported by this renderer.
   */
  renderToken(token: Token, ctx: RenderingContext): string {
    switch (token.type) {
      case TokenType.HEADING:
        return this.heading(token);
      case TokenType.LIST:
        return this.list(token, ctx);
      case TokenType.LIST_ITEM:
        return this.listItem(token, ctx);
      case TokenType.PARAGRAPH:
        return this.paragraph(token, ctx);
      case TokenType.TEXT:
        return this.text(token, ctx);
      case TokenType.SPACE:
        return this.space(token);
      case TokenType.CODE:
        return this.code(token, ctx);
      case TokenType.BLOCKQUOTE:
        return this.blockquote(token, ctx);
      default:
        // `marked` provides much more tokens, however we don't need to go so deep.
        // So far we needed only tokens with above types.
        // The token is serialized explicitly, interpolating the object itself
        // would only produce "[object Object]".
        throw new Error(`Cannot parse token: ${JSON.stringify(token)}`);
    }
  }

  /**
   * Returns `indentStr` repeated `depth` times, or an empty string when `depth` is falsy.
   */
  indent(depth?: number, indentStr: string = '  '): string {
    return depth ? indentStr.repeat(depth) : '';
  }

  /* tokens */

  /**
   * Renders a heading as `depth` hash characters, a space and the heading text,
   * followed by an empty line.
   */
  heading(token: HeadingToken): string {
    return this.indent(token.depth, '#') + ' ' + token.text + EOL.repeat(2);
  }

  /**
   * Renders all items of the list and terminates the list with an additional newline.
   * Items of ordered lists are numbered starting from 1.
   */
  list(token: ListToken, ctx: { indent?: number }): string {
    let output = '';
    for (let i = 0; i < token.items.length; i++) {
      output += this.listItem(token.items[i], {
        ...ctx,
        orderedList: token.ordered,
        itemIndex: i + 1,
      });
    }
    return output + EOL;
  }

  /**
   * Renders a list item: the bullet (or number) with the first child token on the same line,
   * followed by the remaining child tokens indented by one more level.
   */
  listItem(token: ListItemToken, ctx: RenderingContext): string {
    const indent = ctx.indent ?? 0;
    const bullet = ctx.orderedList ? `${ctx.itemIndex ?? 1}.` : '-';
    let output = this.indent(indent) + bullet + ' ';

    if (token.tokens[0]) {
      // `renderToken` result is indented by default (e.g. got TextToken), but when dealing with lists
      // then list items indents are handled in above code instead
      output += this.renderToken(token.tokens[0], ctx).trim() + EOL;
    }

    for (const child of token.tokens.slice(1)) {
      output += this.renderToken(child, { ...ctx, indent: indent + 1 }).trimEnd() + EOL;
    }
    // Collapse any trailing whitespace produced by children into a single newline.
    return output.trimEnd() + EOL;
  }

  /**
   * Renders a paragraph as its indented text followed by a newline.
   */
  paragraph(token: ParagraphToken, ctx: RenderingContext): string {
    return this.indent(ctx.indent) + token.text + EOL;
  }

  /**
   * Renders a text token as its indented text, without a trailing newline.
   */
  text(token: TextToken, ctx: RenderingContext): string {
    // TextToken may have children which we don't really need - they would render to `text` either way.
    return this.indent(ctx.indent) + token.text;
  }

  /**
   * Renders a space token as a single newline, regardless of the token's content.
   */
  space(token: SpaceToken): string {
    return EOL;
  }

  /**
   * Renders a fenced code block with every line (fences included) indented
   * according to the context. The result has no trailing newline.
   */
  code(token: CodeToken, ctx: RenderingContext): string {
    const lines = token.text.split(EOL);
    const indentStr = this.indent(ctx?.indent);

    // Parentheses are required: `+` binds tighter than `??`, so without them
    // a missing language would be rendered as "```undefined".
    lines.unshift('```' + (token.lang ?? ''));
    lines.push('```');

    return indentStr + lines.join(EOL + indentStr);
  }

  /**
   * Renders a blockquote: each child token is rendered without indentation, prefixed with `> `
   * and placed on its own line that is indented according to the context.
   */
  blockquote(token: BlockquoteToken, ctx: RenderingContext): string {
    const indentStr = this.indent(ctx.indent);

    return (
      indentStr +
      token.tokens
        .map((child) => '> ' + this.renderToken(child, { ...ctx, indent: 0 }).trimEnd())
        .join(EOL + indentStr)
    );
  }
}
297