xref: /expo/docs/scripts/create-sitemap.js (revision 023bc8ea)
1import fs from 'fs';
2import path from 'path';
3import { SitemapStream } from 'sitemap';
4
// Paths from the path map that must never be written to the sitemap:
//   - '/404' is the error page, which should not be a sitemap entry.
//   - '/versions' only redirects to the latest version; `/versions/latest` is used instead.
const IGNORED_PAGES = ['/404', '/versions'];
9
/**
 * Write a sitemap so crawlers (like Algolia Docsearch) can index every page
 * without needing a complete chain of page links.
 *
 * The sitemap is streamed to `output`; this function ends the stream and returns
 * right away, it does not wait for the file write to finish.
 *
 * @param {object} options
 * @param {object} options.pathMap - Object whose keys are the page paths to consider.
 * @param {string} options.domain - Passed to the sitemap stream as `hostname`.
 * @param {string} options.output - File path the sitemap is written to.
 * @param {string[]} [options.pathsPriority] - Path prefixes used to order the URLs.
 * @param {string[]} [options.pathsHidden] - Path prefixes to leave out of the sitemap.
 * @returns {string[]} The URLs written to the sitemap, in the order they were written.
 * @throws {Error} When `pathMap`, `domain` or `output` is missing.
 */
export default function createSitemap({
  pathMap,
  domain,
  output,
  pathsPriority = [],
  pathsHidden = [],
}) {
  if (!pathMap) {
    throw new Error(`⚠️ Couldn't generate sitemap, no 'pathMap' provided`);
  }
  if (!domain) {
    throw new Error(`⚠️ Couldn't generate sitemap, no 'domain' provided`);
  }
  if (!output) {
    throw new Error(`⚠️ Couldn't generate sitemap, no 'output' provided`);
  }

  // Path map keys are matched with `startsWith`, so every prefix gets a leading slash first
  const priorityPrefixes = pathsPriority.map(entry => pathWithStartingSlash(entry));
  const hiddenPrefixes = pathsHidden.map(entry => pathWithStartingSlash(entry));

  // A page is listed unless it is explicitly ignored or falls under a hidden prefix
  const isListed = url => {
    if (IGNORED_PAGES.includes(url)) {
      return false;
    }
    return !hiddenPrefixes.some(prefix => url.startsWith(prefix));
  };

  const urls = Object.keys(pathMap)
    .filter(isListed)
    .map(url => pathWithTrailingSlash(url));
  urls.sort((left, right) => pathSortedByPriority(left, right, priorityPrefixes));

  const target = fs.createWriteStream(output);
  // All optional XML namespace flags are switched off
  const xmlns = { news: false, xhtml: false, image: false, video: false };
  const sitemap = new SitemapStream({ hostname: domain, xmlns });

  sitemap.pipe(target);
  for (const url of urls) {
    sitemap.write({ url });
  }
  sitemap.end();

  return urls;
}
54
/**
 * Append a trailing slash to page-like paths.
 * Paths that already end with a slash, or whose last segment has an extension
 * according to `path.extname` (e.g. `.xml`, but also `.0` in `v1.0.0`), are returned as-is.
 *
 * @param {string} url
 * @returns {string}
 */
function pathWithTrailingSlash(url) {
  const hasExtension = path.extname(url) !== '';
  if (hasExtension || url.endsWith('/')) {
    return url;
  }
  return `${url}/`;
}
58
/**
 * Make sure a path begins with a slash, prepending one when it is missing.
 *
 * @param {string} url
 * @returns {string}
 */
function pathWithStartingSlash(url) {
  if (url.startsWith('/')) {
    return url;
  }
  return `/${url}`;
}
62
/**
 * Comparator that sorts the paths by their priority.
 * It applies the following rules:
 *   - Index page is always moved to the top
 *   - Paths matching a prioritized prefix (using a "startsWith" check) come next,
 *     in the order of the `priorities` list
 *   - Paths matching no prioritized prefix come last and compare as equal to each other
 *
 * @param {string} a - First path to compare.
 * @param {string} b - Second path to compare.
 * @param {string[]} [priorities] - Path prefixes, most important first.
 * @returns {number} Negative when `a` goes first, positive when `b` goes first, 0 otherwise.
 */
function pathSortedByPriority(a, b, priorities = []) {
  if (a === '/') return -1;
  if (b === '/') return 1;

  // `findIndex` yields -1 when nothing matches; using that value directly would put
  // unprioritized paths in front of every prioritized one. Rank them after the last
  // priority instead (a finite rank also keeps the subtraction free of NaN).
  const rankOf = url => {
    const index = priorities.findIndex(prio => url.startsWith(prio));
    return index === -1 ? priorities.length : index;
  };

  return rankOf(a) - rankOf(b);
}
81