import fs from 'fs';
import path from 'path';
import { SitemapStream } from 'sitemap';

// Paths from the pathMap that must never show up in the sitemap (exact match).
const IGNORED_PAGES = [
  '/404', // We don't want to add the 404 error page as sitemap entry
  '/versions', // Skip the redirect to latest, use `/versions/latest` instead
];

/**
 * Create a sitemap for crawlers like Algolia Docsearch.
 * This allows crawlers to index _all_ pages, without a full page-link-chain.
 *
 * The sitemap is streamed to `output`; this function returns right after
 * ending the stream and does not wait for the file to be flushed to disk.
 *
 * @param {object} options
 * @param {object} options.pathMap - Object whose keys are the URL paths of the site.
 * @param {string} options.domain - Value handed to the sitemap stream as `hostname`.
 * @param {string} options.output - File path the sitemap is written to.
 * @param {string[]} [options.pathsPriority=[]] - Path prefixes, in the order they should be listed.
 * @param {string[]} [options.pathsHidden=[]] - Path prefixes that are left out of the sitemap.
 * @returns {string[]} The filtered, normalized and sorted URL paths that were written.
 * @throws {Error} When `pathMap`, `domain` or `output` is missing.
 */
export default function createSitemap({
  pathMap,
  domain,
  output,
  pathsPriority = [],
  pathsHidden = [],
}) {
  if (!pathMap) throw new Error(`⚠️ Couldn't generate sitemap, no 'pathMap' provided`);
  if (!domain) throw new Error(`⚠️ Couldn't generate sitemap, no 'domain' provided`);
  if (!output) throw new Error(`⚠️ Couldn't generate sitemap, no 'output' provided`);

  // Make sure both hidden and prioritized paths are prefixed with slash.
  // Local copies are used so the caller-facing parameters are never reassigned.
  const priorityPrefixes = pathsPriority.map(prefix => pathWithStartingSlash(prefix));
  const hiddenPrefixes = pathsHidden.map(prefix => pathWithStartingSlash(prefix));

  const isHidden = url => hiddenPrefixes.some(hidden => url.startsWith(hidden));

  // Get a list of URLs from the pathMap that we can use in the sitemap.
  // Ignored/hidden checks run on the raw pathMap keys, before the trailing slash is added.
  const urls = Object.keys(pathMap)
    .filter(url => !IGNORED_PAGES.includes(url) && !isHidden(url))
    .map(url => pathWithTrailingSlash(url))
    .sort((a, b) => pathSortedByPriority(a, b, priorityPrefixes));

  const target = fs.createWriteStream(output);
  const sitemap = new SitemapStream({
    hostname: domain,
    xmlns: {
      news: false,
      xhtml: false,
      image: false,
      video: false,
    },
  });

  sitemap.pipe(target);
  urls.forEach(url => sitemap.write({ url }));
  sitemap.end();

  return urls;
}

/**
 * Append a trailing slash to paths that have no file extension
 * and do not already end with a slash.
 *
 * @param {string} url
 * @returns {string}
 */
function pathWithTrailingSlash(url) {
  return !path.extname(url) && !url.endsWith('/') ? `${url}/` : url;
}

/**
 * Prefix the path with a slash, unless it already starts with one.
 *
 * @param {string} url
 * @returns {string}
 */
function pathWithStartingSlash(url) {
  return url.startsWith('/') ? url : `/${url}`;
}

/**
 * Sort comparator that orders the sitemap paths by their priority.
 * It applies the following rules:
 *   - Index page is always moved to the top
 *   - Matches the order of prioritized paths using "startsWith" check
 *   - Paths matching no prioritized prefix are placed after all prioritized ones
 *
 * @param {string} a - First path to compare.
 * @param {string} b - Second path to compare.
 * @param {string[]} [priorities=[]] - Path prefixes, most important first.
 * @returns {number} Negative when `a` goes first, positive when `b` goes first, 0 to keep order.
 */
function pathSortedByPriority(a, b, priorities = []) {
  const aIsIndex = a === '/';
  const bIsIndex = b === '/';
  if (aIsIndex && bIsIndex) return 0;
  if (aIsIndex) return -1;
  if (bIsIndex) return 1;

  // `findIndex` yields -1 for "no match"; using that value directly would sort
  // unprioritized paths *before* prioritized ones. Rank them last instead.
  const rankOf = url => {
    const index = priorities.findIndex(prio => url.startsWith(prio));
    return index === -1 ? priorities.length : index;
  };

  return rankOf(a) - rankOf(b);
}