From 07319d061c8af703a22403d9cd52acd0b458706d Mon Sep 17 00:00:00 2001 From: Alex Moon Date: Thu, 12 Mar 2020 17:09:14 +0800 Subject: [PATCH] feat(gatsby-plugin-sitemap): handle different query structures and allow custom siteUrl resolution (#21948) * fix: handle allSitePage.nodes query stil. feat: allow custom resolver for siteUrl * docs(plugin-sitemap): add doces for updates * Update packages/gatsby-plugin-sitemap/README.md Co-Authored-By: LB Co-authored-by: LB --- packages/gatsby-plugin-sitemap/README.md | 21 ++-- .../src/__tests__/internals.js | 74 ++++++++++-- .../gatsby-plugin-sitemap/src/gatsby-node.js | 25 +++- .../gatsby-plugin-sitemap/src/internals.js | 109 ++++++++++++------ 4 files changed, 168 insertions(+), 61 deletions(-) diff --git a/packages/gatsby-plugin-sitemap/README.md b/packages/gatsby-plugin-sitemap/README.md index 3c627525bdd52..cdf25cc8812ab 100644 --- a/packages/gatsby-plugin-sitemap/README.md +++ b/packages/gatsby-plugin-sitemap/README.md @@ -27,11 +27,12 @@ The `defaultOptions` [here](https://github.com/gatsbyjs/gatsby/blob/master/packa The options are as follows: -- `query` (GraphQL Query) The query for the data you need to generate the sitemap. It's required to get the `site.siteMetadata.siteUrl`. If you override the query, you probably will need to set a `serializer` to return the correct data for the sitemap. +- `query` (GraphQL Query) The query for the data you need to generate the sitemap. It's required to get the site's URL, if you are not fetching it from `site.siteMetadata.siteUrl`, you will need to set a custom `resolveSiteUrl` function. If you override the query, you probably will also need to set a `serializer` to return the correct data for the sitemap. Due to how this plugin was built it is currently expected/required to fetch the page paths from `allSitePage`, but you may use the `allSitePage.edges.node` or `allSitePage.nodes` query structure. - `output` (string) The filepath and name. Defaults to `/sitemap.xml`. - `exclude` (array of strings) An array of paths to exclude from the sitemap. - `createLinkInHead` (boolean) Whether to populate the `` of your site with a link to the sitemap. - `serialize` (function) Takes the output of the data query and lets you return an array of sitemap entries. +- `resolveSiteUrl` (function) Takes the output of the data query and lets you return the site URL. We _ALWAYS_ exclude the following pages: `/dev-404-page`,`/404` &`/offline-plugin-app-shell-fallback`, this cannot be changed. @@ -53,24 +54,26 @@ plugins: [ exclude: [`/category/*`, `/path/to/page`], query: ` { - site { - siteMetadata { + wp { + generalSettings { siteUrl } } allSitePage { - edges { - node { - path - } + node { + path } } }`, + resolveSiteUrl: ({site, allSitePage}) => { + //Alternativly, you may also pass in an environment variable (or any location) at the beginning of your `gatsby-config.js`. + return site.wp.generalSettings.siteUrl + }, serialize: ({ site, allSitePage }) => - allSitePage.edges.map(edge => { + allSitePage.nodes.map(node => { return { - url: site.siteMetadata.siteUrl + edge.node.path, + url: `${site.wp.generalSettings.siteUrl}${node.path}`, changefreq: `daily`, priority: 0.7, } diff --git a/packages/gatsby-plugin-sitemap/src/__tests__/internals.js b/packages/gatsby-plugin-sitemap/src/__tests__/internals.js index c094ecb32c767..8b047261d2c74 100644 --- a/packages/gatsby-plugin-sitemap/src/__tests__/internals.js +++ b/packages/gatsby-plugin-sitemap/src/__tests__/internals.js @@ -1,5 +1,5 @@ const { - runQuery, + filterQuery, defaultOptions: { serialize }, } = require(`../internals`) @@ -7,6 +7,10 @@ beforeEach(() => { global.__PATH_PREFIX__ = `` }) +const verifyUrlsExistInResults = (results, urls) => { + expect(results.map(result => result.url)).toEqual(urls) +} + describe(`results using default settings`, () => { const generateQueryResultsMock = ( { siteUrl } = { siteUrl: `http://dummy.url` } @@ -36,10 +40,6 @@ describe(`results using default settings`, () => { } } - const verifyUrlsExistInResults = (results, urls) => { - expect(results.map(result => result.url)).toEqual(urls) - } - const runTests = (pathPrefix = ``) => { beforeEach(() => { global.__PATH_PREFIX__ = pathPrefix @@ -47,7 +47,8 @@ describe(`results using default settings`, () => { it(`prepares all urls correctly`, async () => { const graphql = () => Promise.resolve(generateQueryResultsMock()) - const queryRecords = await runQuery(graphql, ``, [], pathPrefix) + const results = await graphql(``) + const queryRecords = filterQuery(results, [], pathPrefix) const urls = serialize(queryRecords) verifyUrlsExistInResults(urls, [ @@ -61,7 +62,9 @@ describe(`results using default settings`, () => { Promise.resolve( generateQueryResultsMock({ siteUrl: `http://dummy.url/` }) ) - const queryRecords = await runQuery(graphql, ``, [], pathPrefix) + + const data = await graphql(``) + const queryRecords = filterQuery(data, [], pathPrefix) const urls = serialize(queryRecords) verifyUrlsExistInResults(urls, [ @@ -72,7 +75,8 @@ describe(`results using default settings`, () => { it(`excludes pages without trailing slash`, async () => { const graphql = () => Promise.resolve(generateQueryResultsMock()) - const queryRecords = await runQuery(graphql, ``, [`/page-2`], pathPrefix) + const data = await graphql(``) + const queryRecords = filterQuery(data, [`/page-2`], pathPrefix) const urls = serialize(queryRecords) verifyUrlsExistInResults(urls, [`http://dummy.url${pathPrefix}/page-1`]) @@ -80,7 +84,8 @@ describe(`results using default settings`, () => { it(`excludes pages with trailing slash`, async () => { const graphql = () => Promise.resolve(generateQueryResultsMock()) - const queryRecords = await runQuery(graphql, ``, [`/page-2/`], pathPrefix) + const data = await graphql(``) + const queryRecords = filterQuery(data, [`/page-2/`], pathPrefix) const urls = serialize(queryRecords) verifyUrlsExistInResults(urls, [`http://dummy.url${pathPrefix}/page-1`]) @@ -92,7 +97,8 @@ describe(`results using default settings`, () => { expect.assertions(1) try { - await runQuery(graphql, ``, [], pathPrefix) + const data = await graphql(``) + filterQuery(data, [], pathPrefix) } catch (err) { expect(err.message).toEqual( expect.stringContaining(`SiteMetaData 'siteUrl' property is required`) @@ -109,3 +115,51 @@ describe(`results using default settings`, () => { runTests(`/path-prefix`) }) }) + +describe(`results using non default alternatives`, () => { + const generateQueryResultsMockNodes = ( + { siteUrl } = { siteUrl: `http://dummy.url` } + ) => { + return { + data: { + site: { + siteMetadata: { + siteUrl: siteUrl, + }, + }, + allSitePage: { + nodes: [ + { + path: `/page-1`, + }, + { + path: `/page-2`, + }, + ], + }, + }, + } + } + + it(`handles allSitePage.nodes type query properly`, async () => { + const graphql = () => Promise.resolve(generateQueryResultsMockNodes()) + const results = await graphql(``) + const queryRecords = filterQuery(results, [], ``) + const urls = serialize(queryRecords) + + verifyUrlsExistInResults(urls, [ + `http://dummy.url/page-1`, + `http://dummy.url/page-2`, + ]) + }) + + it(`handles custom siteUrl Resolver Properly type query properly`, async () => { + const customUrl = `https://another.dummy.url` + const customSiteResolver = () => customUrl + const graphql = () => Promise.resolve(generateQueryResultsMockNodes()) + const results = await graphql(``) + const queryRecords = filterQuery(results, [], ``, customSiteResolver) + + expect(queryRecords.site.siteMetadata.siteUrl).toEqual(customUrl) + }) +}) diff --git a/packages/gatsby-plugin-sitemap/src/gatsby-node.js b/packages/gatsby-plugin-sitemap/src/gatsby-node.js index a68064eb67ac1..fbd0aeb84381f 100644 --- a/packages/gatsby-plugin-sitemap/src/gatsby-node.js +++ b/packages/gatsby-plugin-sitemap/src/gatsby-node.js @@ -2,7 +2,7 @@ import path from "path" import sitemap from "sitemap" import { defaultOptions, - runQuery, + filterQuery, writeFile, renameFile, withoutTrailingSlash, @@ -18,7 +18,15 @@ exports.onPostBuild = async ( delete options.plugins delete options.createLinkInHead - const { query, serialize, output, exclude, hostname, ...rest } = { + const { + query, + serialize, + output, + exclude, + hostname, + resolveSiteUrl, + ...rest + } = { ...defaultOptions, ...options, } @@ -28,8 +36,15 @@ exports.onPostBuild = async ( // Paths we're excluding... const excludeOptions = exclude.concat(defaultOptions.exclude) - const queryRecords = await runQuery(graphql, query, excludeOptions, basePath) - const urls = serialize(queryRecords) + const queryRecords = await graphql(query) + + const filteredRecords = filterQuery( + queryRecords, + excludeOptions, + basePath, + resolveSiteUrl + ) + const urls = serialize(filteredRecords) if (!rest.sitemapSize || urls.length <= rest.sitemapSize) { const map = sitemap.createSitemap(rest) @@ -41,7 +56,7 @@ exports.onPostBuild = async ( site: { siteMetadata: { siteUrl }, }, - } = queryRecords + } = filteredRecords return new Promise(resolve => { // sitemap-index.xml is default file name. (https://git.io/fhNgG) const indexFilePath = path.join( diff --git a/packages/gatsby-plugin-sitemap/src/internals.js b/packages/gatsby-plugin-sitemap/src/internals.js index a13e59acead68..8e61b4afec0cc 100644 --- a/packages/gatsby-plugin-sitemap/src/internals.js +++ b/packages/gatsby-plugin-sitemap/src/internals.js @@ -8,46 +8,62 @@ export const withoutTrailingSlash = path => export const writeFile = pify(fs.writeFile) export const renameFile = pify(fs.rename) -export const runQuery = (handler, query, excludes, pathPrefix) => - handler(query).then(r => { - if (r.errors) { - throw new Error(r.errors.join(`, `)) - } +export function filterQuery( + results, + excludes, + pathPrefix, + resolveSiteUrl = defaultOptions.resolveSiteUrl +) { + const { errors, data } = results - // Removing excluded paths - r.data.allSitePage.edges = r.data.allSitePage.edges.filter( - page => - !excludes.some(excludedRoute => - minimatch( - withoutTrailingSlash(page.node.path), - withoutTrailingSlash(excludedRoute) - ) - ) - ) + if (errors) { + throw new Error(errors.join(`, `)) + } - // Add path prefix - r.data.allSitePage.edges = r.data.allSitePage.edges.map(page => { - page.node.path = (pathPrefix + page.node.path).replace(/^\/\//g, `/`) - return page - }) + let { allPages, originalType } = getNodes(data.allSitePage) - // siteUrl Validation - if ( - !r.data.site.siteMetadata.siteUrl || - r.data.site.siteMetadata.siteUrl.trim().length == 0 - ) { - throw new Error( - `SiteMetaData 'siteUrl' property is required and cannot be left empty. Check out the documentation to see a working example: https://www.gatsbyjs.org/packages/gatsby-plugin-sitemap/#how-to-use` + // Removing excluded paths + allPages = allPages.filter( + page => + !excludes.some(excludedRoute => + minimatch( + withoutTrailingSlash(page.path), + withoutTrailingSlash(excludedRoute) + ) ) - } + ) + + // Add path prefix + allPages = allPages.map(page => { + page.path = (pathPrefix + page.path).replace(/^\/\//g, `/`) + return page + }) + + // siteUrl Validation - // remove trailing slash of siteUrl - r.data.site.siteMetadata.siteUrl = withoutTrailingSlash( - r.data.site.siteMetadata.siteUrl + let siteUrl = resolveSiteUrl(data) + + if (!siteUrl || siteUrl.trim().length == 0) { + throw new Error( + `SiteMetaData 'siteUrl' property is required and cannot be left empty. Check out the documentation to see a working example: https://www.gatsbyjs.org/packages/gatsby-plugin-sitemap/#how-to-use` ) + } - return r.data - }) + // remove trailing slash of siteUrl + siteUrl = withoutTrailingSlash(siteUrl) + + return { + allSitePage: { + [originalType]: + originalType === `nodes` + ? allPages + : allPages.map(page => { + return { node: page } + }), + }, + site: { siteMetadata: { siteUrl } }, + } +} export const defaultOptions = { query: ` @@ -74,12 +90,31 @@ export const defaultOptions = { `/offline-plugin-app-shell-fallback`, ], createLinkInHead: true, - serialize: ({ site, allSitePage }) => - allSitePage.edges.map(edge => { + serialize: ({ site, allSitePage }) => { + const { allPages } = getNodes(allSitePage) + return allPages?.map(page => { return { - url: site.siteMetadata.siteUrl + edge.node.path, + url: `${site.siteMetadata?.siteUrl ?? ``}${page.path}`, changefreq: `daily`, priority: 0.7, } - }), + }) + }, + resolveSiteUrl: data => data.site.siteMetadata.siteUrl, +} + +function getNodes(results) { + if (`nodes` in results) { + return { allPages: results.nodes, originalType: `nodes` } + } + + if (`edges` in results) { + return { + allPages: results?.edges?.map(edge => edge.node), + originalType: `edges`, + } + } + throw new Error( + `[gatsby-plugin-sitemap]: Plugin is unsure how to handle the results of your query, you'll need to write custom page filter and serilizer in your gatsby conig` + ) }