Skip to content

Commit

Permalink
feat(gatsby-plugin-sitemap): handle different query structures and al…
Browse files Browse the repository at this point in the history
…low custom siteUrl resolution (#21948)

* fix: handle allSitePage.nodes query style. feat: allow custom resolver for siteUrl

* docs(plugin-sitemap): add docs for updates

* Update packages/gatsby-plugin-sitemap/README.md

Co-Authored-By: LB <laurie@gatsbyjs.com>

Co-authored-by: LB <laurie@gatsbyjs.com>
  • Loading branch information
moonmeister and LB committed Mar 12, 2020
1 parent 306cadd commit 07319d0
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 61 deletions.
21 changes: 12 additions & 9 deletions packages/gatsby-plugin-sitemap/README.md
Expand Up @@ -27,11 +27,12 @@ The `defaultOptions` [here](https://github.com/gatsbyjs/gatsby/blob/master/packa

The options are as follows:

- `query` (GraphQL Query) The query for the data you need to generate the sitemap. It's required to get the `site.siteMetadata.siteUrl`. If you override the query, you probably will need to set a `serializer` to return the correct data for the sitemap.
- `query` (GraphQL Query) The query for the data you need to generate the sitemap. It must fetch the site's URL; if you are not fetching it from `site.siteMetadata.siteUrl`, you will need to set a custom `resolveSiteUrl` function. If you override the query, you will probably also need to set a `serialize` function to return the correct data for the sitemap. Due to how this plugin was built, it is currently required to fetch the page paths from `allSitePage`, but you may use either the `allSitePage.edges.node` or the `allSitePage.nodes` query structure.
- `output` (string) The filepath and name. Defaults to `/sitemap.xml`.
- `exclude` (array of strings) An array of paths to exclude from the sitemap.
- `createLinkInHead` (boolean) Whether to populate the `<head>` of your site with a link to the sitemap.
- `serialize` (function) Takes the output of the data query and lets you return an array of sitemap entries.
- `resolveSiteUrl` (function) Takes the output of the data query and lets you return the site URL.

We _ALWAYS_ exclude the following pages: `/dev-404-page`, `/404` & `/offline-plugin-app-shell-fallback`; this cannot be changed.

Expand All @@ -53,24 +54,26 @@ plugins: [
exclude: [`/category/*`, `/path/to/page`],
query: `
{
site {
siteMetadata {
wp {
generalSettings {
siteUrl
}
}
allSitePage {
edges {
node {
path
}
node {
path
}
}
}`,
resolveSiteUrl: ({site, allSitePage}) => {
// Alternatively, you may also pass in an environment variable (or any location) at the beginning of your `gatsby-config.js`.
return site.wp.generalSettings.siteUrl
},
serialize: ({ site, allSitePage }) =>
allSitePage.edges.map(edge => {
allSitePage.nodes.map(node => {
return {
url: site.siteMetadata.siteUrl + edge.node.path,
url: `${site.wp.generalSettings.siteUrl}${node.path}`,
changefreq: `daily`,
priority: 0.7,
}
Expand Down
74 changes: 64 additions & 10 deletions packages/gatsby-plugin-sitemap/src/__tests__/internals.js
@@ -1,12 +1,16 @@
const {
runQuery,
filterQuery,
defaultOptions: { serialize },
} = require(`../internals`)

beforeEach(() => {
global.__PATH_PREFIX__ = ``
})

// Asserts that the `url` field of each serialized entry matches `urls`,
// in the same order.
const verifyUrlsExistInResults = (results, urls) => {
  const actualUrls = results.map(({ url }) => url)
  expect(actualUrls).toEqual(urls)
}

describe(`results using default settings`, () => {
const generateQueryResultsMock = (
{ siteUrl } = { siteUrl: `http://dummy.url` }
Expand Down Expand Up @@ -36,18 +40,15 @@ describe(`results using default settings`, () => {
}
}

const verifyUrlsExistInResults = (results, urls) => {
expect(results.map(result => result.url)).toEqual(urls)
}

const runTests = (pathPrefix = ``) => {
beforeEach(() => {
global.__PATH_PREFIX__ = pathPrefix
})

it(`prepares all urls correctly`, async () => {
const graphql = () => Promise.resolve(generateQueryResultsMock())
const queryRecords = await runQuery(graphql, ``, [], pathPrefix)
const results = await graphql(``)
const queryRecords = filterQuery(results, [], pathPrefix)
const urls = serialize(queryRecords)

verifyUrlsExistInResults(urls, [
Expand All @@ -61,7 +62,9 @@ describe(`results using default settings`, () => {
Promise.resolve(
generateQueryResultsMock({ siteUrl: `http://dummy.url/` })
)
const queryRecords = await runQuery(graphql, ``, [], pathPrefix)

const data = await graphql(``)
const queryRecords = filterQuery(data, [], pathPrefix)
const urls = serialize(queryRecords)

verifyUrlsExistInResults(urls, [
Expand All @@ -72,15 +75,17 @@ describe(`results using default settings`, () => {

it(`excludes pages without trailing slash`, async () => {
const graphql = () => Promise.resolve(generateQueryResultsMock())
const queryRecords = await runQuery(graphql, ``, [`/page-2`], pathPrefix)
const data = await graphql(``)
const queryRecords = filterQuery(data, [`/page-2`], pathPrefix)
const urls = serialize(queryRecords)

verifyUrlsExistInResults(urls, [`http://dummy.url${pathPrefix}/page-1`])
})

it(`excludes pages with trailing slash`, async () => {
const graphql = () => Promise.resolve(generateQueryResultsMock())
const queryRecords = await runQuery(graphql, ``, [`/page-2/`], pathPrefix)
const data = await graphql(``)
const queryRecords = filterQuery(data, [`/page-2/`], pathPrefix)
const urls = serialize(queryRecords)

verifyUrlsExistInResults(urls, [`http://dummy.url${pathPrefix}/page-1`])
Expand All @@ -92,7 +97,8 @@ describe(`results using default settings`, () => {
expect.assertions(1)

try {
await runQuery(graphql, ``, [], pathPrefix)
const data = await graphql(``)
filterQuery(data, [], pathPrefix)
} catch (err) {
expect(err.message).toEqual(
expect.stringContaining(`SiteMetaData 'siteUrl' property is required`)
Expand All @@ -109,3 +115,51 @@ describe(`results using default settings`, () => {
runTests(`/path-prefix`)
})
})

describe(`results using non default alternatives`, () => {
  // Builds a mocked GraphQL response whose pages are exposed through the flat
  // `allSitePage.nodes` list rather than `allSitePage.edges[].node`.
  const generateQueryResultsMockNodes = (
    { siteUrl } = { siteUrl: `http://dummy.url` }
  ) => {
    const pagePaths = [`/page-1`, `/page-2`]

    return {
      data: {
        site: { siteMetadata: { siteUrl } },
        allSitePage: { nodes: pagePaths.map(path => ({ path })) },
      },
    }
  }

  // Stand-in for Gatsby's `graphql` helper: resolves with the mocked response.
  const graphql = async () => generateQueryResultsMockNodes()

  it(`handles allSitePage.nodes type query properly`, async () => {
    const results = await graphql(``)
    const urls = serialize(filterQuery(results, [], ``))

    verifyUrlsExistInResults(urls, [
      `http://dummy.url/page-1`,
      `http://dummy.url/page-2`,
    ])
  })

  it(`handles custom siteUrl Resolver Properly type query properly`, async () => {
    const customUrl = `https://another.dummy.url`
    const results = await graphql(``)

    // The resolver ignores the query data and always returns the custom URL.
    const { site } = filterQuery(results, [], ``, () => customUrl)

    expect(site.siteMetadata.siteUrl).toEqual(customUrl)
  })
})
25 changes: 20 additions & 5 deletions packages/gatsby-plugin-sitemap/src/gatsby-node.js
Expand Up @@ -2,7 +2,7 @@ import path from "path"
import sitemap from "sitemap"
import {
defaultOptions,
runQuery,
filterQuery,
writeFile,
renameFile,
withoutTrailingSlash,
Expand All @@ -18,7 +18,15 @@ exports.onPostBuild = async (
delete options.plugins
delete options.createLinkInHead

const { query, serialize, output, exclude, hostname, ...rest } = {
const {
query,
serialize,
output,
exclude,
hostname,
resolveSiteUrl,
...rest
} = {
...defaultOptions,
...options,
}
Expand All @@ -28,8 +36,15 @@ exports.onPostBuild = async (
// Paths we're excluding...
const excludeOptions = exclude.concat(defaultOptions.exclude)

const queryRecords = await runQuery(graphql, query, excludeOptions, basePath)
const urls = serialize(queryRecords)
const queryRecords = await graphql(query)

const filteredRecords = filterQuery(
queryRecords,
excludeOptions,
basePath,
resolveSiteUrl
)
const urls = serialize(filteredRecords)

if (!rest.sitemapSize || urls.length <= rest.sitemapSize) {
const map = sitemap.createSitemap(rest)
Expand All @@ -41,7 +56,7 @@ exports.onPostBuild = async (
site: {
siteMetadata: { siteUrl },
},
} = queryRecords
} = filteredRecords
return new Promise(resolve => {
// sitemap-index.xml is default file name. (https://git.io/fhNgG)
const indexFilePath = path.join(
Expand Down
109 changes: 72 additions & 37 deletions packages/gatsby-plugin-sitemap/src/internals.js
Expand Up @@ -8,46 +8,62 @@ export const withoutTrailingSlash = path =>
export const writeFile = pify(fs.writeFile)
export const renameFile = pify(fs.rename)

export const runQuery = (handler, query, excludes, pathPrefix) =>
handler(query).then(r => {
if (r.errors) {
throw new Error(r.errors.join(`, `))
}
export function filterQuery(
results,
excludes,
pathPrefix,
resolveSiteUrl = defaultOptions.resolveSiteUrl
) {
const { errors, data } = results

// Removing excluded paths
r.data.allSitePage.edges = r.data.allSitePage.edges.filter(
page =>
!excludes.some(excludedRoute =>
minimatch(
withoutTrailingSlash(page.node.path),
withoutTrailingSlash(excludedRoute)
)
)
)
if (errors) {
throw new Error(errors.join(`, `))
}

// Add path prefix
r.data.allSitePage.edges = r.data.allSitePage.edges.map(page => {
page.node.path = (pathPrefix + page.node.path).replace(/^\/\//g, `/`)
return page
})
let { allPages, originalType } = getNodes(data.allSitePage)

// siteUrl Validation
if (
!r.data.site.siteMetadata.siteUrl ||
r.data.site.siteMetadata.siteUrl.trim().length == 0
) {
throw new Error(
`SiteMetaData 'siteUrl' property is required and cannot be left empty. Check out the documentation to see a working example: https://www.gatsbyjs.org/packages/gatsby-plugin-sitemap/#how-to-use`
// Removing excluded paths
allPages = allPages.filter(
page =>
!excludes.some(excludedRoute =>
minimatch(
withoutTrailingSlash(page.path),
withoutTrailingSlash(excludedRoute)
)
)
}
)

// Add path prefix
allPages = allPages.map(page => {
page.path = (pathPrefix + page.path).replace(/^\/\//g, `/`)
return page
})

// siteUrl Validation

// remove trailing slash of siteUrl
r.data.site.siteMetadata.siteUrl = withoutTrailingSlash(
r.data.site.siteMetadata.siteUrl
let siteUrl = resolveSiteUrl(data)

if (!siteUrl || siteUrl.trim().length == 0) {
throw new Error(
`SiteMetaData 'siteUrl' property is required and cannot be left empty. Check out the documentation to see a working example: https://www.gatsbyjs.org/packages/gatsby-plugin-sitemap/#how-to-use`
)
}

return r.data
})
// remove trailing slash of siteUrl
siteUrl = withoutTrailingSlash(siteUrl)

return {
allSitePage: {
[originalType]:
originalType === `nodes`
? allPages
: allPages.map(page => {
return { node: page }
}),
},
site: { siteMetadata: { siteUrl } },
}
}

export const defaultOptions = {
query: `
Expand All @@ -74,12 +90,31 @@ export const defaultOptions = {
`/offline-plugin-app-shell-fallback`,
],
createLinkInHead: true,
serialize: ({ site, allSitePage }) =>
allSitePage.edges.map(edge => {
serialize: ({ site, allSitePage }) => {
const { allPages } = getNodes(allSitePage)
return allPages?.map(page => {
return {
url: site.siteMetadata.siteUrl + edge.node.path,
url: `${site.siteMetadata?.siteUrl ?? ``}${page.path}`,
changefreq: `daily`,
priority: 0.7,
}
}),
})
},
resolveSiteUrl: data => data.site.siteMetadata.siteUrl,
}

/**
 * Flattens the `allSitePage` portion of a query result into a list of pages.
 *
 * Two query shapes are understood: `allSitePage.nodes` (already flat) and
 * `allSitePage.edges`, where each edge wraps its page in a `node` property.
 *
 * @param {object} results - The `allSitePage` object taken from the query data.
 * @returns {{allPages: Array|undefined, originalType: string}} The page
 *   objects plus the key (`nodes` or `edges`) they were read from.
 * @throws {Error} When `results` is not an object, or contains neither a
 *   `nodes` nor an `edges` key.
 */
function getNodes(results) {
  // The `in` operator throws a bare TypeError for null, undefined and
  // primitives. Guard first so that a query without `allSitePage` reaches the
  // descriptive error at the bottom instead.
  const canInspect =
    (typeof results === `object` && results !== null) ||
    typeof results === `function`

  if (canInspect && `nodes` in results) {
    return { allPages: results.nodes, originalType: `nodes` }
  }

  if (canInspect && `edges` in results) {
    return {
      // `edges` may be present but nullish; keep returning undefined then.
      allPages: results.edges?.map(edge => edge.node),
      originalType: `edges`,
    }
  }

  throw new Error(
    `[gatsby-plugin-sitemap]: Plugin is unsure how to handle the results of your query, you'll need to write custom page filter and serializer in your gatsby config`
  )
}

0 comments on commit 07319d0

Please sign in to comment.