Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add createSitemapItems hook #10083

Merged
merged 8 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,37 @@ describe('createSitemap', () => {
expect(sitemap).not.toContain('/tags');
});

it('excludes items that createSitemapItems configures to be ignored', async () => {
const sitemap = await createSitemap({
siteConfig,
routes: routes([
'/',
'/search/',
'/tags/',
'/search/foo',
'/tags/foo/bar',
]),
head: {},
options: {
...options,
createSitemapItems: async (params) => {
const {defaultCreateSitemapItems, ...rest} = params;
const sitemapItems = await defaultCreateSitemapItems(rest);
const sitemapsWithoutPageAndTags = sitemapItems.filter(
(sitemapItem) =>
!sitemapItem.url.includes('/tags/') &&
!sitemapItem.url.endsWith('/search/'),
);
return sitemapsWithoutPageAndTags;
},
},
});

expect(sitemap).not.toContain('/search/</loc>');
expect(sitemap).toContain('/search/foo');
expect(sitemap).not.toContain('/tags');
});

it('keep trailing slash unchanged', async () => {
const sitemap = await createSitemap({
siteConfig,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,44 @@ describe('validateOptions', () => {
);
});
});

describe('createSitemapItems', () => {
it('accept createSitemapItems undefined', () => {
const userOptions: Options = {
createSitemapItems: undefined,
};
expect(testValidate(userOptions)).toEqual(defaultOptions);
});

it('accept createSitemapItems valid', () => {
const userOptions: Options = {
createSitemapItems: async (params) => {
const {defaultCreateSitemapItems, ...rest} = params;
const sitemapItems = await defaultCreateSitemapItems(rest);
const sitemapsWithoutPageAndTags = sitemapItems.filter(
(sitemapItem) =>
!sitemapItem.url.includes('/tags/') &&
!sitemapItem.url.includes('/page/'),
);
return sitemapsWithoutPageAndTags;
},
};
expect(testValidate(userOptions)).toEqual({
...defaultOptions,
...userOptions,
});
});

it('rejects createSitemapItems bad input type', () => {
const userOptions: Options = {
// @ts-expect-error: test
createSitemapItems: 'not a function',
};
expect(() =>
testValidate(userOptions),
).toThrowErrorMatchingInlineSnapshot(
`""createSitemapItems" must be of type function"`,
);
});
});
});
37 changes: 20 additions & 17 deletions packages/docusaurus-plugin-sitemap/src/createSitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,9 @@ import type {ReactElement} from 'react';
import {createMatcher, flattenRoutes} from '@docusaurus/utils';
import {sitemapItemsToXmlString} from './xml';
import {createSitemapItem} from './createSitemapItem';
import type {SitemapItem} from './types';
import type {DocusaurusConfig, RouteConfig} from '@docusaurus/types';
import type {SitemapItem, DefaultCreateSitemapParams} from './types';
import type {RouteConfig} from '@docusaurus/types';
import type {HelmetServerState} from 'react-helmet-async';
import type {PluginOptions} from './options';

type CreateSitemapParams = {
siteConfig: DocusaurusConfig;
routes: RouteConfig[];
head: {[location: string]: HelmetServerState};
options: PluginOptions;
};

// Maybe we want to add a routeConfig.metadata.noIndex instead?
// But using Helmet is more reliable for third-party plugins...
Expand Down Expand Up @@ -61,7 +53,7 @@ function isNoIndexMetaRoute({
// - parent routes, used for layouts
// - routes matching options.ignorePatterns
// - routes with no index metadata
function getSitemapRoutes({routes, head, options}: CreateSitemapParams) {
function getSitemapRoutes({routes, head, options}: DefaultCreateSitemapParams) {
const {ignorePatterns} = options;

const ignoreMatcher = createMatcher(ignorePatterns);
Expand All @@ -75,8 +67,8 @@ function getSitemapRoutes({routes, head, options}: CreateSitemapParams) {
return flattenRoutes(routes).filter((route) => !isRouteExcluded(route));
}

async function createSitemapItems(
params: CreateSitemapParams,
async function defaultCreateSitemapItems(
params: DefaultCreateSitemapParams,
): Promise<SitemapItem[]> {
const sitemapRoutes = getSitemapRoutes(params);
if (sitemapRoutes.length === 0) {
Expand All @@ -94,13 +86,24 @@ async function createSitemapItems(
}

export default async function createSitemap(
params: CreateSitemapParams,
params: DefaultCreateSitemapParams,
): Promise<string | null> {
const items = await createSitemapItems(params);
if (items.length === 0) {
const {head, options, routes, siteConfig} = params;
const createSitemapItems =
params.options.createSitemapItems ?? defaultCreateSitemapItems;

const sitemapItems = await createSitemapItems({
head,
options,
routes,
siteConfig,
defaultCreateSitemapItems,
});
if (sitemapItems.length === 0) {
return null;
}
const xmlString = await sitemapItemsToXmlString(items, {

const xmlString = await sitemapItemsToXmlString(sitemapItems, {
lastmod: params.options.lastmod,
});
return xmlString;
Expand Down
22 changes: 21 additions & 1 deletion packages/docusaurus-plugin-sitemap/src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
import {Joi} from '@docusaurus/utils-validation';
import {ChangeFreqList, LastModOptionList} from './types';
import type {OptionValidationContext} from '@docusaurus/types';
import type {ChangeFreq, LastModOption} from './types';
import type {
ChangeFreq,
DefaultCreateSitemapParams,
LastModOption,
SitemapItem,
} from './types';

export type PluginOptions = {
/**
Expand Down Expand Up @@ -44,6 +49,19 @@ export type PluginOptions = {
* @see https://www.sitemaps.org/protocol.html#xmlTagDefinitions
*/
priority: number | null;

/** Allow control over the construction of SitemapItems */
createSitemapItems?: CreateSitemapItemsFn;
};

type CreateSitemapItemsFn = (
params: CreateSitemapItemsParams,
) => Promise<SitemapItem[]>;

type CreateSitemapItemsParams = DefaultCreateSitemapParams & {
defaultCreateSitemapItems: (
params: DefaultCreateSitemapParams,
) => Promise<SitemapItem[]>;
};

export type Options = Partial<PluginOptions>;
Expand Down Expand Up @@ -90,6 +108,8 @@ const PluginOptionSchema = Joi.object<PluginOptions>({
.valid(null, ...LastModOptionList)
.default(DEFAULT_OPTIONS.lastmod),

createSitemapItems: Joi.function(),

ignorePatterns: Joi.array()
.items(Joi.string())
.default(DEFAULT_OPTIONS.ignorePatterns),
Expand Down
11 changes: 11 additions & 0 deletions packages/docusaurus-plugin-sitemap/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
* LICENSE file in the root directory of this source tree.
*/

import type {DocusaurusConfig, RouteConfig} from '@docusaurus/types';
import type {HelmetServerState} from 'react-helmet-async';
import type {PluginOptions} from './options';

export const LastModOptionList = ['date', 'datetime'] as const;

export type LastModOption = (typeof LastModOptionList)[number];
Expand Down Expand Up @@ -65,3 +69,10 @@ export type SitemapItem = {
*/
priority?: number | null;
};

export type DefaultCreateSitemapParams = {
siteConfig: DocusaurusConfig;
routes: RouteConfig[];
head: {[location: string]: HelmetServerState};
options: PluginOptions;
};
22 changes: 22 additions & 0 deletions website/docs/api/plugins/plugin-sitemap.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,25 @@ Accepted fields:
| `priority` | `number \| null` | `0.5` | See [sitemap docs](https://www.sitemaps.org/protocol.html#xmlTagDefinitions) |
| `ignorePatterns` | `string[]` | `[]` | A list of glob patterns; matching route paths will be filtered from the sitemap. Note that you may need to include the base URL in here. |
| `filename` | `string` | `sitemap.xml` | The path to the created sitemap file, relative to the output directory. Useful if you have two plugin instances outputting two files. |
| `createSitemapItems` | <code>[CreateSitemapItemsFn](#CreateSitemapItemsFn) \| undefined</code> | `undefined` | An optional function which can be used to transform and / or filter the items in the sitemap. |

```mdx-code-block
</APITable>
```

### Types {#types}

#### `CreateSitemapItemsFn` {#CreateSitemapItemsFn}

```ts
type CreateSitemapItemsFn = (params: {
siteConfig: DocusaurusConfig;
routes: RouteConfig[];
head: {[location: string]: HelmetServerState};
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know postBuild already receives this, but IMHO this was a mistake and we shouldn't expose this "messy" data structure as part of our public API.

Unless it's really needed I'd prefer to remove it for now, or provide it with a clear Docusaurus public API.

Also React 19 has metadata apis now, so not sure it's a good time to provide such API. We can introduce it later once things get clearer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason the API is this shape is because it is modelled on the createFeedItems API which allows the caller to call the underlying default implementation and provides the necessary inputs to do so. We could have a different style of API here which doesn't work that way. Instead, perhaps, we could expose a "default" implementation which wraps the underlying dependencies of the actual implementation and just exposes a parameterless function that could be called. That would likely feel cleaner.

What do you think? The downside of this is inconsistency. The createFeedItems and createSitemapItems APIs will be similarly named but slightly different to use from one another. Not necessarily a big issue - but worth considering

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we talk about the same thing. What annoys me most is this {[location: string]: HelmetServerState};

And annoying code like this:

// Maybe we want to add a routeConfig.metadata.noIndex instead?
// But using Helmet is more reliable for third-party plugins...
function isNoIndexMetaRoute({
  head,
  route,
}: {
  head: {[location: string]: HelmetServerState};
  route: string;
}) {
  const isNoIndexMetaTag = ({
    name,
    content,
  }: {
    name?: string;
    content?: string;
  }): boolean => {
    if (!name || !content) {
      return false;
    }
    return (
      // meta name is not case-sensitive
      name.toLowerCase() === 'robots' &&
      // Robots directives are not case-sensitive
      content.toLowerCase().includes('noindex')
    );
  };

  // https://github.com/staylor/react-helmet-async/pull/167
  const meta = head[route]?.meta.toComponent() as unknown as
    | ReactElement<{name?: string; content?: string}>[]
    | undefined;
  return meta?.some((tag) =>
    isNoIndexMetaTag({name: tag.props.name, content: tag.props.content}),
  );
}

I'd prefer if we didn't expose HelmetServerState to the user. I don't think createFeedItems exposes it so far.

options: PluginOptions;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wonder if this one is really useful

I know createFeedItem takes the config (which IMHO is already not super useful), but it doesn't take the option so maybe we shouldn't add this here?

}) => Promise<SitemapItem[]>;
```

:::info

This plugin also respects some site config:
Expand Down Expand Up @@ -86,6 +100,14 @@ const config = {
priority: 0.5,
ignorePatterns: ['/tags/**'],
filename: 'sitemap.xml',
createSitemapItems: async (params) => {
const {defaultCreateSitemapItems, ...rest} = params;
const sitemapItems = await defaultCreateSitemapItems(rest);
const sitemapsWithoutPage = sitemapItems.filter(
(sitemapItem) => !sitemapItem.url.includes('/page/'),
);
return sitemapsWithoutPage;
},
};
```

Expand Down
10 changes: 10 additions & 0 deletions website/docusaurus.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,16 @@ export default async function createConfigAsync() {
lastmod: 'date',
priority: null,
changefreq: null,
createSitemapItems: async (params) => {
const {defaultCreateSitemapItems, ...rest} = params;
const sitemapItems = await defaultCreateSitemapItems(rest);
const sitemapsWithoutPageAndTags = sitemapItems.filter(
(sitemapItem) =>
!sitemapItem.url.includes('/tags/') &&
!sitemapItem.url.includes('/page/'),
);
return sitemapsWithoutPageAndTags;
},
},
} satisfies Preset.Options,
],
Expand Down