From a0b31bcaaa04db8ff0675b3653c841bf47c67814 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Thu, 22 Apr 2021 10:48:02 +0200 Subject: [PATCH] fix(gatsby): persist pages between runs (#28590) * fix(gatsby): add pages to saved redux state * chore: update test snapshots * unskip context change scenario for data tracking tests * mock Date.now so persistence tests don't rely on actual time (needed for updatedAt and snapshot testing) * refactor persistence tests a bit to allow for creating different pages per test scenario * shard pages state (on top of existing sharding for nodes) * drop page count check (there actually might be valid cases for 0 pages) * only show page context size warning for pages (and not nodes) * fix lint * adjust snapshot * test(artifacts): add case for changing page context * fix(gatsby): garbage collect stateful pages (#28760) * add stateful page to artifacts tests * fix(gatsby): garbage collect stateful pages * Revert "fix: clear tracked queries when deleting stale page-data files (#29431)" (#30848) This reverts commit 478cf68f0077d95d5e46a235cf235e01fe4eaddc. 
Co-authored-by: Reda Bacha --- .../artifacts/__tests__/index.js | 38 ++ integration-tests/artifacts/gatsby-node.js | 20 ++ packages/gatsby/src/bootstrap/index.ts | 8 +- .../src/query/__tests__/data-tracking.js | 2 +- .../__tests__/__snapshots__/index.js.snap | 17 + packages/gatsby/src/redux/__tests__/index.js | 337 +++++++++++++++++- packages/gatsby/src/redux/index.ts | 1 + packages/gatsby/src/redux/persist.ts | 61 +++- packages/gatsby/src/redux/reducers/queries.ts | 14 - packages/gatsby/src/redux/types.ts | 9 +- .../src/services/__tests__/create-pages.ts | 223 ++++++++++++ .../src/services/create-pages-statefully.ts | 32 -- packages/gatsby/src/services/create-pages.ts | 31 +- packages/gatsby/src/services/index.ts | 1 - .../src/state-machines/data-layer/index.ts | 11 +- .../src/state-machines/data-layer/services.ts | 2 - .../src/state-machines/data-layer/types.ts | 1 + .../src/state-machines/develop/index.ts | 9 +- packages/gatsby/src/utils/changed-pages.ts | 6 +- packages/gatsby/src/utils/page-data.ts | 12 +- 20 files changed, 718 insertions(+), 117 deletions(-) create mode 100644 packages/gatsby/src/services/__tests__/create-pages.ts delete mode 100644 packages/gatsby/src/services/create-pages-statefully.ts diff --git a/integration-tests/artifacts/__tests__/index.js b/integration-tests/artifacts/__tests__/index.js index 7ffb2a2e807f8..866f605e4fc67 100644 --- a/integration-tests/artifacts/__tests__/index.js +++ b/integration-tests/artifacts/__tests__/index.js @@ -230,6 +230,40 @@ function assertHTMLCorrectness(runNumber) { }) }) }) + + describe(`/changing-context/`, () => { + let pageDataContent + let htmlContent + beforeAll(() => { + pageDataContent = fs.readJsonSync( + path.join( + process.cwd(), + `public`, + `page-data`, + `changing-context`, + `page-data.json` + ) + ) + + htmlContent = fs.readFileSync( + path.join(process.cwd(), `public`, `changing-context`, `index.html`), + `utf-8` + ) + }) + + it(`html is correctly generated using fresh page context`, () 
=> { + // remove <!-- --> from html content string as that's impl details of react ssr + expect(htmlContent.replace(/<!-- -->/g, ``)).toContain( + `Dummy page for runNumber: ${runNumber}` + ) + }) + + it(`page-data is correctly generated using fresh page context`, () => { + expect(pageDataContent.result.pageContext).toEqual({ + dummyId: `runNumber: ${runNumber}`, + }) + }) + }) } function assertNodeCorrectness(runNumber) { @@ -486,6 +520,7 @@ describe(`Second run (different pages created, data changed)`, () => { `/static-query-result-tracking/should-invalidate/`, `/page-query-template-change/`, `/stale-pages/sometimes-i-have-trailing-slash-sometimes-i-dont/`, + `/changing-context/`, ] const expectedPagesToRemainFromPreviousBuild = [ @@ -495,6 +530,7 @@ describe(`Second run (different pages created, data changed)`, () => { `/static-query-result-tracking/stable/`, `/static-query-result-tracking/rerun-query-but-dont-recreate-html/`, `/page-that-will-have-trailing-slash-removed`, + `/stateful-page-not-recreated-in-third-run/`, ] const expectedPages = [ @@ -579,6 +615,7 @@ describe(`Third run (js change, all pages are recreated)`, () => { `/stale-pages/only-in-first/`, `/page-query-dynamic-1/`, `/page-query-dynamic-2/`, + `/stateful-page-not-recreated-in-third-run/`, ] let changedFileOriginalContent @@ -664,6 +701,7 @@ describe(`Fourth run (gatsby-browser change - cache get invalidated)`, () => { const expectedPages = [ `/stale-pages/only-not-in-first`, `/page-query-dynamic-4/`, + `/stateful-page-not-recreated-in-third-run/`, ] const unexpectedPages = [ diff --git a/integration-tests/artifacts/gatsby-node.js b/integration-tests/artifacts/gatsby-node.js index 415f2f0cbf5e6..824bfa8f95deb 100644 --- a/integration-tests/artifacts/gatsby-node.js +++ b/integration-tests/artifacts/gatsby-node.js @@ -164,6 +164,14 @@ exports.createPages = async ({ actions, graphql }) => { }` ) + actions.createPage({ + path: `/changing-context/`, + component: require.resolve(`./src/templates/dummy`), + context: 
{ + dummyId: `runNumber: ${runNumber}`, + }, + }) + const { data } = await graphql(` { allDepPageQuery { @@ -189,6 +197,18 @@ exports.createPages = async ({ actions, graphql }) => { } } +exports.createPagesStatefully = async ({ actions }) => { + if (runNumber !== 3) { + actions.createPage({ + path: `/stateful-page-not-recreated-in-third-run/`, + component: require.resolve(`./src/templates/dummy`), + context: { + dummyId: `stateful-page`, + }, + }) + } +} + exports.onPreBuild = () => { console.log(`[test] onPreBuild`) changedBrowserCompilationHash = `not-changed` diff --git a/packages/gatsby/src/bootstrap/index.ts b/packages/gatsby/src/bootstrap/index.ts index 3773e160d15de..9853cbf25ea6c 100644 --- a/packages/gatsby/src/bootstrap/index.ts +++ b/packages/gatsby/src/bootstrap/index.ts @@ -6,7 +6,6 @@ import { sourceNodes, buildSchema, createPages, - createPagesStatefully, extractQueries, writeOutRedirects, postBootstrap, @@ -32,9 +31,12 @@ export async function bootstrap( const parentSpan = tracer.startSpan(`bootstrap`, spanArgs) - const bootstrapContext: IBuildContext = { + const bootstrapContext: IBuildContext & { + shouldRunCreatePagesStatefully: boolean + } = { ...initialContext, parentSpan, + shouldRunCreatePagesStatefully: true, } const context = { @@ -54,8 +56,6 @@ export async function bootstrap( await createPages(context) - await createPagesStatefully(context) - await handleStalePageData() await rebuildSchemaWithSitePage(context) diff --git a/packages/gatsby/src/query/__tests__/data-tracking.js b/packages/gatsby/src/query/__tests__/data-tracking.js index 3bc4f364cec92..ea162cbf12888 100644 --- a/packages/gatsby/src/query/__tests__/data-tracking.js +++ b/packages/gatsby/src/query/__tests__/data-tracking.js @@ -879,7 +879,7 @@ describe(`query caching between builds`, () => { }, 99999) }) - describe.skip(`Changing page context invalidates page queries`, () => { + describe(`Changing page context invalidates page queries`, () => { beforeAll(() => { let 
pageChangeCounter = 1 let nodeChangeCounter = 1 diff --git a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap index 883da4c9ec1e0..38b78ae034cf9 100644 --- a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap +++ b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap @@ -62,6 +62,23 @@ Object { }, }, "pageDataStats": Map {}, + "pages": Map { + "/my-sweet-new-page/" => Object { + "component": "/Users/username/dev/site/src/templates/my-sweet-new-page.js", + "componentChunkName": "component---users-username-dev-site-src-templates-my-sweet-new-page-js", + "componentPath": "/Users/username/dev/site/src/templates/my-sweet-new-page.js", + "context": Object { + "id": "123456", + }, + "internalComponentName": "Component/my-sweet-new-page/", + "isCreatedByStatefulCreatePages": false, + "matchPath": undefined, + "path": "/my-sweet-new-page/", + "pluginCreatorId": "", + "pluginCreator___NODE": "", + "updatedAt": 1, + }, + }, "pendingPageDataWrites": Object { "pagePaths": Set {}, }, diff --git a/packages/gatsby/src/redux/__tests__/index.js b/packages/gatsby/src/redux/__tests__/index.js index c52a49f77dff4..603ad139cd26e 100644 --- a/packages/gatsby/src/redux/__tests__/index.js +++ b/packages/gatsby/src/redux/__tests__/index.js @@ -1,11 +1,18 @@ const _ = require(`lodash`) const path = require(`path`) +const v8 = require(`v8`) +const reporter = require(`gatsby-cli/lib/reporter`) const writeToCache = jest.spyOn(require(`../persist`), `writeToCache`) +const v8Serialize = jest.spyOn(v8, `serialize`) +const v8Deserialize = jest.spyOn(v8, `deserialize`) +const reporterInfo = jest.spyOn(reporter, `info`).mockImplementation(jest.fn) +const reporterWarn = jest.spyOn(reporter, `warn`).mockImplementation(jest.fn) + const { saveState, store, readState } = require(`../index`) const { - actions: { createPage }, + actions: { createPage, createNode }, } = require(`../actions`) const 
mockWrittenContent = new Map() @@ -93,28 +100,48 @@ function getFakeNodes() { describe(`redux db`, () => { const initialComponentsState = _.cloneDeep(store.getState().components) - beforeEach(() => { + function createPages(pages) { + // mock Date.now so Date.now() doesn't change in between tests + const RealDateNow = Date.now + let DateNowCallCount = 0 + // simulate passage of time by increasing call counter (instead of actual time value) + Date.now = jest.fn(() => ++DateNowCallCount) + store.dispatch( - createPage( - { - path: `/my-sweet-new-page/`, - // seems like jest serializer doesn't play nice with Maps on Windows - component: `/Users/username/dev/site/src/templates/my-sweet-new-page.js`, - // The context is passed as props to the component as well - // as into the component's GraphQL query. - context: { - id: `123456`, - }, - }, - { name: `default-site-plugin` } + (Array.isArray(pages) ? pages : [pages]).map(page => + createPage(page, { + name: `default-site-plugin`, + }) ) ) + Date.now = RealDateNow + } + + const defaultPage = { + path: `/my-sweet-new-page/`, + // seems like jest serializer doesn't play nice with Maps on Windows + component: `/Users/username/dev/site/src/templates/my-sweet-new-page.js`, + // The context is passed as props to the component as well + // as into the component's GraphQL query. 
+ context: { + id: `123456`, + }, + } + + beforeEach(() => { + store.dispatch({ + type: `DELETE_CACHE`, + }) writeToCache.mockClear() mockWrittenContent.clear() + reporterWarn.mockClear() + reporterInfo.mockClear() }) it(`should write redux cache to disk`, async () => { + createPages(defaultPage) + expect(initialComponentsState).toEqual(new Map()) store.getState().nodes = getFakeNodes() @@ -157,4 +184,286 @@ describe(`redux db`, () => { expect(writeToCache).not.toBeCalled() }) }) + + describe(`Sharding`, () => { + afterAll(() => { + v8Serialize.mockRestore() + v8Deserialize.mockRestore() + }) + + // we set limit to 1.5 * 1024 * 1024 * 1024 per shard + // simulating size for page and nodes will allow us to see if we create expected amount of shards + // and that we stitch them back together correctly + const nodeShardsScenarios = [ + { + numberOfNodes: 50000, + simulatedNodeObjectSize: 5 * 1024, + expectedNumberOfNodeShards: 1, + }, + { + numberOfNodes: 50, + simulatedNodeObjectSize: 5 * 1024 * 1024, + expectedNumberOfNodeShards: 1, + }, + { + numberOfNodes: 5, + simulatedNodeObjectSize: 0.6 * 1024 * 1024 * 1024, + expectedNumberOfNodeShards: 3, + }, + ] + const pageShardsScenarios = [ + { + numberOfPages: 50 * 1000, + simulatedPageObjectSize: 10 * 1024, + expectedNumberOfPageShards: 1, + expectedPageContextSizeWarning: false, + }, + { + numberOfPages: 50, + simulatedPageObjectSize: 10 * 1024 * 1024, + expectedNumberOfPageShards: 1, + expectedPageContextSizeWarning: true, + }, + { + numberOfPages: 5, + simulatedPageObjectSize: 0.9 * 1024 * 1024 * 1024, + expectedNumberOfPageShards: 5, + expectedPageContextSizeWarning: true, + }, + ] + + const scenarios = [] + for (const nodeShardsParams of nodeShardsScenarios) { + for (const pageShardsParams of pageShardsScenarios) { + scenarios.push([ + nodeShardsParams.numberOfNodes, + nodeShardsParams.simulatedNodeObjectSize, + nodeShardsParams.expectedNumberOfNodeShards, + pageShardsParams.numberOfPages, + 
pageShardsParams.simulatedPageObjectSize, + pageShardsParams.expectedNumberOfPageShards, + pageShardsParams.expectedPageContextSizeWarning + ? `with page context size warning` + : `without page context size warning`, + pageShardsParams.expectedPageContextSizeWarning, + ]) + } + } + + it.each(scenarios)( + `Scenario Nodes %i x %i bytes = %i shards / Pages %i x %i bytes = %i shards (%s)`, + async ( + numberOfNodes, + simulatedNodeObjectSize, + expectedNumberOfNodeShards, + numberOfPages, + simulatedPageObjectSize, + expectedNumberOfPageShards, + _expectedPageContextSizeWarningLabelForTestName, + expectedPageContextSizeWarning + ) => { + // just some baseline checking to make sure test setup is correct - check both in-memory state and persisted state + // and make sure it's empty + const initialStateInMemory = store.getState() + expect(initialStateInMemory.pages).toEqual(new Map()) + expect(initialStateInMemory.nodes).toEqual(new Map()) + + // we expect to have no persisted state yet - this returns empty object + // and let redux to use initial states for all redux slices + const initialPersistedState = readState() + expect(initialPersistedState.pages).toBeUndefined() + expect(initialPersistedState.nodes).toBeUndefined() + expect(initialPersistedState).toEqual({}) + + for (let nodeIndex = 0; nodeIndex < numberOfNodes; nodeIndex++) { + store.dispatch( + createNode( + { + id: `node-${nodeIndex}`, + context: { + objectType: `node`, + }, + internal: { + type: `Foo`, + contentDigest: `contentDigest-${nodeIndex}`, + }, + }, + { name: `gatsby-source-test` } + ) + ) + } + + createPages( + new Array(numberOfPages).fill(undefined).map((_, index) => { + return { + path: `/page-${index}/`, + component: `/Users/username/dev/site/src/templates/my-sweet-new-page.js`, + context: { + objectType: `page`, + possiblyHugeField: `let's pretend this field is huge (we will simulate that by mocking some things used to asses size of object)`, + }, + } + }) + ) + + const currentStateInMemory = 
store.getState() + expect(currentStateInMemory.nodes.size).toEqual(numberOfNodes) + expect(currentStateInMemory.pages.size).toEqual(numberOfPages) + + // this is just to make sure that any implementation changes in readState + // won't affect this test - so we clone current state of things and will + // use that for assertions + const clonedCurrentNodes = new Map(currentStateInMemory.nodes) + const clonedCurrentPages = new Map(currentStateInMemory.pages) + + // we expect to have no persisted state yet and that current in-memory state doesn't affect it + const persistedStateBeforeSaving = readState() + expect(persistedStateBeforeSaving.pages).toBeUndefined() + expect(persistedStateBeforeSaving.nodes).toBeUndefined() + expect(persistedStateBeforeSaving).toEqual({}) + + // simulate that nodes/pages have sizes set in scenario parameters + // it changes implementation to JSON.stringify because calling v8.serialize + // again cause max stack size errors :shrug: - this also requires adjusting + // deserialize implementation + v8Serialize.mockImplementation(obj => { + if (obj?.[1]?.context?.objectType === `node`) { + return { + toString: () => JSON.stringify(obj), + length: simulatedNodeObjectSize, + } + } else if (obj?.[1]?.context?.objectType === `page`) { + return { + toString: () => JSON.stringify(obj), + length: simulatedPageObjectSize, + } + } else { + return JSON.stringify(obj) + } + }) + v8Deserialize.mockImplementation(obj => JSON.parse(obj.toString())) + + await saveState() + + if (expectedPageContextSizeWarning) { + expect(reporterWarn).toBeCalledWith( + `The size of at least one page context chunk exceeded 500kb, which could lead to degraded performance. 
Consider putting less data in the page context.` + ) + } else { + expect(reporterWarn).not.toBeCalled() + } + + const shardsWritten = { + rest: 0, + node: 0, + page: 0, + } + + for (const fileWritten of mockWrittenContent.keys()) { + const basename = path.basename(fileWritten) + if (basename.startsWith(`redux.rest`)) { + shardsWritten.rest++ + } else if (basename.startsWith(`redux.node`)) { + shardsWritten.node++ + } else if (basename.startsWith(`redux.page`)) { + shardsWritten.page++ + } + } + + expect(writeToCache).toBeCalled() + + expect(shardsWritten.rest).toEqual(1) + expect(shardsWritten.node).toEqual(expectedNumberOfNodeShards) + expect(shardsWritten.page).toEqual(expectedNumberOfPageShards) + + // and finally - let's make sure that reading shards stitches it back together + // correctly + const persistedStateAfterSaving = readState() + + expect(persistedStateAfterSaving.nodes).toEqual(clonedCurrentNodes) + expect(persistedStateAfterSaving.pages).toEqual(clonedCurrentPages) + } + ) + }) + + it(`doesn't discard persisted cache if no pages`, () => { + expect(store.getState().nodes.size).toEqual(0) + expect(store.getState().pages.size).toEqual(0) + + store.dispatch( + createNode( + { + id: `node-test`, + context: { + objectType: `node`, + }, + internal: { + type: `Foo`, + contentDigest: `contentDigest-test`, + }, + }, + { name: `gatsby-source-test` } + ) + ) + + expect(store.getState().nodes.size).toEqual(1) + expect(store.getState().pages.size).toEqual(0) + + let persistedState = readState() + + expect(persistedState.nodes?.size ?? 0).toEqual(0) + expect(persistedState.pages?.size ?? 0).toEqual(0) + + saveState() + + // reset state in memory + store.dispatch({ + type: `DELETE_CACHE`, + }) + + expect(store.getState().nodes.size).toEqual(0) + expect(store.getState().pages.size).toEqual(0) + + persistedState = readState() + + expect(persistedState.nodes?.size ?? 0).toEqual(1) + expect(persistedState.pages?.size ?? 
0).toEqual(0) + }) + + it(`discards persisted cache if no nodes are stored there`, () => { + expect(store.getState().nodes.size).toEqual(0) + expect(store.getState().pages.size).toEqual(0) + + createPages(defaultPage) + + expect(store.getState().nodes.size).toEqual(0) + expect(store.getState().pages.size).toEqual(1) + + let persistedState = readState() + + expect(persistedState.nodes?.size ?? 0).toEqual(0) + expect(persistedState.pages?.size ?? 0).toEqual(0) + + saveState() + + // reset state in memory + store.dispatch({ + type: `DELETE_CACHE`, + }) + + expect(store.getState().nodes.size).toEqual(0) + expect(store.getState().pages.size).toEqual(0) + + persistedState = readState() + + expect(persistedState.nodes?.size ?? 0).toEqual(0) + // we expect state to be discarded because gatsby creates it least few nodes of it's own + // (particularly `Site` node). If there was nodes read this likely means something went wrong + // and state is not consistent + expect(persistedState.pages?.size ?? 0).toEqual(0) + + expect(reporterInfo).toBeCalledWith( + `Cache exists but contains no nodes. There should be at least some nodes available so it seems the cache was corrupted. 
Disregarding the cache and proceeding as if there was none.` + ) + }) }) diff --git a/packages/gatsby/src/redux/index.ts b/packages/gatsby/src/redux/index.ts index 87b07109a3437..0bc736e2560d3 100644 --- a/packages/gatsby/src/redux/index.ts +++ b/packages/gatsby/src/redux/index.ts @@ -103,6 +103,7 @@ export const saveState = (): void => { staticQueryComponents: state.staticQueryComponents, webpackCompilationHash: state.webpackCompilationHash, pageDataStats: state.pageDataStats, + pages: state.pages, pendingPageDataWrites: state.pendingPageDataWrites, staticQueriesByTemplate: state.staticQueriesByTemplate, queries: state.queries, diff --git a/packages/gatsby/src/redux/persist.ts b/packages/gatsby/src/redux/persist.ts index 239437df43dde..00b37f3877f26 100644 --- a/packages/gatsby/src/redux/persist.ts +++ b/packages/gatsby/src/redux/persist.ts @@ -9,7 +9,7 @@ import { removeSync, writeFileSync, } from "fs-extra" -import { IGatsbyNode, ICachedReduxState } from "./types" +import { IGatsbyNode, ICachedReduxState, IGatsbyPage } from "./types" import { sync as globSync } from "glob" import report from "gatsby-cli/lib/reporter" @@ -23,12 +23,15 @@ function reduxSharedFile(dir: string): string { function reduxChunkedNodesFilePrefix(dir: string): string { return path.join(dir, `redux.node.state_`) } +function reduxChunkedPagesFilePrefix(dir: string): string { + return path.join(dir, `redux.page.state_`) +} export function readFromCache(): ICachedReduxState { - // The cache is stored in two steps; the nodes in chunks and the rest - // First we revive the rest, then we inject the nodes into that obj (if any) + // The cache is stored in two steps; the nodes and pages in chunks and the rest + // First we revive the rest, then we inject the nodes and pages into that obj (if any) // Each chunk is stored in its own file, this circumvents max buffer lengths - // for sites with a _lot_ of content. Since all nodes go into a Map, the order + // for sites with a _lot_ of content. 
Since all nodes / pages go into a Map, the order // of reading them is not relevant. const reduxCacheFolder = getReduxCacheFolder() @@ -38,13 +41,13 @@ export function readFromCache(): ICachedReduxState { ) // Note: at 1M pages, this will be 1M/chunkSize chunks (ie. 1m/10k=100) - const chunks = globSync( + const nodesChunks = globSync( reduxChunkedNodesFilePrefix(reduxCacheFolder) + `*` ).map(file => v8.deserialize(readFileSync(file))) - const nodes: Array<[string, IGatsbyNode]> = [].concat(...chunks) + const nodes: Array<[string, IGatsbyNode]> = [].concat(...nodesChunks) - if (!chunks.length) { + if (!nodesChunks.length) { report.info( `Cache exists but contains no nodes. There should be at least some nodes available so it seems the cache was corrupted. Disregarding the cache and proceeding as if there was none.` ) @@ -54,10 +57,22 @@ export function readFromCache(): ICachedReduxState { obj.nodes = new Map(nodes) + // Note: at 1M pages, this will be 1M/chunkSize chunks (ie. 1m/10k=100) + const pagesChunks = globSync( + reduxChunkedPagesFilePrefix(reduxCacheFolder) + `*` + ).map(file => v8.deserialize(readFileSync(file))) + + const pages: Array<[string, IGatsbyPage]> = [].concat(...pagesChunks) + + obj.pages = new Map(pages) + return obj } -function guessSafeChunkSize(values: Array<[string, IGatsbyNode]>): number { +export function guessSafeChunkSize( + values: Array<[string, IGatsbyNode]> | Array<[string, IGatsbyPage]>, + showMaxSizeWarning: boolean = false +): number { // Pick a few random elements and measure their size then pick a chunk size // ceiling based on the worst case. Each test takes time so there's trade-off. // This attempts to prevent small sites with very large pages from OOMing. 
@@ -74,7 +89,7 @@ function guessSafeChunkSize(values: Array<[string, IGatsbyNode]>): number { } // Sends a warning once if any of the chunkSizes exceeds approx 500kb limit - if (maxSize > 500000) { + if (showMaxSizeWarning && maxSize > 500000) { report.warn( `The size of at least one page context chunk exceeded 500kb, which could lead to degraded performance. Consider putting less data in the page context.` ) @@ -90,18 +105,22 @@ function prepareCacheFolder( targetDir: string, contents: ICachedReduxState ): void { - // Temporarily save the nodes and remove them from the main redux store + // Temporarily save the nodes and pages and remove them from the main redux store // This prevents an OOM when the page nodes collectively contain to much data - const map = contents.nodes + const nodesMap = contents.nodes contents.nodes = undefined + const pagesMap = contents.pages + contents.pages = undefined + writeFileSync(reduxSharedFile(targetDir), v8.serialize(contents)) // Now restore them on the redux store - contents.nodes = map + contents.nodes = nodesMap + contents.pages = pagesMap - if (map) { + if (nodesMap) { // Now store the nodes separately, chunk size determined by a heuristic - const values: Array<[string, IGatsbyNode]> = [...map.entries()] + const values: Array<[string, IGatsbyNode]> = [...nodesMap.entries()] const chunkSize = guessSafeChunkSize(values) const chunks = Math.ceil(values.length / chunkSize) @@ -112,6 +131,20 @@ function prepareCacheFolder( ) } } + + if (pagesMap) { + // Now store the nodes separately, chunk size determined by a heuristic + const values: Array<[string, IGatsbyPage]> = [...pagesMap.entries()] + const chunkSize = guessSafeChunkSize(values, true) + const chunks = Math.ceil(values.length / chunkSize) + + for (let i = 0; i < chunks; ++i) { + writeFileSync( + reduxChunkedPagesFilePrefix(targetDir) + i, + v8.serialize(values.slice(i * chunkSize, i * chunkSize + chunkSize)) + ) + } + } } function safelyRenameToBak(reduxCacheFolder: 
string): string { diff --git a/packages/gatsby/src/redux/reducers/queries.ts b/packages/gatsby/src/redux/reducers/queries.ts index 53434afb24088..dd4a67e1a311d 100644 --- a/packages/gatsby/src/redux/reducers/queries.ts +++ b/packages/gatsby/src/redux/reducers/queries.ts @@ -90,20 +90,6 @@ export function queriesReducer( state.deletedQueries.add(action.payload.path) return state } - case `DELETED_STALE_PAGE_DATA_FILES`: { - // this action is a hack/hot fix - // it should be removed/reverted when we start persisting pages state - for (const queryId of action.payload.pagePathsToClear) { - for (const component of state.trackedComponents.values()) { - component.pages.delete(queryId) - } - state = clearNodeDependencies(state, queryId) - state = clearConnectionDependencies(state, queryId) - state.trackedQueries.delete(queryId) - } - - return state - } case `API_FINISHED`: { if (action.payload.apiName !== `createPages`) { return state diff --git a/packages/gatsby/src/redux/types.ts b/packages/gatsby/src/redux/types.ts index 978e98f9da627..924ce703a55d4 100644 --- a/packages/gatsby/src/redux/types.ts +++ b/packages/gatsby/src/redux/types.ts @@ -311,6 +311,7 @@ export interface ICachedReduxState { staticQueryComponents: IGatsbyState["staticQueryComponents"] webpackCompilationHash: IGatsbyState["webpackCompilationHash"] pageDataStats: IGatsbyState["pageDataStats"] + pages?: IGatsbyState["pages"] staticQueriesByTemplate: IGatsbyState["staticQueriesByTemplate"] pendingPageDataWrites: IGatsbyState["pendingPageDataWrites"] queries: IGatsbyState["queries"] @@ -376,7 +377,6 @@ export type ActionsUnion = | IDisableTypeInferenceAction | ISetProgramAction | ISetProgramExtensions - | IDeletedStalePageDataFiles | IRemovedHtml | ITrackedHtmlCleanup | IGeneratedHtml @@ -829,13 +829,6 @@ interface ISetProgramExtensions { payload: Array } -interface IDeletedStalePageDataFiles { - type: `DELETED_STALE_PAGE_DATA_FILES` - payload: { - pagePathsToClear: Set - } -} - interface IRemovedHtml { 
type: `HTML_REMOVED` payload: string diff --git a/packages/gatsby/src/services/__tests__/create-pages.ts b/packages/gatsby/src/services/__tests__/create-pages.ts new file mode 100644 index 0000000000000..20d8087a4e0eb --- /dev/null +++ b/packages/gatsby/src/services/__tests__/create-pages.ts @@ -0,0 +1,223 @@ +import { createPages } from "../create-pages" +import { store, emitter } from "../../redux" +import { actions } from "../../redux/actions" +import apiRunnerNode from "../../utils/api-runner-node" +import * as path from "path" + +jest.mock(`../../utils/api-runner-node`) + +jest.mock(`../../utils/js-chunk-names`, () => { + return { generateComponentChunkName: (): string => `--mocked--` } +}) + +let mockAPIs = {} + +const component = path.join(process.cwd(), `wat`) + +const testPlugin = { + name: `gatsby-source-test`, + version: `1.0.0`, +} + +describe(`createPages service cleans up not recreated pages`, () => { + let RealDateNow + let DateNowCallCount = 0 + + let createPagesRun = 1 + let createPagesStatefullyRun = 1 + + let createPagesHook + let createPagesStatefullyHook + + let deletePageActions + + function onDeletePage(deletePageAction): void { + deletePageActions.push(deletePageAction) + } + + beforeAll(() => { + RealDateNow = Date.now + Date.now = jest.fn(() => ++DateNowCallCount) + apiRunnerNode.mockImplementation((apiName, opts = {}) => { + if (mockAPIs[apiName]) { + return mockAPIs[apiName]( + { + actions: Object.keys(actions).reduce((acc, actionName) => { + acc[actionName] = (...args): any => + store.dispatch(actions[actionName](...args, testPlugin, opts)) + return acc + }, {}), + }, + {} + ) + } + return undefined + }) + + createPagesHook = mockAPIs[`createPages`] = jest.fn( + ({ actions }, _pluginOptions) => { + actions.createPage({ + path: `/stateless/stable`, + component, + }) + actions.createPage({ + path: `/stateless/dynamic/${createPagesRun}`, + component, + }) + createPagesRun++ + } + ) + + createPagesStatefullyHook = 
mockAPIs[`createPagesStatefully`] = jest.fn( + ({ actions }, _pluginOptions) => { + actions.createPage({ + path: `/stateful/stable`, + component, + }) + actions.createPage({ + path: `/stateful/dynamic/${createPagesStatefullyRun}`, + component, + }) + createPagesStatefullyRun++ + } + ) + + emitter.on(`DELETE_PAGE`, onDeletePage) + }) + + beforeEach(() => { + createPagesRun = 1 + createPagesStatefullyRun = 1 + createPagesHook.mockClear() + createPagesStatefullyHook.mockClear() + deletePageActions = [] + + store.dispatch({ type: `DELETE_CACHE` }) + }) + + afterAll(() => { + Date.now = RealDateNow + mockAPIs = {} + emitter.off(`DELETE_PAGE`, onDeletePage) + }) + + it.each([ + [`From cold cache`, { cacheStatus: `COLD` }], + [`From warm cache`, { cacheStatus: `WARM` }], + ])(`%s`, async (_, { cacheStatus }) => { + expect(deletePageActions).toEqual([]) + expect(store.getState().pages.size).toEqual(0) + + if (cacheStatus === `WARM`) { + // add some junk + store.dispatch( + actions.createPage( + { + path: `/stateless/junk`, + component, + context: {}, + }, + testPlugin + ) + ) + store.dispatch( + actions.createPage( + { + path: `/stateful/junk`, + component, + context: {}, + }, + testPlugin, + { + traceId: `initial-createPagesStatefully`, + } + ) + ) + + expect(store.getState().pages.size).toEqual(2) + expect(Array.from(store.getState().pages.keys())).toEqual([ + `/stateless/junk`, + `/stateful/junk`, + ]) + expect( + store.getState().pages.get(`/stateless/junk`) + .isCreatedByStatefulCreatePages + ).toEqual(false) + expect( + store.getState().pages.get(`/stateful/junk`) + .isCreatedByStatefulCreatePages + ).toEqual(true) + } else { + expect(store.getState().pages.size).toEqual(0) + } + + expect(mockAPIs[`createPages`]).toHaveBeenCalledTimes(0) + expect(mockAPIs[`createPagesStatefully`]).toHaveBeenCalledTimes(0) + + await createPages({ store, shouldRunCreatePagesStatefully: true }) + + expect(mockAPIs[`createPages`]).toHaveBeenCalledTimes(1) + 
expect(mockAPIs[`createPagesStatefully`]).toHaveBeenCalledTimes(1) + expect(store.getState().pages.size).toEqual(4) + expect(Array.from(store.getState().pages.keys())).toEqual( + expect.arrayContaining([ + `/stateless/stable`, + `/stateless/dynamic/1`, + `/stateful/stable`, + `/stateful/dynamic/1`, + ]) + ) + + if (cacheStatus === `WARM`) { + // "junk" pages were not recreated, so we expect DELETE_PAGE action to be emitted for those + expect(deletePageActions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: `DELETE_PAGE`, + payload: expect.objectContaining({ + path: `/stateless/junk`, + }), + }), + ]) + ) + expect(deletePageActions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: `DELETE_PAGE`, + payload: expect.objectContaining({ + path: `/stateful/junk`, + }), + }), + ]) + ) + } + + await createPages({ store, shouldRunCreatePagesStatefully: false }) + + // createPagesStatefully should not be called and stateful pages should remain as they were before calling `createPages` service + expect(mockAPIs[`createPages`]).toHaveBeenCalledTimes(2) + expect(mockAPIs[`createPagesStatefully`]).toHaveBeenCalledTimes(1) + expect(store.getState().pages.size).toEqual(4) + + expect(Array.from(store.getState().pages.keys())).toEqual( + expect.arrayContaining([ + `/stateless/stable`, + `/stateless/dynamic/2`, + `/stateful/stable`, + `/stateful/dynamic/1`, + ]) + ) + + // 1st dynamic page was not recreated so we expect that we emitted DELETE_PAGE action + expect(deletePageActions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: `DELETE_PAGE`, + payload: expect.objectContaining({ + path: `/stateless/dynamic/1`, + }), + }), + ]) + ) + }) +}) diff --git a/packages/gatsby/src/services/create-pages-statefully.ts b/packages/gatsby/src/services/create-pages-statefully.ts deleted file mode 100644 index 163263006c9cb..0000000000000 --- a/packages/gatsby/src/services/create-pages-statefully.ts +++ /dev/null @@ -1,32 +0,0 
@@ -import reporter from "gatsby-cli/lib/reporter" -import apiRunnerNode from "../utils/api-runner-node" -import { IDataLayerContext } from "../state-machines/data-layer/types" - -export async function createPagesStatefully({ - parentSpan, - gatsbyNodeGraphQLFunction, - deferNodeMutation, -}: Partial): Promise { - // A variant on createPages for plugins that want to - // have full control over adding/removing pages. The normal - // "createPages" API is called every time (during development) - // that data changes. - const activity = reporter.activityTimer(`createPagesStatefully`, { - parentSpan, - }) - activity.start() - await apiRunnerNode( - `createPagesStatefully`, - { - graphql: gatsbyNodeGraphQLFunction, - traceId: `initial-createPagesStatefully`, - waitForCascadingActions: true, - parentSpan: activity.span, - deferNodeMutation, - }, - { - activity, - } - ) - activity.end() -} diff --git a/packages/gatsby/src/services/create-pages.ts b/packages/gatsby/src/services/create-pages.ts index 9dec865bf8655..73bc3373abcec 100644 --- a/packages/gatsby/src/services/create-pages.ts +++ b/packages/gatsby/src/services/create-pages.ts @@ -11,6 +11,7 @@ export async function createPages({ gatsbyNodeGraphQLFunction, store, deferNodeMutation, + shouldRunCreatePagesStatefully, }: Partial): Promise<{ deletedPages: Array changedPages: Array @@ -33,6 +34,28 @@ export async function createPages({ }, { activity } ) + activity.end() + + if (shouldRunCreatePagesStatefully) { + const activity = reporter.activityTimer(`createPagesStatefully`, { + parentSpan, + }) + activity.start() + await apiRunnerNode( + `createPagesStatefully`, + { + graphql: gatsbyNodeGraphQLFunction, + traceId: `initial-createPagesStatefully`, + waitForCascadingActions: true, + parentSpan: activity.span, + deferNodeMutation, + }, + { + activity, + } + ) + activity.end() + } reporter.info( `Total nodes: ${store.getState().nodes.size}, SitePage nodes: ${ @@ -50,11 +73,13 @@ export async function createPages({ ) } - 
activity.end() - reporter.verbose(`Checking for deleted pages`) - const deletedPages = deleteUntouchedPages(store.getState().pages, timestamp) + const deletedPages = deleteUntouchedPages( + store.getState().pages, + timestamp, + !!shouldRunCreatePagesStatefully + ) reporter.verbose( `Deleted ${deletedPages.length} page${deletedPages.length === 1 ? `` : `s`}` diff --git a/packages/gatsby/src/services/index.ts b/packages/gatsby/src/services/index.ts index 013a89655f349..a7581f220ccb9 100644 --- a/packages/gatsby/src/services/index.ts +++ b/packages/gatsby/src/services/index.ts @@ -5,7 +5,6 @@ export { writeOutRedirects } from "./write-out-redirects" export { postBootstrap } from "./post-bootstrap" export { buildSchema } from "./build-schema" export { createPages } from "./create-pages" -export { createPagesStatefully } from "./create-pages-statefully" export { customizeSchema } from "./customize-schema" export { initialize } from "./initialize" export { sourceNodes } from "./source-nodes" diff --git a/packages/gatsby/src/state-machines/data-layer/index.ts b/packages/gatsby/src/state-machines/data-layer/index.ts index 2e1bd7adc899f..7d1f9f9929475 100644 --- a/packages/gatsby/src/state-machines/data-layer/index.ts +++ b/packages/gatsby/src/state-machines/data-layer/index.ts @@ -48,18 +48,9 @@ const initialCreatePagesStates: StatesConfig = { invoke: { id: `creating-pages`, src: `createPages`, - onDone: { - target: `creatingPagesStatefully`, - actions: `assignChangedPages`, - }, - }, - }, - creatingPagesStatefully: { - invoke: { - src: `createPagesStatefully`, - id: `creating-pages-statefully`, onDone: { target: `rebuildingSchemaWithSitePage`, + actions: `assignChangedPages`, }, }, }, diff --git a/packages/gatsby/src/state-machines/data-layer/services.ts b/packages/gatsby/src/state-machines/data-layer/services.ts index b8271a67b1107..a32125abbbc33 100644 --- a/packages/gatsby/src/state-machines/data-layer/services.ts +++ 
b/packages/gatsby/src/state-machines/data-layer/services.ts @@ -2,7 +2,6 @@ import { ServiceConfig } from "xstate" import { customizeSchema, createPages, - createPagesStatefully, buildSchema, sourceNodes, rebuildSchemaWithSitePage, @@ -18,7 +17,6 @@ export const dataLayerServices: Record< sourceNodes, createPages, buildSchema, - createPagesStatefully, rebuildSchemaWithSitePage, writeOutRedirectsAndWatch, } diff --git a/packages/gatsby/src/state-machines/data-layer/types.ts b/packages/gatsby/src/state-machines/data-layer/types.ts index b4d6c49ca0d81..5eec687222320 100644 --- a/packages/gatsby/src/state-machines/data-layer/types.ts +++ b/packages/gatsby/src/state-machines/data-layer/types.ts @@ -28,4 +28,5 @@ export interface IDataLayerContext { workerPool?: JestWorker pagesToBuild?: Array pagesToDelete?: Array + shouldRunCreatePagesStatefully?: boolean } diff --git a/packages/gatsby/src/state-machines/develop/index.ts b/packages/gatsby/src/state-machines/develop/index.ts index e3a1f76b9909b..96b787e769cc8 100644 --- a/packages/gatsby/src/state-machines/develop/index.ts +++ b/packages/gatsby/src/state-machines/develop/index.ts @@ -76,6 +76,7 @@ const developConfig: MachineConfig = { parentSpan, store, webhookBody, + shouldRunCreatePagesStatefully: true, deferNodeMutation: true, } }, @@ -292,6 +293,7 @@ const developConfig: MachineConfig = { webhookSourcePluginName, refresh: true, deferNodeMutation: true, + shouldRunCreatePagesStatefully: false, } }, onDone: { @@ -320,7 +322,12 @@ const developConfig: MachineConfig = { id: `recreate-pages`, src: `recreatePages`, data: ({ parentSpan, store }: IBuildContext): IDataLayerContext => { - return { parentSpan, store, deferNodeMutation: true } + return { + parentSpan, + store, + deferNodeMutation: true, + shouldRunCreatePagesStatefully: false, + } }, onDone: { actions: `assignServiceResult`, diff --git a/packages/gatsby/src/utils/changed-pages.ts b/packages/gatsby/src/utils/changed-pages.ts index b1b888953fa24..16a76dbafbe42 
100644 --- a/packages/gatsby/src/utils/changed-pages.ts +++ b/packages/gatsby/src/utils/changed-pages.ts @@ -7,14 +7,16 @@ import { IGatsbyPage } from "../redux/types" export function deleteUntouchedPages( currentPages: Map, - timeBeforeApisRan: number + timeBeforeApisRan: number, + shouldRunCreatePagesStatefully: boolean ): Array { const deletedPages: Array = [] // Delete pages that weren't updated when running createPages. currentPages.forEach(page => { if ( - !page.isCreatedByStatefulCreatePages && + (shouldRunCreatePagesStatefully || + !page.isCreatedByStatefulCreatePages) && page.updatedAt < timeBeforeApisRan && page.path !== `/404.html` ) { diff --git a/packages/gatsby/src/utils/page-data.ts b/packages/gatsby/src/utils/page-data.ts index e279a423642aa..a0f3d5ba4fea5 100644 --- a/packages/gatsby/src/utils/page-data.ts +++ b/packages/gatsby/src/utils/page-data.ts @@ -259,20 +259,10 @@ export async function handleStalePageData(): Promise { }) const deletionPromises: Array> = [] - const pagePathsToClear = new Set() - for (const pageDataFilePath of pageDataFilesFromPreviousBuilds) { + pageDataFilesFromPreviousBuilds.forEach(pageDataFilePath => { if (!expectedPageDataFiles.has(pageDataFilePath)) { - const stalePageDataContent = await fs.readJson(pageDataFilePath) - pagePathsToClear.add(stalePageDataContent.path) deletionPromises.push(fs.remove(pageDataFilePath)) } - } - - store.dispatch({ - type: `DELETED_STALE_PAGE_DATA_FILES`, - payload: { - pagePathsToClear, - }, }) await Promise.all(deletionPromises)