Skip to content

Commit

Permalink
fix(gatsby-source-contentful): reduce memory usage (#37910)
Browse files Browse the repository at this point in the history
* add touch nodes optout action

* cleanup

* start adding backreference cache

* revert formatting

* remove unused variable

* remove unused var

* switch from opting out types from stale nodes to opting out entire plugin

* add no nodes warning

* fix bug where double bound actions are ignored when there are no args

* update progress mock

* get owner from plugin

* rename type

* rename types

* use redux types instead of pulling from lmdb

* remove unused line

* Update source-nodes.ts

* use CREATE_NODE action instead of adding a new type owner action

* add typeowners test

* test touchNodes and enableStatefulSourceNodes()

* fix contentful tests

* snapshot updates

* chore(changelogs): update changelogs (#37808)

* fix(deps): update starters and examples - gatsby to ^5.8.1 (#37806)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>

* fix(gatsby): Validate sub plugins options (#37804)

* fix(gatsby): validate sub plugins options of gatsby-plugin-mdx

* revert hardcoded path to subplugins

* validate subplugins if they are not under 'options.plugins' field

---------

Co-authored-by: Michal Piechowiak <misiek.piechowiak@gmail.com>

* fix(create-gatsby): Use correct name in summary message (#37809)

* actual fix

* other misc changes

* chore(release): Publish next

 - create-gatsby@3.9.0-next.1
 - gatsby-cli@5.9.0-next.1
 - gatsby-plugin-sharp@5.9.0-next.1
 - gatsby-transformer-sqip@5.9.0-next.1
 - gatsby@5.9.0-next.2

* remove testing timeout

* minimal docs

* reword

* add comment

* reportOnce instead of throwing an error

* consolidate typeOwners

* use new typesToPlugins Map keys instead of pluginsToTypes Map values

* consolidate remaining typeOwners object checks into new typeOwners reducer

* fix missing owner error

* fix type errors and incorrect plugin object reference

* maybe fix unit tests

* make SitePage nodes owned by internal-data-bridge

* add missing fields on some TOUCH_NODE actions

* skip owner checks when deleting child nodes

* sp

* Update yarn.lock

* reduce memory usage by storing minimal backreference info and not blocking the event loop

* remove comment

* remove extra logs

* add helper to explain what the setImmediate promise is for

* remove unneeded entryNode condition

* refactor createAssetNodes to use async/await

* add cached node counts by assets vs content

* refactor to fix linting failure

* fix tests

* ensure mutated existing node data is applied during updates

* restore previous logging behaviour

* fix test state

* always reset memory node count when existingNodes is empty

* revert more logging

* update test to use new createAssetNodes changes

* store count instead of huge set of id's to use less memory

* remove debugger

* make sure sys.type exists before checking

* move enableStatefulSourceNodes call to main sourceNodes fn

* use util

* fix e2e test - ContentfulTag node doesn't have sys.type

* always exclude ContentfulTag nodes from memory Map cache

* fix linting (consistent return)

---------

Co-authored-by: GatsbyJS Bot <core-team@gatsbyjs.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Taozc <41264828+Talaxy009@users.noreply.github.com>
Co-authored-by: Michal Piechowiak <misiek.piechowiak@gmail.com>
Co-authored-by: Lennart <lekoarts@gmail.com>
  • Loading branch information
6 people committed Apr 18, 2023
1 parent c1f0bbf commit 311b202
Show file tree
Hide file tree
Showing 6 changed files with 667 additions and 408 deletions.
Expand Up @@ -66,17 +66,15 @@ describe(`downloadContentfulAssets`, () => {
const assetNodes = []
for (const assetItem of fixtures) {
assetNodes.push(
...(await Promise.all(
createAssetNodes({
assetItem,
createNode,
createNodeId,
defaultLocale,
locales,
space,
pluginConfig,
})
))
...(await createAssetNodes({
assetItem,
createNode,
createNodeId,
defaultLocale,
locales,
space,
pluginConfig,
}))
)
}

Expand Down
27 changes: 24 additions & 3 deletions packages/gatsby-source-contentful/src/__tests__/gatsby-node.js
Expand Up @@ -6,6 +6,7 @@ import {
sourceNodes,
onPreInit,
} from "../gatsby-node"
import { existingNodes, is, memoryNodeCounts } from "../backreferences"
import { fetchContent, fetchContentTypes } from "../fetch"
import { makeId } from "../normalize"

Expand Down Expand Up @@ -59,7 +60,12 @@ describe(`gatsby-node`, () => {

const actions = {
createTypes: jest.fn(),
setPluginStatus: jest.fn(),
setPluginStatus: jest.fn(pluginStatusObject => {
pluginStatus = {
...pluginStatus,
...pluginStatusObject,
}
}),
createNode: jest.fn(async node => {
// similar checks as gatsby does
if (!_.isPlainObject(node)) {
Expand Down Expand Up @@ -99,9 +105,20 @@ describe(`gatsby-node`, () => {
}),
buildInterfaceType: jest.fn(),
}
let pluginStatus = {}
const resetPluginStatus = () => {
pluginStatus = {}
}
const store = {
getState: jest.fn(() => {
return { program: { directory: process.cwd() }, status: {} }
return {
program: { directory: process.cwd() },
status: {
plugins: {
[`gatsby-source-contentful`]: pluginStatus,
},
},
}
}),
}
const cache = createMockCache()
Expand Down Expand Up @@ -404,7 +421,11 @@ describe(`gatsby-node`, () => {
})
}

beforeEach(() => {
beforeEach(async () => {
existingNodes.clear()
is.firstSourceNodesCallOfCurrentNodeProcess = true
resetPluginStatus()

// @ts-ignore
fetchContent.mockClear()
// @ts-ignore
Expand Down
139 changes: 139 additions & 0 deletions packages/gatsby-source-contentful/src/backreferences.js
@@ -0,0 +1,139 @@
// @ts-check
import { hasFeature } from "gatsby-plugin-utils/index"
import { getDataStore } from "gatsby/dist/datastore"
import { untilNextEventLoopTick } from "./utils"

// Map of node id -> trimmed-down cache entry for existing Contentful nodes (entries are written by addNodeToExistingNodesCache). It is module-global and updated incrementally because recreating it on every data update is very slow for large sites.
export const existingNodes = new Map()

// Number of nodes visited so far while filling the cache; getExistingCachedNodes yields to the event loop every 5000 nodes.
let allNodesLoopCount = 0

// "is" === object so it can be overridden by tests
export const is = {
// Set to false at the end of every getExistingCachedNodes() call, so it is only true for the first call.
firstSourceNodesCallOfCurrentNodeProcess: true,
}

/**
 * Fill the in-memory cache of Contentful nodes (only when it is empty) and
 * return it together with the per-`sys.type` node counts.
 *
 * While filling the cache, every node owned by `gatsby-source-contentful` is
 * also touched when the running Gatsby version lacks the
 * `stateful-source-nodes` feature and this is the first call in the current
 * process. The loop periodically yields so the event loop is not blocked.
 *
 * @param {object} args
 * @param {object} args.actions - Gatsby actions; only `touchNode` is used.
 * @param {Function} args.getNode - Looks up a node by id.
 * @param {object} [args.pluginConfig] - Accepted for interface compatibility; not read here.
 * @returns {Promise<{existingNodes: Map<string, object>, memoryNodeCountsBySysType: object}>}
 */
export async function getExistingCachedNodes({
  actions,
  getNode,
  pluginConfig,
}) {
  const { touchNode } = actions

  const needToTouchNodes =
    !hasFeature(`stateful-source-nodes`) &&
    is.firstSourceNodesCallOfCurrentNodeProcess

  if (existingNodes.size === 0) {
    // The cache is rebuilt from scratch, so the counters start over as well
    memoryNodeCountsBySysType.Asset = 0
    memoryNodeCountsBySysType.Entry = 0

    const dataStore = getDataStore()
    const allNodeTypeNames = Array.from(dataStore.getTypes())

    for (const typeName of allNodeTypeNames) {
      const typeNodes = dataStore.iterateNodesByType(typeName)

      // Peek at a single node to decide whether this type belongs to this plugin
      const firstNodeOfType = Array.from(typeNodes.slice(0, 1))[0]

      if (
        !firstNodeOfType ||
        firstNodeOfType.internal.owner !== `gatsby-source-contentful`
      ) {
        continue
      }

      for (const node of typeNodes) {
        if (needToTouchNodes) {
          touchNode(node)
          touchLocalFileNodeOf(node, { touchNode, getNode })
        }

        if (++allNodesLoopCount % 5000 === 0) {
          // don't block the event loop
          await untilNextEventLoopTick()
        }

        addNodeToExistingNodesCache(node)
      }

      // don't block the event loop
      await untilNextEventLoopTick()
    }
  }

  is.firstSourceNodesCallOfCurrentNodeProcess = false

  return {
    existingNodes,
    memoryNodeCountsBySysType,
  }
}

// True when the node's `fields` mention `localFile`, whether `fields` is a list of field names or an object keyed by field name.
function hasLocalFileField(node) {
  const fields = node?.fields

  if (fields === null || fields === undefined) {
    return false
  }

  if (typeof fields.includes === `function`) {
    return fields.includes(`localFile`)
  }

  return (
    typeof fields === `object` &&
    Object.prototype.hasOwnProperty.call(fields, `localFile`)
  )
}

// Touch the file node referenced by `fields.localFile`, skipping silently when either lookup comes back empty.
function touchLocalFileNodeOf(node, { touchNode, getNode }) {
  if (!hasLocalFileField(node)) {
    return
  }

  // Prevent GraphQL type inference from crashing on this property
  const fullNode = getNode(node.id)
  const localFileId = fullNode?.fields?.localFile

  if (localFileId === null || localFileId === undefined) {
    return
  }

  const localFileNode = getNode(localFileId)

  if (localFileNode) {
    touchNode(localFileNode)
  }
}

// Running totals of cached nodes keyed by Contentful `sys.type`. Only the keys listed here are counted; nodes with any other `sys.type` are cached without being counted.
const memoryNodeCountsBySysType = {
Asset: 0,
Entry: 0,
}

/**
 * Put a slimmed-down copy of `node` into the in-memory cache.
 *
 * Only the properties needed for comparisons are kept to reduce memory usage;
 * when a node has to be updated the full node is fetched again via getNode.
 * `ContentfulTag` nodes are never cached.
 *
 * @param {object} node - Node to cache.
 * @returns {void}
 */
export function addNodeToExistingNodesCache(node) {
  const isTag = node.internal.type === `ContentfulTag`
  if (isTag) {
    return
  }

  // Count the node only once, and only for the sys types being tracked
  const sysType = node.sys.type
  const isFirstSightingOfTrackedType =
    sysType in memoryNodeCountsBySysType && !existingNodes.has(node.id)

  if (isFirstSightingOfTrackedType) {
    memoryNodeCountsBySysType[sysType] ||= 0
    memoryNodeCountsBySysType[sysType]++
  }

  // Reference fields (`*___NODE`) are carried over untouched
  const referenceFields = Object.fromEntries(
    Object.entries(node).filter(([fieldName]) => fieldName.endsWith(`___NODE`))
  )

  existingNodes.set(node.id, {
    id: node.id,
    contentful_id: node.contentful_id,
    sys: { type: node.sys.type },
    node_locale: node.node_locale,
    children: node.children,
    internal: { owner: node.internal.owner },
    __memcache: true,
    ...referenceFields,
  })
}

/**
 * Drop `node` from the in-memory cache and keep the per-`sys.type` counters
 * in step. `ContentfulTag` nodes are ignored because they are never cached.
 *
 * @param {object} node - Node to remove; identified by `node.id`.
 * @returns {void}
 */
export function removeNodeFromExistingNodesCache(node) {
  const isTag = node.internal.type === `ContentfulTag`
  if (isTag) {
    return
  }

  const sysType = node.sys.type
  const isCachedNodeOfTrackedType =
    sysType in memoryNodeCountsBySysType && existingNodes.has(node.id)

  if (isCachedNodeOfTrackedType) {
    memoryNodeCountsBySysType[sysType] ||= 0
    // Decrement, but never let the counter drop below zero
    memoryNodeCountsBySysType[sysType] = Math.max(
      memoryNodeCountsBySysType[sysType] - 1,
      0
    )
  }

  existingNodes.delete(node.id)
}

0 comments on commit 311b202

Please sign in to comment.