Skip to content

Commit

Permalink
feat(gatsby): switch from arrays to node iterators (#31718)
Browse files Browse the repository at this point in the history
  • Loading branch information
vladar committed Jun 10, 2021
1 parent 3ee9008 commit 5278e1e
Show file tree
Hide file tree
Showing 11 changed files with 223 additions and 100 deletions.
59 changes: 59 additions & 0 deletions packages/gatsby/src/datastore/common/iterable.ts
@@ -0,0 +1,59 @@
import { IGatsbyIterable } from "../types"

export class GatsbyIterable<T> implements IGatsbyIterable<T> {
  /**
   * Wraps either a re-iterable source (Array, Map view, another iterable)
   * or a bare one-shot Iterator (e.g. `Map.prototype.values()`).
   *
   * NOTE: when constructed from a bare Iterator, the instance can only be
   * traversed once — a second `for..of` silently yields nothing. Passing
   * an Iterable keeps the wrapper re-iterable, because each traversal asks
   * the source for a fresh iterator.
   */
  constructor(private source: Iterable<T> | Iterator<T>) {}

  [Symbol.iterator](): Iterator<T> {
    // Prefer a fresh iterator per traversal when the source supports it;
    // otherwise hand out the single stored iterator (one-shot semantics).
    return isIterable(this.source)
      ? this.source[Symbol.iterator]()
      : this.source
  }

  // Lazily appends `other` after this sequence.
  concat<U>(other: Iterable<U>): GatsbyIterable<T | U> {
    return new GatsbyIterable(concatSequence(this, other))
  }

  // Lazily transforms each entry with `fn`; nothing runs until iteration.
  map<U>(fn: (entry: T) => U): GatsbyIterable<U> {
    return new GatsbyIterable(mapSequence(this, fn))
  }

  // Lazily keeps only entries for which `predicate` returns a truthy value.
  filter(predicate: (entry: T) => unknown): GatsbyIterable<T> {
    return new GatsbyIterable<T>(filterSequence(this, predicate))
  }

  // Eagerly consumes the sequence, invoking `callback` once per entry.
  forEach(callback: (entry: T) => unknown): void {
    for (const value of this) {
      callback(value)
    }
  }
}

// Type guard: distinguishes an Iterable from a bare Iterator at runtime.
function isIterable<T>(obj: Iterable<T> | Iterator<T>): obj is Iterable<T> {
  return typeof (obj as Iterable<T>)[Symbol.iterator] === `function`
}

// Lazily applies `transform` to every element of `input`.
// Elements are pulled (and transformed) one at a time, on demand.
function* mapSequence<T, U>(
  input: Iterable<T>,
  transform: (arg: T) => U
): Iterator<U> {
  for (const element of input) yield transform(element)
}

// Lazily yields only those elements of `input` for which `keep`
// returns a truthy value; everything else is skipped.
function* filterSequence<T>(
  input: Iterable<T>,
  keep: (arg: T) => unknown
): Iterator<T> {
  for (const element of input) {
    if (!keep(element)) continue
    yield element
  }
}

// Lazily chains two iterables: exhausts `head`, then yields from `tail`.
// `yield*` delegates directly to each source's iterator.
function* concatSequence<T, U = T>(
  head: Iterable<T>,
  tail: Iterable<U>
): Iterator<U | T> {
  yield* head
  yield* tail
}
31 changes: 18 additions & 13 deletions packages/gatsby/src/datastore/in-memory/in-memory-datastore.ts
@@ -1,29 +1,32 @@
import { IDataStore } from "../types"
import { IDataStore, IGatsbyIterable } from "../types"
import { store } from "../../redux"
import { IGatsbyNode } from "../../redux/types"
import { GatsbyIterable } from "../common/iterable"

/**
* @deprecated
*/
function getNodes(): Array<IGatsbyNode> {
const nodes = store.getState().nodes
if (nodes) {
return Array.from(nodes.values())
} else {
return []
}
const nodes = store.getState().nodes ?? new Map()
return Array.from(nodes.values())
}

/**
* @deprecated
*/
function getNodesByType(type: string): Array<IGatsbyNode> {
const nodes = store.getState().nodesByType.get(type)
if (nodes) {
return Array.from(nodes.values())
} else {
return []
}
const nodes = store.getState().nodesByType.get(type) ?? new Map()
return Array.from(nodes.values())
}

function iterateNodes(): IGatsbyIterable<IGatsbyNode> {
const nodes = store.getState().nodes ?? new Map()
return new GatsbyIterable(nodes.values())
}

function iterateNodesByType(type: string): IGatsbyIterable<IGatsbyNode> {
const nodes = store.getState().nodesByType.get(type) ?? new Map()
return new GatsbyIterable(nodes.values())
}

function getNode(id: string): IGatsbyNode | undefined {
Expand Down Expand Up @@ -59,6 +62,8 @@ export function setupInMemoryStore(): IDataStore {
getTypes,
countNodes,
ready,
iterateNodes,
iterateNodesByType,

// deprecated:
getNodes,
Expand Down
110 changes: 61 additions & 49 deletions packages/gatsby/src/datastore/in-memory/indexing.ts
@@ -1,7 +1,7 @@
import { store } from "../../redux"
import { IGatsbyNode } from "../../redux/types"
import { IDbQueryElemMatch } from "../common/query"
import { getNodes, getNodesByType } from "../"
import { getDataStore } from "../"

// Only list supported ops here. "CacheableFilterOp"
export type FilterOp = // TODO: merge with DbComparator ?
Expand Down Expand Up @@ -205,19 +205,23 @@ export const ensureIndexByQuery = (
// it's probably faster to loop through all nodes. Perhaps. Maybe.

if (nodeTypeNames.length === 1) {
getNodesByType(nodeTypeNames[0]).forEach(node => {
addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
})
getDataStore()
.iterateNodesByType(nodeTypeNames[0])
.forEach(node => {
addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
})
} else {
// Here we must first filter for the node type
// This loop is expensive at scale (!)
getNodes().forEach(node => {
if (!nodeTypeNames.includes(node.internal.type)) {
return
}
getDataStore()
.iterateNodes()
.forEach(node => {
if (!nodeTypeNames.includes(node.internal.type)) {
return
}

addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
})
addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
})
}

postIndexingMetaSetup(filterCache, op)
Expand Down Expand Up @@ -245,22 +249,9 @@ export function ensureEmptyFilterCache(
})

if (nodeTypeNames.length === 1) {
getNodesByType(nodeTypeNames[0]).forEach(node => {
if (!node.__gatsby_resolved) {
const typeName = node.internal.type
const resolvedNodes = resolvedNodesCache.get(typeName)
const resolved = resolvedNodes?.get(node.id)
if (resolved !== undefined) {
node.__gatsby_resolved = resolved
}
}
orderedByCounter.push(node)
})
} else {
// Here we must first filter for the node type
// This loop is expensive at scale (!)
getNodes().forEach(node => {
if (nodeTypeNames.includes(node.internal.type)) {
getDataStore()
.iterateNodesByType(nodeTypeNames[0])
.forEach(node => {
if (!node.__gatsby_resolved) {
const typeName = node.internal.type
const resolvedNodes = resolvedNodesCache.get(typeName)
Expand All @@ -270,8 +261,25 @@ export function ensureEmptyFilterCache(
}
}
orderedByCounter.push(node)
}
})
})
} else {
// Here we must first filter for the node type
// This loop is expensive at scale (!)
getDataStore()
.iterateNodes()
.forEach(node => {
if (nodeTypeNames.includes(node.internal.type)) {
if (!node.__gatsby_resolved) {
const typeName = node.internal.type
const resolvedNodes = resolvedNodesCache.get(typeName)
const resolved = resolvedNodes?.get(node.id)
if (resolved !== undefined) {
node.__gatsby_resolved = resolved
}
}
orderedByCounter.push(node)
}
})
}

// Since each node can only have one type, we shouldn't have to be concerned
Expand Down Expand Up @@ -363,30 +371,34 @@ export const ensureIndexByElemMatch = (
filtersCache.set(filterCacheKey, filterCache)

if (nodeTypeNames.length === 1) {
getNodesByType(nodeTypeNames[0]).forEach(node => {
addNodeToBucketWithElemMatch(
node,
node,
filter,
filterCache,
resolvedNodesCache
)
})
getDataStore()
.iterateNodesByType(nodeTypeNames[0])
.forEach(node => {
addNodeToBucketWithElemMatch(
node,
node,
filter,
filterCache,
resolvedNodesCache
)
})
} else {
// Expensive at scale
getNodes().forEach(node => {
if (!nodeTypeNames.includes(node.internal.type)) {
return
}
getDataStore()
.iterateNodes()
.forEach(node => {
if (!nodeTypeNames.includes(node.internal.type)) {
return
}

addNodeToBucketWithElemMatch(
node,
node,
filter,
filterCache,
resolvedNodesCache
)
})
addNodeToBucketWithElemMatch(
node,
node,
filter,
filterCache,
resolvedNodesCache
)
})
}

postIndexingMetaSetup(filterCache, op)
Expand Down
8 changes: 4 additions & 4 deletions packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
@@ -1,9 +1,9 @@
import { ArrayLikeIterable, RootDatabase, open } from "lmdb-store"
import { RootDatabase, open } from "lmdb-store"
// import { performance } from "perf_hooks"
import { ActionsUnion, IGatsbyNode } from "../../redux/types"
import { updateNodes } from "./updates/nodes"
import { updateNodesByType } from "./updates/nodes-by-type"
import { IDataStore, ILmdbDatabases } from "../types"
import { IDataStore, IGatsbyIterable, ILmdbDatabases } from "../types"
import { emitter, replaceReducer } from "../../redux"

const rootDbFile =
Expand Down Expand Up @@ -73,7 +73,7 @@ function getNodesByType(type: string): Array<IGatsbyNode> {
return result ?? []
}

function iterateNodes(): ArrayLikeIterable<IGatsbyNode> {
function iterateNodes(): IGatsbyIterable<IGatsbyNode> {
// Additionally fetching items by id to leverage lmdb-store cache
const nodesDb = getDatabases().nodes
return nodesDb
Expand All @@ -82,7 +82,7 @@ function iterateNodes(): ArrayLikeIterable<IGatsbyNode> {
.filter(Boolean)
}

function iterateNodesByType(type: string): ArrayLikeIterable<IGatsbyNode> {
function iterateNodesByType(type: string): IGatsbyIterable<IGatsbyNode> {
const nodesByType = getDatabases().nodesByType
return nodesByType
.getValues(type)
Expand Down
11 changes: 11 additions & 0 deletions packages/gatsby/src/datastore/types.ts
Expand Up @@ -9,11 +9,22 @@ export interface ILmdbDatabases {
nodesByType: Database<NodeId, NodeType>
}

// Note: this type is compatible with lmdb-store ArrayLikeIterable
export interface IGatsbyIterable<T> extends Iterable<T> {
  [Symbol.iterator](): Iterator<T>
  // Lazily transforms each entry; evaluation happens on iteration.
  map<U>(fn: (entry: T) => U): IGatsbyIterable<U>
  // concat<U>(other: Iterable<U>): Iterable<T | U>
  // `unknown` (not `any`) keeps call sites type-safe and matches the
  // GatsbyIterable implementation's signatures.
  filter(predicate: (entry: T) => unknown): IGatsbyIterable<T>
  forEach(callback: (entry: T) => unknown): void
}

export interface IDataStore {
getNode(id: string): IGatsbyNode | undefined
getTypes(): Array<string>
countNodes(typeName?: string): number
ready(): Promise<void>
iterateNodes(): IGatsbyIterable<IGatsbyNode>
iterateNodesByType(type: string): IGatsbyIterable<IGatsbyNode>

/** @deprecated */
getNodes(): Array<IGatsbyNode>
Expand Down
4 changes: 2 additions & 2 deletions packages/gatsby/src/schema/index.js
Expand Up @@ -2,7 +2,7 @@

const tracer = require(`opentracing`).globalTracer()
const { store } = require(`../redux`)
const { getDataStore, getNodesByType, getTypes } = require(`../datastore`)
const { getDataStore, getTypes } = require(`../datastore`)
const { createSchemaComposer } = require(`./schema-composer`)
const { buildSchema, rebuildSchemaWithSitePage } = require(`./schema`)
const { builtInFieldExtensions } = require(`./extensions`)
Expand Down Expand Up @@ -65,7 +65,7 @@ const buildInferenceMetadata = ({ types }) =>
type: `BUILD_TYPE_METADATA`,
payload: {
typeName,
nodes: getNodesByType(typeName),
nodes: getDataStore().iterateNodesByType(typeName),
},
})
if (typeNames.length > 0) {
Expand Down
36 changes: 24 additions & 12 deletions packages/gatsby/src/schema/infer/add-inferred-fields.js
Expand Up @@ -8,8 +8,7 @@ import { isFile } from "./is-file"
import { isDate } from "../types/date"
import { addDerivedType } from "../types/derived-types"
import { is32BitInteger } from "../../utils/is-32-bit-integer"
import { printDirectives } from "../print"
const { getNode, getNodes } = require(`../../datastore`)
const { getDataStore } = require(`../../datastore`)

const addInferredFields = ({
schemaComposer,
Expand Down Expand Up @@ -212,17 +211,30 @@ const getFieldConfigFromFieldNameConvention = ({
const path = key.split(`___NODE___`)[1]
// Allow linking by nested fields, e.g. `author___NODE___contact___email`
const foreignKey = path && path.replace(/___/g, `.`)
const linkedTypesSet = new Set()

if (foreignKey) {
// TODO: deprecate foreign keys like this (e.g. author___NODE___contact___email)
// and recommend using schema customization instead
const linkedValues = new Set(value.linkedNodes)
getDataStore()
.iterateNodes()
.forEach(node => {
const value = _.get(node, foreignKey)
if (linkedValues.has(value)) {
linkedTypesSet.add(node.internal.type)
}
})
} else {
value.linkedNodes.forEach(id => {
const node = getDataStore().getNode(id)
if (node) {
linkedTypesSet.add(node.internal.type)
}
})
}

const getNodeBy = value =>
foreignKey
? getNodes().find(node => _.get(node, foreignKey) === value)
: getNode(value)

const linkedNodes = value.linkedNodes.map(getNodeBy)

const linkedTypes = _.uniq(
linkedNodes.filter(Boolean).map(node => node.internal.type)
)
const linkedTypes = [...linkedTypesSet]

invariant(
linkedTypes.length,
Expand Down

0 comments on commit 5278e1e

Please sign in to comment.