Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gatsby-source-filesystem): Only generate hashes when a file has changed, and add an option for skipping hashing #37464

6 changes: 6 additions & 0 deletions packages/gatsby-source-filesystem/README.md
Expand Up @@ -62,6 +62,12 @@ They will be added to the following default list:
../**/dist/**
```

An alternative fingerprinting mechanism is available via the `fastHash` option.
It can significantly reduce build times on sites with many large files, but it is potentially less robust
because it fingerprints each file from its modification time and inode number instead of hashing its contents. On a modern OS this
shouldn't be a problem, but it can't be relied on completely across all systems, which is why it is opt-in.
Note that enabling this option on an existing site will force all of your files to be re-processed.

To avoid overloading `processRemoteNode` with concurrent requests, you can change the default limit of `200` concurrent downloads with the `GATSBY_CONCURRENT_DOWNLOAD` environment variable.

## How to query
Expand Down
237 changes: 163 additions & 74 deletions packages/gatsby-source-filesystem/src/__tests__/create-file-node.js
Expand Up @@ -5,6 +5,95 @@ const fs = require(`fs-extra`)

const fsStatBak = fs.stat

/**
 * Builds a minimal stand-in for the cache object passed to `createFileNode`.
 *
 * @param {Function} [get] - Function exposed as `cache.get`; defaults to a
 *   fresh `jest.fn()`.
 * @returns {{get: Function, set: Function, directory: string}} Mock cache
 *   whose `set` is a fresh `jest.fn()` and whose `directory` is `__dirname`.
 */
const createMockCache = (get = jest.fn()) => {
  const set = jest.fn()
  const directory = __dirname

  return { get, set, directory }
}

/**
 * Creates a jest mock for Gatsby's `createNodeId` helper.
 *
 * @returns {Function} A mock function that always returns the fixed id
 *   `uuid-from-gatsby`, keeping node ids stable in snapshots.
 */
const createMockCreateNodeId = () =>
  jest.fn().mockReturnValue(`uuid-from-gatsby`)

// MD5 hash of the file (if the mock below changes this should change)
const fileHash = `8d777f385d3dfec8815d20f7496026dc`

// mtime + inode (if the mock below changes this should change)
const fileFastHash = `123456123456`

/**
 * Sanitizes a file node in place and checks it against the expected shape.
 *
 * String values on `node` and on `node.internal` have every match of `dname`
 * replaced with `<DIR>` and then every match of `fname` replaced with
 * `<FILE>`. Both arguments are compiled with `new RegExp`, so they are
 * treated as patterns rather than literal text.
 *
 * @param {Object} node - File node to sanitize (mutated) and assert on.
 * @param {string} dname - Directory name/pattern to replace with `<DIR>`.
 * @param {string} fname - File name/pattern to replace with `<FILE>`.
 * @param {string} contentDigest - Digest expected in `internal.contentDigest`.
 */
function testNode(node, dname, fname, contentDigest) {
  // Applied in this order to every string value: directory first, then file.
  const placeholders = [
    [dname, `<DIR>`],
    [fname, `<FILE>`],
  ]

  // Sanitize all filenames on the given object, mutating it in place.
  const scrub = target => {
    for (const key of Object.keys(target)) {
      if (typeof target[key] !== `string`) {
        continue
      }
      for (const [pattern, token] of placeholders) {
        target[key] = target[key].replace(new RegExp(pattern, `g`), token)
      }
    }
  }

  scrub(node)
  scrub(node.internal)

  // Note: this snapshot should update if the mock below is changed
  expect(node).toMatchInlineSnapshot(`
Object {
"absolutePath": "<DIR>/f",
"accessTime": "1970-01-01T00:02:03.456Z",
"atime": "1970-01-01T00:02:03.456Z",
"atimeMs": 123456,
"base": "f",
"birthTime": "1970-01-01T00:02:03.456Z",
"birthtime": "1970-01-01T00:02:03.456Z",
"birthtimeMs": 123456,
"blksize": 123456,
"blocks": 123456,
"changeTime": "1970-01-01T00:02:03.456Z",
"children": Array [],
"ctime": "1970-01-01T00:02:03.456Z",
"ctimeMs": 123456,
"dev": 123456,
"dir": "<DIR>",
"ext": "",
"extension": "",
"id": "uuid-from-gatsby",
"ino": 123456,
"internal": Object {
"contentDigest": "${contentDigest}",
"description": "File \\"<DIR>/f\\"",
"mediaType": "application/octet-stream",
"type": "File",
},
"mode": 123456,
"modifiedTime": "1970-01-01T00:02:03.456Z",
"mtime": "1970-01-01T00:02:03.456Z",
"mtimeMs": 123456,
"name": "f",
"nlink": 123456,
"parent": null,
"prettySize": "123 kB",
"rdev": 123456,
"relativeDirectory": "<DIR>",
"relativePath": "<DIR>/f",
"root": "",
"size": 123456,
"sourceInstanceName": "__PROGRAMMATIC__",
"uid": 123456,
}
`)
}

// FIXME: This test needs to not use snapshots because of file differences
// and locations across users and CI systems
describe(`create-file-node`, () => {
Expand Down Expand Up @@ -43,12 +132,15 @@ describe(`create-file-node`, () => {
})

// Smoke test: creating a node for the `fixtures/file.json` path with an empty
// mock cache must not reject. The promise is returned so a rejection fails
// the test.
it(`creates a file node`, async () => {
  const createNodeId = createMockCreateNodeId()
  const cache = createMockCache()

  return createFileNode(
    path.resolve(`${__dirname}/fixtures/file.json`),
    createNodeId,
    {},
    cache
  )
})

Expand All @@ -59,77 +151,74 @@ describe(`create-file-node`, () => {
console.log(dname, fname)
fs.writeFileSync(fname, `data`)
try {
const createNodeId = jest.fn()
createNodeId.mockReturnValue(`uuid-from-gatsby`)

const node = await createFileNode(fname, createNodeId, {})

// Sanitize all filenames
Object.keys(node).forEach(key => {
if (typeof node[key] === `string`) {
node[key] = node[key].replace(new RegExp(dname, `g`), `<DIR>`)
node[key] = node[key].replace(new RegExp(fname, `g`), `<FILE>`)
}
})
Object.keys(node.internal).forEach(key => {
if (typeof node.internal[key] === `string`) {
node.internal[key] = node.internal[key].replace(
new RegExp(dname, `g`),
`<DIR>`
)
node.internal[key] = node.internal[key].replace(
new RegExp(fname, `g`),
`<FILE>`
)
}
})

// Note: this snapshot should update if the mock above is changed
expect(node).toMatchInlineSnapshot(`
Object {
"absolutePath": "<DIR>/f",
"accessTime": "1970-01-01T00:02:03.456Z",
"atime": "1970-01-01T00:02:03.456Z",
"atimeMs": 123456,
"base": "f",
"birthTime": "1970-01-01T00:02:03.456Z",
"birthtime": "1970-01-01T00:02:03.456Z",
"birthtimeMs": 123456,
"blksize": 123456,
"blocks": 123456,
"changeTime": "1970-01-01T00:02:03.456Z",
"children": Array [],
"ctime": "1970-01-01T00:02:03.456Z",
"ctimeMs": 123456,
"dev": 123456,
"dir": "<DIR>",
"ext": "",
"extension": "",
"id": "uuid-from-gatsby",
"ino": 123456,
"internal": Object {
"contentDigest": "8d777f385d3dfec8815d20f7496026dc",
"description": "File \\"<DIR>/f\\"",
"mediaType": "application/octet-stream",
"type": "File",
},
"mode": 123456,
"modifiedTime": "1970-01-01T00:02:03.456Z",
"mtime": "1970-01-01T00:02:03.456Z",
"mtimeMs": 123456,
"name": "f",
"nlink": 123456,
"parent": null,
"prettySize": "123 kB",
"rdev": 123456,
"relativeDirectory": "<DIR>",
"relativePath": "<DIR>/f",
"root": "",
"size": 123456,
"sourceInstanceName": "__PROGRAMMATIC__",
"uid": 123456,
}
`)
const createNodeId = createMockCreateNodeId()

const emptyCache = {
get: jest.fn(),
set: jest.fn(),
directory: __dirname,
}

const node = await createFileNode(fname, createNodeId, {}, emptyCache)

testNode(node, dname, fname, fileHash)
} finally {
fs.unlinkSync(fname)
}
} finally {
fs.rmdirSync(dname)
}
})

// Verifies the cache-hit path: when the cache already holds a digest for the
// file's key, that digest must be used as-is and nothing is written back.
it(`records the shape of the node from cache`, async () => {
  const dname = fs.mkdtempSync(`gatsby-create-file-node-test`).trim()
  try {
    const fname = path.join(dname, `f`)
    fs.writeFileSync(fname, `data`)
    try {
      const createNodeId = createMockCreateNodeId()

      // Deliberately NOT the file's real MD5: if the cached value were
      // ignored and the file re-hashed, the digest would differ and the
      // snapshot comparison below would fail.
      const cachedDigest = `digest-from-cache`
      const getFromCache = jest.fn()
      getFromCache.mockReturnValue(cachedDigest)
      const cache = createMockCache(getFromCache)

      const nodeFromCache = await createFileNode(
        fname,
        createNodeId,
        {},
        cache
      )

      // The cache must have been consulted, and a hit must not be re-stored.
      expect(getFromCache).toHaveBeenCalled()
      expect(cache.set).not.toHaveBeenCalled()

      testNode(nodeFromCache, dname, fname, cachedDigest)
    } finally {
      fs.unlinkSync(fname)
    }
  } finally {
    fs.rmdirSync(dname)
  }
})

it(`records the shape of the fast hashed node`, async () => {
const dname = fs.mkdtempSync(`gatsby-create-file-node-test`).trim()
try {
const fname = path.join(dname, `f`)
console.log(dname, fname)
FraserThompson marked this conversation as resolved.
Show resolved Hide resolved
fs.writeFileSync(fname, `data`)
try {
const createNodeId = createMockCreateNodeId()
const cache = createMockCache()

const nodeFastHash = await createFileNode(
fname,
createNodeId,
{
fastHash: true,
},
cache
)

testNode(nodeFastHash, dname, fname, fileFastHash)
} finally {
fs.unlinkSync(fname)
}
Expand Down
21 changes: 18 additions & 3 deletions packages/gatsby-source-filesystem/src/create-file-node.js
Expand Up @@ -2,14 +2,15 @@ const path = require(`path`)
const fs = require(`fs-extra`)
const mime = require(`mime`)
const prettyBytes = require(`pretty-bytes`)

const md5File = require(`md5-file`)

const { createContentDigest, slash } = require(`gatsby-core-utils`)

exports.createFileNode = async (
pathToFile,
createNodeId,
pluginOptions = {}
pluginOptions = {},
cache = null
) => {
const slashed = slash(pathToFile)
const parsedSlashed = path.parse(slashed)
Expand All @@ -35,7 +36,21 @@ exports.createFileNode = async (
description: `Directory "${path.relative(process.cwd(), slashed)}"`,
}
} else {
const contentDigest = await md5File(slashedFile.absolutePath)
const key = stats.mtimeMs.toString() + stats.ino.toString()
let contentDigest

if (pluginOptions.fastHash) {
// Skip hashing.
contentDigest = key
} else {
// Generate a hash, but only if the file has changed.
contentDigest = cache && (await cache.get(key))
if (!contentDigest) {
contentDigest = await md5File(slashedFile.absolutePath)
FraserThompson marked this conversation as resolved.
Show resolved Hide resolved
if (cache) await cache.set(key, contentDigest)
}
}

const mediaType = mime.getType(slashedFile.ext)
internal = {
contentDigest,
Expand Down
12 changes: 10 additions & 2 deletions packages/gatsby-source-filesystem/src/gatsby-node.js
Expand Up @@ -16,14 +16,21 @@ exports.onPreInit = ({ reporter }) => {
* Create a state machine to manage Chokidar's not-ready/ready states.
*/
const createFSMachine = (
{ actions: { createNode, deleteNode }, getNode, createNodeId, reporter },
{
actions: { createNode, deleteNode },
getNode,
createNodeId,
reporter,
cache,
},
pluginOptions
) => {
const createAndProcessNode = path => {
const fileNodePromise = createFileNode(
path,
createNodeId,
pluginOptions
pluginOptions,
cache
).then(fileNode => {
createNode(fileNode)
return null
Expand Down Expand Up @@ -162,6 +169,7 @@ exports.pluginOptionsSchema = ({ Joi }) =>
Joi.object({
name: Joi.string(),
path: Joi.string(),
fastHash: Joi.boolean(),
FraserThompson marked this conversation as resolved.
Show resolved Hide resolved
ignore: Joi.array().items(
Joi.string(),
Joi.object().regex(),
Expand Down