Skip to content

Commit

Permalink
feat: add a validateEntry option to compact
Browse files Browse the repository at this point in the history
PR-URL: #55
Credit: @nlf
Close: #55
Reviewed-by: @isaacs
  • Loading branch information
nlf authored and isaacs committed May 25, 2021
1 parent 460b951 commit 8892a92
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 14 deletions.
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -495,12 +495,21 @@ cacache.rm.content(cachePath, 'sha512-SoMeDIGest/IN+BaSE64==').then(() => {
})
```

#### <a name="index-compact"></a> `> cacache.index.compact(cache, key, matchFn) -> Promise`
#### <a name="index-compact"></a> `> cacache.index.compact(cache, key, matchFn, [opts]) -> Promise`

Uses `matchFn`, which must be a synchronous function that accepts two entries
and returns a boolean indicating whether or not the two entries match, to
deduplicate all entries in the cache for the given `key`.

If `opts.validateEntry` is provided, it will be called as a function with a
single index entry as its only parameter. The function must return a Boolean:
if it returns `true`, the entry is considered valid and will be kept in the
index; if it returns `false`, the entry will be removed from the index.

If `opts.validateEntry` is not provided, however, entries are deduplicated
starting from the newest and kept until the first `null` integrity is reached.
That `null` entry is treated as a delete: it and all entries that were written
before it are removed from the index.

The deduplicated list of entries is both written to the index, replacing the
existing content, and returned in the Promise.

Expand Down
35 changes: 28 additions & 7 deletions lib/entry-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,31 @@ module.exports.compact = compact
async function compact (cache, key, matchFn, opts = {}) {
const bucket = bucketPath(cache, key)
const entries = await bucketEntries(bucket)
// reduceRight because the bottom-most result is the newest
const newEntries = []
// we loop backwards because the bottom-most result is the newest
// since we add new entries with appendFile
const newEntries = entries.reduceRight((acc, newEntry) => {
if (!acc.find((oldEntry) => matchFn(oldEntry, newEntry))) {
acc.push(newEntry)
for (let i = entries.length - 1; i >= 0; --i) {
const entry = entries[i]
// a null integrity could mean either a delete was appended
// or the user has simply stored an index that does not map
// to any content. we determine if the user wants to keep the
// null integrity based on the validateEntry function passed in options.
// if the integrity is null and no validateEntry is provided, we break
// as we consider the null integrity to be a deletion of everything
// that came before it.
if (entry.integrity === null && !opts.validateEntry) {
break
}

return acc
}, [])
// if this entry is valid, and it is either the first entry or
// the newEntries array doesn't already include an entry that
// matches this one based on the provided matchFn, then we add
// it to the beginning of our list
if ((!opts.validateEntry || opts.validateEntry(entry) === true) &&
(newEntries.length === 0 || !newEntries.find((oldEntry) => matchFn(oldEntry, entry)))) {
newEntries.unshift(entry)
}
}

const newIndex = '\n' + newEntries.map((entry) => {
const stringified = JSON.stringify(entry)
Expand Down Expand Up @@ -87,7 +103,12 @@ async function compact (cache, key, matchFn, opts = {}) {
// write the file atomically
await disposer(setup(), teardown, write)

return newEntries.map((entry) => formatEntry(cache, entry, true))
// we reverse the list we generated such that the newest
// entries come first in order to make looping through them easier
// the true passed to formatEntry tells it to keep null
// integrity values, if they made it this far it's because
// validateEntry returned true, and as such we should return it
return newEntries.reverse().map((entry) => formatEntry(cache, entry, true))
}

module.exports.insert = insert
Expand Down
85 changes: 79 additions & 6 deletions test/entry-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,94 @@ test('compact', async (t) => {
index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }),
index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }),
index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }),
index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }),
// compact will return entries with a null integrity
index.insert(CACHE, KEY, null, { metadata: { rev: 3 } })
index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } })
])

const bucket = index.bucketPath(CACHE, KEY)
const entries = await index.bucketEntries(bucket)
t.equal(entries.length, 5, 'started with 5 entries')
t.equal(entries.length, 4, 'started with 4 entries')

const filter = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev
const compacted = await index.compact(CACHE, KEY, filter)
t.equal(compacted.length, 3, 'should return only three entries')
t.equal(compacted.length, 2, 'should return only two entries')

const newEntries = await index.bucketEntries(bucket)
t.equal(newEntries.length, 2, 'bucket was deduplicated')
})

// When no validateEntry option is given, compact() stops at the newest
// null-integrity entry and discards it together with everything older.
test('compact: treats null integrity without validateEntry as a delete', async (t) => {
  t.teardown(() => {
    index.delete.sync(CACHE, KEY)
  })

  // each insert is awaited on its own (no Promise.all) so that the entries
  // land in the bucket in exactly this order
  const writes = [
    [INTEGRITY, 1],
    [INTEGRITY, 2],
    // the null integrity acts as a delete: revs 1, 2 and 3 will be omitted
    [null, 3],
    [INTEGRITY, 4]
  ]
  for (const [integrity, rev] of writes) {
    await index.insert(CACHE, KEY, integrity, { metadata: { rev } })
  }

  const bucketFile = index.bucketPath(CACHE, KEY)
  const before = await index.bucketEntries(bucketFile)
  t.equal(before.length, 4, 'started with 4 entries')

  // two entries are duplicates of each other when their revs are equal
  const sameRev = (left, right) => left.metadata.rev === right.metadata.rev
  const result = await index.compact(CACHE, KEY, sameRev)
  t.equal(result.length, 1, 'should return only one entry')
  t.equal(result[0].metadata.rev, 4, 'kept rev 4')

  // the rewritten bucket on disk must agree with the returned list
  const after = await index.bucketEntries(bucketFile)
  t.equal(after.length, 1, 'bucket was deduplicated')
})

// With a validateEntry option, compact() should drop every entry the
// validator rejects and deduplicate the remaining ones using matchFn.
test('compact: leverages validateEntry to skip invalid entries', async (t) => {
  t.teardown(() => {
    index.delete.sync(CACHE, KEY)
  })
  // write order is irrelevant here: only the rev 2 entries pass validation,
  // and they collapse into a single entry wherever they end up in the bucket
  await Promise.all([
    index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } }),
    index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }),
    index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 2 } }),
    index.insert(CACHE, KEY, INTEGRITY, { metadata: { rev: 1 } })
  ])

  const bucket = index.bucketPath(CACHE, KEY)
  const entries = await index.bucketEntries(bucket)
  t.equal(entries.length, 4, 'started with 4 entries')

  // entries are duplicates of each other when they share the same rev
  const matchFn = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev
  // only revs greater than 1 are considered valid
  const validateEntry = (entry) => entry.metadata.rev > 1
  const compacted = await index.compact(CACHE, KEY, matchFn, { validateEntry })
  t.equal(compacted.length, 1, 'should return only one entry')
  t.equal(compacted[0].metadata.rev, 2, 'kept the rev 2 entry')

  // the rewritten bucket on disk must agree with the returned list
  const newEntries = await index.bucketEntries(bucket)
  t.equal(newEntries.length, 1, 'bucket was deduplicated')
})

// When validateEntry is supplied, a null integrity is no longer treated as a
// delete marker: the validator alone decides which entries are kept.
test('compact: validateEntry allows for keeping null integrity', async (t) => {
  t.teardown(() => {
    index.delete.sync(CACHE, KEY)
  })
  // every entry has a null integrity; write order is irrelevant because only
  // the rev 2 entries pass validation and they collapse into a single entry
  await Promise.all([
    index.insert(CACHE, KEY, null, { metadata: { rev: 1 } }),
    index.insert(CACHE, KEY, null, { metadata: { rev: 2 } }),
    index.insert(CACHE, KEY, null, { metadata: { rev: 2 } }),
    index.insert(CACHE, KEY, null, { metadata: { rev: 1 } })
  ])

  const bucket = index.bucketPath(CACHE, KEY)
  const entries = await index.bucketEntries(bucket)
  t.equal(entries.length, 4, 'started with 4 entries')

  // entries are duplicates of each other when they share the same rev
  const matchFn = (entryA, entryB) => entryA.metadata.rev === entryB.metadata.rev
  // only revs greater than 1 are considered valid
  const validateEntry = (entry) => entry.metadata.rev > 1
  const compacted = await index.compact(CACHE, KEY, matchFn, { validateEntry })
  t.equal(compacted.length, 1, 'should return only one entry')
  t.equal(compacted[0].metadata.rev, 2, 'kept the rev 2 entry')

  // exactly one entry survives validation and deduplication, so the bucket
  // on disk must contain one entry as well
  const newEntries = await index.bucketEntries(bucket)
  t.equal(newEntries.length, 1, 'bucket was deduplicated')
})

test('compact: ENOENT in chownr does not cause failure', async (t) => {
Expand Down

0 comments on commit 8892a92

Please sign in to comment.