fix: faster cache key factory for range #536

H4ad · 2023-04-06T03:24:24Z

The old way of creating a cache key for range.js is too slow:

Lines 82 to 85 in f4fa069

    
           // memoize range parsing for performance. 
        
           // this is a very hot path, and fully deterministic. 
        
           const memoOpts = Object.keys(this.options).join(',') 
        
           const memoKey = `parseRange:${memoOpts}:${range}`

The performance was like this:

Object.keys({"includePrelease":true}).join(',') x 14,540,605 ops/sec ±1.34% (91 runs sampled)
Object.keys({"includePrelease":true,"loose":true}).join(',') x 7,756,786 ops/sec ±1.18% (91 runs sampled)
Object.keys({"includePrelease":true,"loose":true,"rtl":true}).join(',') x 5,706,112 ops/sec ±1.16% (91 runs sampled)

Now, I just changed to a simple function with a bunch of comparisons:

buildMemoKeyFromOptionsBitMasks({"includePrelease":true}) x 1,135,166,905 ops/sec ±0.06% (95 runs sampled)
buildMemoKeyFromOptionsBitMasks({"includePrelease":true,"loose":true}) x 1,132,064,541 ops/sec ±0.38% (96 runs sampled)
buildMemoKeyFromOptionsBitMasks({"includePrelease":true,"loose":true,"rtl":true}) x 1,063,387,861 ops/sec ±0.08% (97 runs sampled)

To reduce the code, isaacs suggested using flags, so I changed the implementation from the old PR to a version using flags.

The first version I wrote using flags was slower because I implemented it using .toString, but just changing to 0 + '' was actually 10x faster, so I kept it as flags.

Conclusion

In this PR, we could use the benefit of using flags to reduce the code, maybe this PR could open a possibility to rewrite all the options as flags.

References

Related to #528

benchmark.js

const Benchmark = require('benchmark')
const suite = new Benchmark.Suite()

// Benchmark fixtures with one, two, and three options enabled.
// The key must be spelled `includePrerelease`: the key builders in this file
// read `options.includePrerelease`, so a misspelled key would leave that flag
// undefined and only the "flag not set" branch would be measured.
const option1 = { includePrerelease: true }
const option2 = { includePrerelease: true, loose: true }
const option3 = { includePrerelease: true, loose: true, rtl: true }

/**
 * Maps the combination of the `includePrerelease`, `loose` and `rtl` options
 * to one of eight single-character keys.
 *
 * A flag only counts as enabled when its value is strictly `true`; any other
 * value (including other truthy values) is treated as disabled.
 *
 * @param {object} options - Object whose three option properties are read.
 * @returns {string} A key from '1' to '8' identifying the flag combination.
 */
function buildMemoKeyFromOptions (options) {
  const wantsPrerelease = options.includePrerelease === true
  const isLoose = options.loose === true
  const isRtl = options.rtl === true

  if (wantsPrerelease) {
    if (isLoose) {
      return isRtl ? '1' : '2'
    }
    return isRtl ? '3' : '4'
  }

  if (isLoose) {
    return isRtl ? '5' : '6'
  }

  return isRtl ? '7' : '8'
}

/**
 * Builds a memo key by packing three option flags into a bit field.
 *
 * The value 2 is OR-ed in when `options.loose` is truthy, 4 when
 * `options.includePrerelease` is truthy, and 8 when `options.rtl` is truthy.
 *
 * @param {object} options - Object whose `loose`, `includePrerelease` and
 *   `rtl` properties are read.
 * @returns {string} The combined flags as a decimal string.
 */
function buildMemoKeyFromOptionsBitMasks (options) {
  let flags = 0

  if (options.loose) {
    flags |= 2
  }
  if (options.includePrerelease) {
    flags |= 4
  }
  if (options.rtl) {
    flags |= 8
  }

  return `${flags}`
}

// Baseline cases: build the options part of the key by joining the object's
// own keys, once per fixture. Each case is labelled with the JSON form of
// the fixture it uses.
suite
  .add(`Object.keys(${JSON.stringify(option1)}).join(',')`, function () {
    Object.keys(option1).join(',')
  })
  .add(`Object.keys(${JSON.stringify(option2)}).join(',')`, function () {
    Object.keys(option2).join(',')
  })
  .add(`Object.keys(${JSON.stringify(option3)}).join(',')`, function () {
    Object.keys(option3).join(',')
  })

// Branching variant: buildMemoKeyFromOptions against the same three fixtures.
suite
  .add(`buildMemoKeyFromOptions(${JSON.stringify(option1)})`, function () {
    buildMemoKeyFromOptions(option1)
  })
  .add(`buildMemoKeyFromOptions(${JSON.stringify(option2)})`, function () {
    buildMemoKeyFromOptions(option2)
  })
  .add(`buildMemoKeyFromOptions(${JSON.stringify(option3)})`, function () {
    buildMemoKeyFromOptions(option3)
  })

// Bit-flag variant: buildMemoKeyFromOptionsBitMasks against the same fixtures.
suite
  .add(`buildMemoKeyFromOptionsBitMasks(${JSON.stringify(option1)})`, function () {
    buildMemoKeyFromOptionsBitMasks(option1)
  })
  .add(`buildMemoKeyFromOptionsBitMasks(${JSON.stringify(option2)})`, function () {
    buildMemoKeyFromOptionsBitMasks(option2)
  })
  .add(`buildMemoKeyFromOptionsBitMasks(${JSON.stringify(option3)})`, function () {
    buildMemoKeyFromOptionsBitMasks(option3)
  })

// On every 'cycle' event, log the string form of the event's target, then
// start the suite with the `async` option disabled.
suite
  .on('cycle', function (event) {
    console.log(String(event.target))
  })
  .run({ async: false })

ljharb

Are you sure these are the only three options that matter? this.options is passed to parseComparator, and replaceGTE0, as well as the Comparator constructor. I'm not even sure if rtl is used in the code path currently, and it seems very dangerous for future maintenance to hardcode some options here. What happens if another option is added later?

update: i see after writing this that parseOptions filters down the options to those three. Is the creation of that new options object actually needed anymore after this PR, now that the cache key isn't made via reflecting on the options object? Would keeping the reflection and avoiding the parseOptions clone have a better perf impact here, or a worse one?

classes/range.js

H4ad · 2023-04-06T11:15:36Z

@ljharb You have an interesting point. If parseOptions was written just because of this function, then yes, maybe parsing the options is only needed to check whether they are undefined/null or a boolean. In the other cases, we can just use the same object.

About the perf, I think it will not affect us, since we already look up properties on the object that might not exist; and if we just use Object.freeze for an empty object and for loose, we still save memory, avoid doing more comparisons, and reduce the maintenance.

internal/constants.js

wraithgar · 2023-04-06T14:21:12Z

rtl is only used in functions/coerce.js, a standalone function not reused in any other part of semver.

This refactor is great because our coverage tests are exposing the fact that we are evaluating for a parameter that doesn't matter to this class. The flag doesn't affect the data being memoized so it should not need to be accounted for in the memo key.

wraithgar · 2023-04-06T14:46:23Z

parseOptions is doing two things, it is allowing for the options parameter to overload as a boolean representing loose, and it is also ensuring that the object keys are in a deterministic order.

I searched through the code to see if options was doing any other heavy lifting and found:

classes/comparator.js#intersects recreating the logic instead of calling parseOptions
functions/inc.js making the positional parameter options optional by testing if it is a string, this likely would only be affected when interpreting params from the cli, which already builds up an options object and only uses that. I don't think we need to support passing 'true' for the loose parameter.
ranges/subset.js looks at options.includePrerelease without having cleaned it via parseOptions

Nothing I found would indicate that the ordering of parseOptions matters except in this one case where we are using it to memoize a cache key. I think that memo logic should be isolated to where it's used, and made to only account for flags that affect the results.

wraithgar · 2023-04-06T15:02:44Z

What happens if another option is added later?

If those options affect what classes/range.js returns there will need to be tests that cover those lines, which will fail if we are improperly memoizing the results.

wraithgar · 2023-04-06T15:10:41Z

So far so good imho. I am going to ping @kurtextrem and @jakebailey since they were pretty well engaged with the other performance PR. If either of you don't want me to ping you on this and the next performance PR(s) please let me know.

jakebailey · 2023-04-06T15:12:38Z

Always happy to be pinged 😃

I'll test these PRs out on my super stressful DefinitelyTyped monorepo case once I'm at my desk.

H4ad · 2023-04-06T15:13:11Z

classes/comparator.js#intersects recreating the logic instead of calling parseOptions

I will push a fix to this.

functions/inc.js making the positional parameter options optional by testing if it is a string, this likely would only be affected when interpreting params from the cli, which already builds up an options object and only uses that. I don't think we need to support passing 'true' for the loose parameter.

I think the logic behind the testing is when we send the identifier on options, like:

inc(version: string, release: string, options: any, identifier: string);
inc(version: string, release: string, identifier: string);

ranges/subset.js looks at options.includePrerelease without having cleaned it via parseOptions

I already fix this case in this PR by calling parseOptions in the beginning.

Nothing I found would indicate that the ordering of parseOptions matters except in this one case where we are using it to memoize a cache key. I think that memo logic should be isolated to where it's used, and made to only account for flags that affect the results.

In this case, should I remove all the object freeze from the other PR and change the tests to accept any object?

If those options affect what classes/range.js returns there will need to be tests that cover those lines, which will fail if we are improperly memoizing the results.

So currently we don't need any additional test, right?

wraithgar · 2023-04-06T15:25:42Z

In this case, should I remove all the object freeze from the other PR and change the tests to accept any object?

I haven't looked at the other PR yet. Let's talk about that over there though.

So currently we don't need any additional test, right?

Correct, imho. We would need new tests in the event that we added new options, at which point if we didn't also update the memoization they would fail.

wraithgar · 2023-04-06T15:31:46Z

I already fix this case in this PR by calling parseOptions in the beginning.

I don't think this is fixed though. subset is exported directly from index.js so folks calling it are sending an unparsed options param directly to code that evaluates options.includePrerelease without first having ran it through parseOptions.

classes/range.js

internal/constants.js

ljharb · 2023-04-06T16:05:16Z

this is much simpler and thus much less risky, well done

wraithgar · 2023-04-06T16:09:49Z

this is much simpler and thus much less risky, well done

Agreed. The past two weeks @H4ad has been extremely patient and diligent responding to all of the feedback in the PRs they've made. The end results have been worth it and I am thankful for the effort being made here.

I've given this a 👍 and will let it sit now through the weekend to give others time to catch up.

jakebailey · 2023-04-06T16:19:09Z

Here's my DT stress test using pnpm. Before:

Done in 2m 14.5s
total time:  134.82s
user time:   162.81s
system time: 31.61s
CPU percent: 144%
max memory:  1752 MB

This PR ("semver": "github:h4ad-forks/node-semver#fix/better-cache-key-factory"):

Done in 2m 13s
total time:  133.25s
user time:   161.42s
system time: 31.23s
CPU percent: 144%
max memory:  1768 MB

So, seemingly no impact. This is probably not showing much improvement because I went and added a Range cache to pnpm (which yarn berry also has). I can try removing that cache and show before/after this PR, though, if people are interested.

jakebailey · 2023-04-06T16:31:48Z

Eh, I just ran it anyway.

Here's pnpm without the Range cache, instead using semver.satisfies like it used to:

Done in 2m 43s
total time:  163.27s
user time:   194.27s
system time: 31.76s
CPU percent: 138%
max memory:  1792 MB

Already a lot slower than my baseline. With this PR, it gets a little better:

Done in 2m 37.5s
total time:  157.75s
user time:   189.12s
system time: 31.90s
CPU percent: 140%
max memory:  1771 MB

Which, is still a light improvement, but not game changing like the options PR for pnpm anyway. Not to say this isn't a good change; absolutely take it 😄

wraithgar · 2023-04-06T16:37:50Z

Thanks @jakebailey it's good to have benchmarks that use a whole program. Sometimes an optimization helps a use case that isn't very common.

The primary benefit of this PR is that it sets up the parse options refactor, since the memoization is now decoupled from that function.

kurtextrem · 2023-04-07T17:18:41Z

From my side, I would also say lgtm, even if it might not increase performance drastically, as it gives a good baseline in case in the future more options are added or for using more bit masks inside this module.

wraithgar · 2023-04-10T17:13:16Z

@H4ad looks like #530 conflicted w/ this PR. Once that is resolved I'll land this.

H4ad · 2023-04-10T17:54:47Z

@wraithgar Fixed!

H4ad requested a review from a team as a code owner April 6, 2023 03:24

H4ad requested review from wraithgar and removed request for a team April 6, 2023 03:24

H4ad force-pushed the fix/better-cache-key-factory branch from 3a87cd9 to d33af71 Compare April 6, 2023 03:24

ljharb reviewed Apr 6, 2023

View reviewed changes

classes/range.js Outdated Show resolved Hide resolved

H4ad force-pushed the fix/better-cache-key-factory branch from d33af71 to 2bc0f82 Compare April 6, 2023 11:06

H4ad mentioned this pull request Apr 6, 2023

fix: faster parse options #535

Merged

wraithgar reviewed Apr 6, 2023

View reviewed changes

internal/constants.js Outdated Show resolved Hide resolved

H4ad force-pushed the fix/better-cache-key-factory branch from 2bc0f82 to 9b1364a Compare April 6, 2023 14:56

wraithgar reviewed Apr 6, 2023

View reviewed changes

classes/range.js Outdated Show resolved Hide resolved

H4ad force-pushed the fix/better-cache-key-factory branch from 9b1364a to 6a0f846 Compare April 6, 2023 15:59

wraithgar reviewed Apr 6, 2023

View reviewed changes

internal/constants.js Outdated Show resolved Hide resolved

fix: faster cache key factory for range

58bb3f9

H4ad force-pushed the fix/better-cache-key-factory branch from 6a0f846 to 58bb3f9 Compare April 6, 2023 16:01

wraithgar approved these changes Apr 6, 2023

View reviewed changes

nlf approved these changes Apr 10, 2023

View reviewed changes

Merge branch 'main' into fix/better-cache-key-factory

c2b35bc

wraithgar merged commit 61e6ea1 into npm:main Apr 10, 2023
23 checks passed

github-actions bot mentioned this pull request Apr 10, 2023

chore: release 7.4.0 #540

Merged

H4ad deleted the fix/better-cache-key-factory branch April 10, 2023 18:12

ms-bot mentioned this pull request Apr 17, 2023

chore(deps): bump 🧳 semver to ^7.6.0 mobsuccess-devops/pmd-github-action#11

Draft

snyk-bot mentioned this pull request May 16, 2023

[Snyk] Upgrade semver from 6.3.0 to 7.5.0 OutSystems/cordova-plugin-add-swift-support#3

Open

This was referenced Jul 31, 2023

Bump semver from 7.3.5 to 7.5.2 in /platform/ui ossrs/oryx#107

Closed

Bump semver from 7.3.5 to 7.5.2 in /ui ossrs/oryx#116

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix: faster cache key factory for range #536

fix: faster cache key factory for range #536

H4ad commented Apr 6, 2023 •

edited

ljharb left a comment

H4ad commented Apr 6, 2023

wraithgar commented Apr 6, 2023

wraithgar commented Apr 6, 2023 •

edited

wraithgar commented Apr 6, 2023

wraithgar commented Apr 6, 2023

jakebailey commented Apr 6, 2023

H4ad commented Apr 6, 2023

wraithgar commented Apr 6, 2023

wraithgar commented Apr 6, 2023

ljharb commented Apr 6, 2023

wraithgar commented Apr 6, 2023

jakebailey commented Apr 6, 2023

jakebailey commented Apr 6, 2023

wraithgar commented Apr 6, 2023

kurtextrem commented Apr 7, 2023

wraithgar commented Apr 10, 2023

H4ad commented Apr 10, 2023

	// memoize range parsing for performance.
	// this is a very hot path, and fully deterministic.
	const memoOpts = Object.keys(this.options).join(',')
	const memoKey = `parseRange:${memoOpts}:${range}`

fix: faster cache key factory for range #536

fix: faster cache key factory for range #536

Conversation

H4ad commented Apr 6, 2023 • edited

Conclusion

References

ljharb left a comment

Choose a reason for hiding this comment

H4ad commented Apr 6, 2023

wraithgar commented Apr 6, 2023

wraithgar commented Apr 6, 2023 • edited

wraithgar commented Apr 6, 2023

wraithgar commented Apr 6, 2023

jakebailey commented Apr 6, 2023

H4ad commented Apr 6, 2023

wraithgar commented Apr 6, 2023

wraithgar commented Apr 6, 2023

ljharb commented Apr 6, 2023

wraithgar commented Apr 6, 2023

jakebailey commented Apr 6, 2023

jakebailey commented Apr 6, 2023

wraithgar commented Apr 6, 2023

kurtextrem commented Apr 7, 2023

wraithgar commented Apr 10, 2023

H4ad commented Apr 10, 2023

H4ad commented Apr 6, 2023 •

edited

wraithgar commented Apr 6, 2023 •

edited