From 7ac834568d57a9b9e63002353543d32f6e97b4a5 Mon Sep 17 00:00:00 2001 From: Sylvain Lebresne Date: Wed, 21 Jun 2023 11:27:20 +0200 Subject: [PATCH] Query planning performance improvements (#2610) Implements a few performance improvement for query plan computations: 1. profiling of some slow query planning shows `FetchGroup.isUseless` as one of the hot path. This commit caches the result of this method for a group, and only invalid that cache when we know the result may needs to be recomputed. On the planning of some queries, this is shown to provide a 15% improvement to query planning time. 2. when a type has multiple keys, the query planning was sometimes considering an option where some key `x` was used to get field `y` but then key `y` was used to get that same `y` field from another subgraph. This is obviously not very useful, and we know we can ignore those paths as the 1st part of those path already does what we want. But considering those (useless) options, while harmless for correction, was in some case drastically increasing the number of plans that were evaluated, leading to long query planning times. --- .changeset/friendly-lies-protect.md | 8 + composition-js/src/validate.ts | 112 +-------- internals-js/src/operations.ts | 5 + .../src/__tests__/graphPath.test.ts | 216 ++++++++++++++++++ query-graphs-js/src/conditionsCaching.ts | 16 +- query-graphs-js/src/conditionsValidation.ts | 108 +++++++++ query-graphs-js/src/graphPath.ts | 86 +++++-- query-graphs-js/src/index.ts | 1 + .../src/__tests__/buildPlan.defer.test.ts | 1 - .../src/__tests__/buildPlan.test.ts | 88 ++++++- query-planner-js/src/buildPlan.ts | 38 ++- 11 files changed, 537 insertions(+), 142 deletions(-) create mode 100644 .changeset/friendly-lies-protect.md create mode 100644 query-graphs-js/src/__tests__/graphPath.test.ts create mode 100644 query-graphs-js/src/conditionsValidation.ts diff --git a/.changeset/friendly-lies-protect.md b/.changeset/friendly-lies-protect.md new file mode 100644 index 0000000000..d70feaf6f5 --- /dev/null +++ b/.changeset/friendly-lies-protect.md @@ -0,0 +1,8 @@ +--- +"@apollo/query-planner": patch +"@apollo/query-graphs": patch +"@apollo/federation-internals": patch +--- + +Improves query planning time in some situations where entities use multiple keys. + \ No newline at end of file diff --git a/composition-js/src/validate.ts b/composition-js/src/validate.ts index 17ae66d351..02494a910a 100644 --- a/composition-js/src/validate.ts +++ b/composition-js/src/validate.ts @@ -43,25 +43,15 @@ import { RootPath, advancePathWithTransition, Transition, - OpGraphPath, - advanceSimultaneousPathsWithOperation, - ExcludedEdges, QueryGraphState, - ExcludedConditions, Unadvanceables, isUnadvanceable, Unadvanceable, noConditionsResolution, - ConditionResolution, - unsatisfiedConditionsResolution, - ConditionResolver, - cachingConditionResolver, - PathContext, - addConditionExclusion, - SimultaneousPathsWithLazyIndirectPaths, - advanceOptionsToString, TransitionPathWithLazyIndirectPaths, RootVertex, + simpleValidationConditionResolver, + ConditionResolver, } from "@apollo/query-graphs"; import { CompositionHint, HINTS } from "./hints"; import { ASTNode, GraphQLError, print } from "graphql"; @@ -330,7 +320,7 @@ export function computeSubgraphPaths( } { try { assert(!supergraphPath.hasAnyEdgeConditions(), () => `A supergraph path should not have edge condition paths (as supergraph edges should not have conditions): ${supergraphPath}`); - const conditionResolver = new ConditionValidationResolver(supergraphSchema, subgraphs); + const conditionResolver = simpleValidationConditionResolver({ supergraph: supergraphSchema, queryGraph: subgraphs, withCaching: true }); const initialState = ValidationState.initial({supergraphAPI: supergraphPath.graph, kind: supergraphPath.root.rootKind, subgraphs, conditionResolver}); const context = new ValidationContext(supergraphSchema); let state = initialState; @@ -431,11 +421,11 @@ export class ValidationState { supergraphAPI: QueryGraph, kind: SchemaRootKind, subgraphs: QueryGraph, - conditionResolver: ConditionValidationResolver, + conditionResolver: ConditionResolver, }) { return new ValidationState( GraphPath.fromGraphRoot(supergraphAPI, kind)!, - initialSubgraphPaths(kind, subgraphs).map((p) => TransitionPathWithLazyIndirectPaths.initial(p, conditionResolver.resolver)), + initialSubgraphPaths(kind, subgraphs).map((p) => TransitionPathWithLazyIndirectPaths.initial(p, conditionResolver)), ); } @@ -586,7 +576,7 @@ function isSupersetOrEqual(maybeSuperset: string[], other: string[]): boolean { } class ValidationTraversal { - private readonly conditionResolver: ConditionValidationResolver; + private readonly conditionResolver: ConditionResolver; // The stack contains all states that aren't terminal. private readonly stack: ValidationState[] = []; @@ -604,7 +594,11 @@ class ValidationTraversal { supergraphAPI: QueryGraph, subgraphs: QueryGraph ) { - this.conditionResolver = new ConditionValidationResolver(supergraphSchema, subgraphs); + this.conditionResolver = simpleValidationConditionResolver({ + supergraph: supergraphSchema, + queryGraph: subgraphs, + withCaching: true, + }); supergraphAPI.rootKinds().forEach((kind) => this.stack.push(ValidationState.initial({ supergraphAPI, kind, @@ -680,87 +674,3 @@ class ValidationTraversal { debug.groupEnd(); } } - -class ConditionValidationState { - constructor( - // Selection that belongs to the condition we're validating. - readonly selection: Selection, - // All the possible "simultaneous paths" we could be in the subgraph when we reach this state selection. - readonly subgraphOptions: SimultaneousPathsWithLazyIndirectPaths[] - ) {} - - toString(): string { - return `${this.selection} <=> ${advanceOptionsToString(this.subgraphOptions)}`; - } -} - -class ConditionValidationResolver { - readonly resolver: ConditionResolver; - - constructor( - private readonly supergraphSchema: Schema, - private readonly federatedQueryGraph: QueryGraph - ) { - this.resolver = cachingConditionResolver( - federatedQueryGraph, - ( - edge: Edge, - context: PathContext, - excludedEdges: ExcludedEdges, - excludedConditions: ExcludedConditions - ) => this.validateConditions(edge, context, excludedEdges, excludedConditions) - ); - } - - private validateConditions( - edge: Edge, - context: PathContext, - excludedEdges: ExcludedEdges, - excludedConditions: ExcludedConditions - ): ConditionResolution { - const conditions = edge.conditions!; - excludedConditions = addConditionExclusion(excludedConditions, conditions); - - const initialPath: OpGraphPath = GraphPath.create(this.federatedQueryGraph, edge.head); - const initialOptions = [new SimultaneousPathsWithLazyIndirectPaths([initialPath], context, this.resolver, excludedEdges, excludedConditions)]; - - const stack: ConditionValidationState[] = []; - for (const selection of conditions.selections()) { - stack.push(new ConditionValidationState(selection, initialOptions)); - } - - while (stack.length > 0) { - const state = stack.pop()!; - const newStates = this.advanceState(state); - if (newStates === null) { - return unsatisfiedConditionsResolution; - } - newStates.forEach(s => stack.push(s)); - } - // If we exhaust the stack, it means we've been able to find "some" path for every possible selection in the condition, so the - // condition is validated. Note that we use a cost of 1 for all conditions as we don't care about efficiency. - return { satisfied: true, cost: 1 }; - } - - private advanceState(state: ConditionValidationState): ConditionValidationState[] | null { - let newOptions: SimultaneousPathsWithLazyIndirectPaths[] = []; - for (const paths of state.subgraphOptions) { - const pathsOptions = advanceSimultaneousPathsWithOperation( - this.supergraphSchema, - paths, - state.selection.element, - ); - if (!pathsOptions) { - continue; - } - newOptions = newOptions.concat(pathsOptions); - } - - // If we got no options, it means that particular selection of the conditions cannot be satisfied, so the - // overall condition cannot. - if (newOptions.length === 0) { - return null; - } - return state.selection.selectionSet ? state.selection.selectionSet.selections().map(s => new ConditionValidationState(s, newOptions)) : []; - } -} diff --git a/internals-js/src/operations.ts b/internals-js/src/operations.ts index cadc3099f4..4762b566bb 100644 --- a/internals-js/src/operations.ts +++ b/internals-js/src/operations.ts @@ -1792,6 +1792,11 @@ export class SelectionSet { : ContainsResult.STRICTLY_CONTAINED; } + containsTopLevelField(field: Field): boolean { + const selection = this._keyedSelections.get(field.key()); + return !!selection && selection.element.equals(field); + } + /** * Returns a selection set that correspond to this selection set but where any of the selections in the * provided selection set have been remove. diff --git a/query-graphs-js/src/__tests__/graphPath.test.ts b/query-graphs-js/src/__tests__/graphPath.test.ts new file mode 100644 index 0000000000..f544daaae0 --- /dev/null +++ b/query-graphs-js/src/__tests__/graphPath.test.ts @@ -0,0 +1,216 @@ +import { + Field, + FieldDefinition, + Schema, + assert, + buildSupergraphSchema, +} from "@apollo/federation-internals"; +import { + GraphPath, + OpGraphPath, + SimultaneousPathsWithLazyIndirectPaths, + advanceSimultaneousPathsWithOperation, + createInitialOptions +} from "../graphPath"; +import { QueryGraph, Vertex, buildFederatedQueryGraph } from "../querygraph"; +import { emptyContext } from "../pathContext"; +import { simpleValidationConditionResolver } from "../conditionsValidation"; + +function parseSupergraph(subgraphs: number, schema: string): { supergraph: Schema, api: Schema, queryGraph: QueryGraph } { + assert(subgraphs >= 1, 'Should have at least 1 subgraph'); + const header = ` + schema + @link(url: "https://specs.apollo.dev/link/v1.0") + @link(url: "https://specs.apollo.dev/join/v0.3", for: EXECUTION) + { + query: Query + } + + directive @join__enumValue(graph: join__Graph!) repeatable on ENUM_VALUE + directive @join__field(graph: join__Graph, requires: join__FieldSet, provides: join__FieldSet, type: String, external: Boolean, override: String, usedOverridden: Boolean) repeatable on FIELD_DEFINITION | INPUT_FIELD_DEFINITION + directive @join__graph(name: String!, url: String!) on ENUM_VALUE + directive @join__implements(graph: join__Graph!, interface: String!) repeatable on OBJECT | INTERFACE + directive @join__type(graph: join__Graph!, key: join__FieldSet, extension: Boolean! = false, resolvable: Boolean! = true, isInterfaceObject: Boolean! = false) repeatable on OBJECT | INTERFACE | UNION | ENUM | INPUT_OBJECT | SCALAR + directive @join__unionMember(graph: join__Graph!, member: String!) repeatable on UNION + directive @link(url: String, as: String, for: link__Purpose, import: [link__Import]) repeatable on SCHEMA + + scalar join__FieldSet + + scalar link__Import + + enum link__Purpose { + SECURITY + EXECUTION + } + + enum join__Graph { + ${[...Array(subgraphs).keys()].map((n) => `S${n+1} @join__graph(name: "S${n+1}", url: "https://S${n+1}")`).join('\n')} + } + + `; + + try { + const supergraph = buildSupergraphSchema(header + schema)[0]; + return { + supergraph, + api: supergraph.toAPISchema(), + queryGraph: buildFederatedQueryGraph(supergraph, true), + }; + } catch (e) { + throw new Error('Error parsing supergraph schema:\n' + e.toString()); + } +} + +function createOptions(supergraph: Schema, queryGraph: QueryGraph): SimultaneousPathsWithLazyIndirectPaths[] { + // We know we only use `Query` in the supergraph, so there is only that as root. + const root = queryGraph.roots()[0]; + const initialPath: OpGraphPath = GraphPath.create(queryGraph, root); + return createInitialOptions( + initialPath, + emptyContext, + simpleValidationConditionResolver({ supergraph, queryGraph }), + [], + [], + ); +} + +function field(schema: Schema, coordinate: string): Field { + const def = schema.elementByCoordinate(coordinate) as FieldDefinition; + return new Field(def); +} + +describe("advanceSimultaneousPathsWithOperation", () => { + test("do not use key `x` to fetch `x`", () => { + const { supergraph, api, queryGraph } = parseSupergraph(3, ` + type Query + @join__type(graph: S1) + { + t: T @join__field(graph: S1) + } + + type T + @join__type(graph: S1) + @join__type(graph: S2, key: "otherId") + @join__type(graph: S2, key: "id") + @join__type(graph: S3, key: "id") + { + otherId: ID! @join__field(graph: S1) @join__field(graph: S2) + id: ID! @join__field(graph: S2) @join__field(graph: S3) + } + `); + + // Picking the first initial, the one going to S1 + const initial = createOptions(supergraph, queryGraph)[0]; + + // Then picking `t`, which should be just the one option of picking it in S1 at this point. + const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t")); + assert(allAfterT, 'Should have advanced correctly'); + expect(allAfterT).toHaveLength(1); + const afterT = allAfterT[0]; + expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`); + + // Checking that, at this point, we technically have 2 options: + // 1. we can go to S2 using `otherId`. + // 2. we can go to S3 using `id`, assuming we first get `id` from S2 (using `otherId`). + const indirect = afterT.indirectOptions(afterT.context, 0); + expect(indirect.paths).toHaveLength(2); + expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) (types: [T])`); + expect(indirect.paths[1].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id } ⊢ key()]--> T(S3) (types: [T])`); + + const allForId = advanceSimultaneousPathsWithOperation(supergraph, afterT, field(api, "T.id")); + assert(allForId, 'Should have advanced correctly'); + + // Here, `id` is a direct path from both of our indirect paths. However, it makes no sense to use the 2nd + // indirect path above, since the condition to get to `S3` was `id`, and this means another indirect path + // is able to get to `id` more directly (the first one in this case). + // So ultimately, we should only keep the 1st option. + expect(allForId).toHaveLength(1); + const forId = allForId[0]; + expect(forId.toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) --[id]--> ID(S2)`); + }); + + test("do not use key containing `x` to fetch `x`", () => { + // Similar to the previous test, but the key used is not exactly the fetch field, it only contains + // it (but the optimisation should still work). + const { supergraph, api, queryGraph } = parseSupergraph(3, ` + type Query + @join__type(graph: S1) + { + t: T @join__field(graph: S1) + } + + type T + @join__type(graph: S1) + @join__type(graph: S2, key: "otherId") + @join__type(graph: S2, key: "id1 id2") + @join__type(graph: S3, key: "id1 id2") + { + otherId: ID! @join__field(graph: S1) @join__field(graph: S2) + id1: ID! @join__field(graph: S2) @join__field(graph: S3) + id2: ID! @join__field(graph: S2) @join__field(graph: S3) + } + `); + + // Picking the first initial, the one going to S1 + const initial = createOptions(supergraph, queryGraph)[0]; + + // Then picking `t`, which should be just the one option of picking it in S1 at this point. + const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t")); + assert(allAfterT, 'Should have advanced correctly'); + expect(allAfterT).toHaveLength(1); + const afterT = allAfterT[0]; + expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`); + + // Checking that, at this point, we technically have 2 options: + // 1. we can go to S2 using `otherId`. + // 2. we can go to S3 using `id1 id2`, assuming we first get `id1 id2` from S2 (using `otherId`). + const indirect = afterT.indirectOptions(afterT.context, 0); + expect(indirect.paths).toHaveLength(2); + expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) (types: [T])`); + expect(indirect.paths[1].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id1 id2 } ⊢ key()]--> T(S3) (types: [T])`); + + const allForId = advanceSimultaneousPathsWithOperation(supergraph, afterT, field(api, "T.id1")); + assert(allForId, 'Should have advanced correctly'); + + // Here, `id1` is a direct path from both of our indirect paths. However, it makes no sense to use the 2nd + // indirect path above, since the condition to get to `S3` was `id1 id2`, which includes `id1`. + expect(allForId).toHaveLength(1); + const forId = allForId[0]; + expect(forId.toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) --[id1]--> ID(S2)`); + }); + + test("avoids indirect path that needs a key to the same subgraph to validate its condition", () => { + const { supergraph, api, queryGraph } = parseSupergraph(2, ` + type Query + @join__type(graph: S1) + { + t: T @join__field(graph: S1) + } + + type T + @join__type(graph: S1) + @join__type(graph: S2, key: "id1") + @join__type(graph: S2, key: "id2") + { + id1: ID! @join__field(graph: S2) + id2: ID! @join__field(graph: S1) @join__field(graph: S2) + } + `); + + // Picking the first initial, the one going to S1 + const initial = createOptions(supergraph, queryGraph)[0]; + + // Then picking `t`, which should be just the one option of picking it in S1 at this point. + const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t")); + assert(allAfterT, 'Should have advanced correctly'); + expect(allAfterT).toHaveLength(1); + const afterT = allAfterT[0]; + expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`); + + // Technically, the `id1` key could be used to go to S2 by first getting `id1` from S2 using `id2`, but + // that's obviously unecessary to consider since we can just use `id2` to go to S2 in the first place. + const indirect = afterT.indirectOptions(afterT.context, 0); + expect(indirect.paths).toHaveLength(1); + expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id2 } ⊢ key()]--> T(S2) (types: [T])`); + }); +}); diff --git a/query-graphs-js/src/conditionsCaching.ts b/query-graphs-js/src/conditionsCaching.ts index 25d80aa240..3168e0492d 100644 --- a/query-graphs-js/src/conditionsCaching.ts +++ b/query-graphs-js/src/conditionsCaching.ts @@ -1,5 +1,5 @@ import { assert } from "@apollo/federation-internals"; -import { ConditionResolution, ConditionResolver, ExcludedConditions, ExcludedEdges, sameExcludedEdges } from "./graphPath"; +import { ConditionResolution, ConditionResolver, ExcludedConditions, ExcludedDestinations, sameExcludedDestinations } from "./graphPath"; import { PathContext } from "./pathContext"; import { Edge, QueryGraph, QueryGraphState } from "./querygraph"; @@ -13,8 +13,8 @@ export function cachingConditionResolver(graph: QueryGraph, resolver: ConditionR // when we have no excluded edges, we'd only ever use the cache for the first key of every type. However, // as the algorithm always try keys in the same order (the order of the edges in the query graph), including // the excluded edges we see on the first ever call is actually the proper thing to do. - const cache = new QueryGraphState(graph); - return (edge: Edge, context: PathContext, excludedEdges: ExcludedEdges, excludedConditions: ExcludedConditions) => { + const cache = new QueryGraphState(graph); + return (edge: Edge, context: PathContext, excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions) => { assert(edge.conditions, 'Should not have been called for edge without conditions'); // We don't cache if there is a context or excluded conditions because those would impact the resolution and @@ -26,18 +26,18 @@ export function cachingConditionResolver(graph: QueryGraph, resolver: ConditionR // cached value `pathTree` when the context is not empty. That said, the context is about active @include/@skip and it's not use // that commonly, so this is probably not an urgent improvement. if (!context.isEmpty() || excludedConditions.length > 0) { - return resolver(edge, context, excludedEdges, excludedConditions); + return resolver(edge, context, excludedDestinations, excludedConditions); } const cachedResolutionAndExcludedEdges = cache.getEdgeState(edge); if (cachedResolutionAndExcludedEdges) { const [cachedResolution, forExcludedEdges] = cachedResolutionAndExcludedEdges; - return sameExcludedEdges(forExcludedEdges, excludedEdges) + return sameExcludedDestinations(forExcludedEdges, excludedDestinations) ? cachedResolution - : resolver(edge, context, excludedEdges, excludedConditions); + : resolver(edge, context, excludedDestinations, excludedConditions); } else { - const resolution = resolver(edge, context, excludedEdges, excludedConditions); - cache.setEdgeState(edge, [resolution, excludedEdges]); + const resolution = resolver(edge, context, excludedDestinations, excludedConditions); + cache.setEdgeState(edge, [resolution, excludedDestinations]); return resolution; } }; diff --git a/query-graphs-js/src/conditionsValidation.ts b/query-graphs-js/src/conditionsValidation.ts new file mode 100644 index 0000000000..31cfa8a0c0 --- /dev/null +++ b/query-graphs-js/src/conditionsValidation.ts @@ -0,0 +1,108 @@ +import { Schema, Selection } from "@apollo/federation-internals"; +import { + ConditionResolution, + ConditionResolver, + ExcludedConditions, + ExcludedDestinations, + GraphPath, + OpGraphPath, + SimultaneousPathsWithLazyIndirectPaths, + addConditionExclusion, + advanceOptionsToString, + advanceSimultaneousPathsWithOperation, + unsatisfiedConditionsResolution +} from "./graphPath"; +import { Edge, QueryGraph } from "./querygraph"; +import { PathContext } from "./pathContext"; +import { cachingConditionResolver } from "./conditionsCaching"; + +class ConditionValidationState { + constructor( + // Selection that belongs to the condition we're validating. + readonly selection: Selection, + // All the possible "simultaneous paths" we could be in the subgraph when we reach this state selection. + readonly subgraphOptions: SimultaneousPathsWithLazyIndirectPaths[] + ) {} + + advance(supergraph: Schema): ConditionValidationState[] | null { + let newOptions: SimultaneousPathsWithLazyIndirectPaths[] = []; + for (const paths of this.subgraphOptions) { + const pathsOptions = advanceSimultaneousPathsWithOperation( + supergraph, + paths, + this.selection.element, + ); + if (!pathsOptions) { + continue; + } + newOptions = newOptions.concat(pathsOptions); + } + + // If we got no options, it means that particular selection of the conditions cannot be satisfied, so the + // overall condition cannot. + if (newOptions.length === 0) { + return null; + } + return this.selection.selectionSet ? this.selection.selectionSet.selections().map(s => new ConditionValidationState(s, newOptions)) : []; + } + + toString(): string { + return `${this.selection} <=> ${advanceOptionsToString(this.subgraphOptions)}`; + } +} + +/** + * Creates a `ConditionResolver` that only validates that the condition can be satisfied, but without + * trying compare/evaluate the potential various ways to validate said conditions. Concretely, the + * `ConditionResolution` values returned by the create resolver will never contain a `pathTree` (or + * an `unsatisfiedConditionReason` for that matter) and the cost will always default to 1 if the + * conditions are satisfied. + */ +export function simpleValidationConditionResolver({ + supergraph, + queryGraph, + withCaching, +}: { + supergraph: Schema, + queryGraph: QueryGraph, + withCaching?: boolean, +}): ConditionResolver { + const resolver = ( + edge: Edge, + context: PathContext, + excludedDestinations: ExcludedDestinations, + excludedConditions: ExcludedConditions, + ): ConditionResolution => { + const conditions = edge.conditions!; + excludedConditions = addConditionExclusion(excludedConditions, conditions); + + const initialPath: OpGraphPath = GraphPath.create(queryGraph, edge.head); + const initialOptions = [ + new SimultaneousPathsWithLazyIndirectPaths( + [initialPath], + context, + simpleValidationConditionResolver({supergraph, queryGraph, withCaching}), + excludedDestinations, + excludedConditions + ) + ]; + + const stack: ConditionValidationState[] = []; + for (const selection of conditions.selections()) { + stack.push(new ConditionValidationState(selection, initialOptions)); + } + + while (stack.length > 0) { + const state = stack.pop()!; + const newStates = state.advance(supergraph); + if (newStates === null) { + return unsatisfiedConditionsResolution; + } + newStates.forEach(s => stack.push(s)); + } + // If we exhaust the stack, it means we've been able to find "some" path for every possible selection in the condition, so the + // condition is validated. Note that we use a cost of 1 for all conditions as we don't care about efficiency. + return { satisfied: true, cost: 1 }; + }; + return withCaching ? cachingConditionResolver(queryGraph, resolver) : resolver; +} diff --git a/query-graphs-js/src/graphPath.ts b/query-graphs-js/src/graphPath.ts index 6b7aad4ccb..ba8ef2ea18 100644 --- a/query-graphs-js/src/graphPath.ts +++ b/query-graphs-js/src/graphPath.ts @@ -791,7 +791,8 @@ export class GraphPath` : ''; - return `${isRoot ? '' : this.root}${pathStr}${deferStr} (types: [${this.props.runtimeTypesOfTail.join(', ')}])`; + const typeStr = this.props.runtimeTypesOfTail.length > 0 ? ` (types: [${this.props.runtimeTypesOfTail.join(', ')}])` : ''; + return `${isRoot ? '' : this.root}${pathStr}${deferStr}${typeStr}`; } } @@ -866,7 +867,7 @@ export function traversePath( // Note that ConditionResolver are guaranteed to be only called for edge with conditions. export type ConditionResolver = - (edge: Edge, context: PathContext, excludedEdges: ExcludedEdges, excludedConditions: ExcludedConditions) => ConditionResolution; + (edge: Edge, context: PathContext, excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions) => ConditionResolution; export type ConditionResolution = { @@ -1169,30 +1170,28 @@ function createLazyTransitionOptions( )); } -// A set of excluded edges, that is a pair of a head vertex index and an edge index (since edge indexes are relative to their vertex). -export type ExcludedEdges = readonly [number, number][]; +// A "set" of excluded destinations, that is subgraph name. Note that we use an array instead of set because this is used +// in pretty hot paths (the whole path computation is CPU intensive) and will basically always be tiny (it's bounded +// by the number of distinct key on a given type, so usually 2-3 max; even in completely unrealistic cases, it's hard bounded +// by the number of subgraph), so array is going to perform a lot better than `Set` in practice. +export type ExcludedDestinations = readonly string[]; -function isEdgeExcluded(edge: Edge, excluded: ExcludedEdges): boolean { - return excluded.some(([vIdx, eIdx]) => edge.head.index === vIdx && edge.index === eIdx); +function isDestinationExcluded(destination: string, excluded: ExcludedDestinations): boolean { + return excluded.includes(destination); } -export function sameExcludedEdges(ex1: ExcludedEdges, ex2: ExcludedEdges): boolean { +export function sameExcludedDestinations(ex1: ExcludedDestinations, ex2: ExcludedDestinations): boolean { if (ex1 === ex2) { return true; } if (ex1.length !== ex2.length) { return false; } - for (let i = 0; i < ex1.length; ++i) { - if (ex1[i][0] !== ex2[i][0] || ex1[i][1] !== ex2[i][1]) { - return false; - } - } - return true; + return ex1.every((d) => ex2.includes(d)); } -function addEdgeExclusion(excluded: ExcludedEdges, newExclusion: Edge): ExcludedEdges { - return excluded.concat([[newExclusion.head.index, newExclusion.index]]); +function addDestinationExclusion(excluded: ExcludedDestinations, destination: string): ExcludedDestinations { + return excluded.includes(destination) ? excluded : excluded.concat(destination); } export type ExcludedConditions = readonly SelectionSet[]; @@ -1233,7 +1232,7 @@ function advancePathWithNonCollectingAndTypePreservingTransitions, context: PathContext, conditionResolver: ConditionResolver, - excludedEdges: ExcludedEdges, + excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions, convertTransitionWithCondition: (transition: Transition, context: PathContext) => TTrigger, triggerToEdge: (graph: QueryGraph, vertex: Vertex, t: TTrigger) => Edge | null | undefined @@ -1299,7 +1298,9 @@ function advancePathWithNonCollectingAndTypePreservingTransitions `From ${toAdvance}:`); for (const edge of nextEdges) { debug.group(() => `Testing edge ${edge}`); - if (isEdgeExcluded(edge, excludedEdges)) { + const target = edge.tail; + + if (isDestinationExcluded(target.source, excludedDestinations)) { debug.groupEnd(`Ignored: edge is excluded`); continue; } @@ -1308,7 +1309,6 @@ function advancePathWithNonCollectingAndTypePreservingTransitions `Validating conditions ${edge.conditions}`); + // As we validate the condition for this edge, it might be necessary to jump to another subgraph, but if for that + // we need to jump to the same subgraph we're trying to get to, then it means there is another, shorter way to + // go to our destination and we can return that shorter path, not the one with the edge we're trying. const conditionResolution = canSatisfyConditions( toAdvance, edge, conditionResolver, context, - addEdgeExclusion(excludedEdges, edge), + addDestinationExclusion(excludedDestinations, target.source), excludedConditions, ); if (conditionResolution.satisfied) { @@ -1715,7 +1718,7 @@ function canSatisfyConditions { readonly paths: SimultaneousPaths, readonly context: PathContext, readonly conditionResolver: ConditionResolver, - readonly excludedNonCollectingEdges: ExcludedEdges = [], + readonly excludedNonCollectingEdges: ExcludedDestinations = [], readonly excludedConditionsOnNonCollectingEdges: ExcludedConditions = [], ) { this.lazilyComputedIndirectPaths = new Array(paths.length); @@ -1842,6 +1845,40 @@ export function advanceOptionsToString(options: (SimultaneousPaths | Simult return '[\n ' + options.map(opt => Array.isArray(opt) ? simultaneousPathsToString(opt, " ") : opt.toString()).join('\n ') + '\n]'; } +// Given a list of just computed indirect paths and a field that we're trying to advance after those paths, this +// method fields any path that should note be considered. +// +// Currently, this handle the case where the key used at the end of the indirect path contains (at top level) the field +// being queried. Or to make this more concrete, if we're trying to collect field `id`, and the path last edge was using +// key `id`, then we can ignore that path because this imply that there is a way to `id` "some other way" (also see +// the `does not evaluate plans relying on a key field to fetch that same field` test in `buildPlan` for more details). +function filterNonCollectingPathsForField( + paths: OpIndirectPaths, + field: Field, +): OpIndirectPaths { + // We only handle leafs. Things are more complex non-leaf. + if (!field.isLeafField()) { + return paths; + } + + const filtered = paths.paths.filter((p) => { + const lastEdge = p.lastEdge(); + if (!lastEdge || lastEdge.transition.kind !== 'KeyResolution') { + return true; + } + + const conditions = lastEdge.conditions; + return !(conditions && conditions.containsTopLevelField(field)); + }); + return filtered.length === paths.paths.length + ? paths + : { + ...paths, + paths: filtered + }; + +} + // Returns undefined if the operation cannot be dealt with/advanced. Otherwise, it returns a list of options we can be in after advancing the operation, each option // being a set of simultaneous paths in the subgraphs (a single path in the simple case, but type exploding may make us explore multiple paths simultaneously). // The lists of options can be empty, which has the special meaning that the operation is guaranteed to have no results (it corresponds to unsatisfiable conditions), @@ -1900,7 +1937,10 @@ export function advanceSimultaneousPathsWithOperation( if (operation.kind === 'Field') { debug.group(`Computing indirect paths:`); // Then adds whatever options can be obtained by taking some non-collecting edges first. - const pathsWithNonCollecting = subgraphSimultaneousPaths.indirectOptions(updatedContext, i); + const pathsWithNonCollecting = filterNonCollectingPathsForField( + subgraphSimultaneousPaths.indirectOptions(updatedContext, i), + operation + ); debug.groupEnd(() => pathsWithNonCollecting.paths.length == 0 ? `no indirect paths` : `${pathsWithNonCollecting.paths.length} indirect paths`); if (pathsWithNonCollecting.paths.length > 0) { debug.group('Validating indirect options:'); @@ -1996,7 +2036,7 @@ export function createInitialOptions( initialPath: OpGraphPath, initialContext: PathContext, conditionResolver: ConditionResolver, - excludedEdges: ExcludedEdges, + excludedEdges: ExcludedDestinations, excludedConditions: ExcludedConditions, ): SimultaneousPathsWithLazyIndirectPaths[] { const lazyInitialPath = new SimultaneousPathsWithLazyIndirectPaths( diff --git a/query-graphs-js/src/index.ts b/query-graphs-js/src/index.ts index 53d6b0c222..3907233a72 100644 --- a/query-graphs-js/src/index.ts +++ b/query-graphs-js/src/index.ts @@ -5,3 +5,4 @@ export * from './graphviz'; export * from './transition'; export * from './pathContext'; export * from './conditionsCaching'; +export * from './conditionsValidation'; diff --git a/query-planner-js/src/__tests__/buildPlan.defer.test.ts b/query-planner-js/src/__tests__/buildPlan.defer.test.ts index 1132631a91..ef807df4e3 100644 --- a/query-planner-js/src/__tests__/buildPlan.defer.test.ts +++ b/query-planner-js/src/__tests__/buildPlan.defer.test.ts @@ -3613,7 +3613,6 @@ test('@defer only the key of an entity', () => { Fetch(service: "Subgraph1") { { t { - __typename v0 id } diff --git a/query-planner-js/src/__tests__/buildPlan.test.ts b/query-planner-js/src/__tests__/buildPlan.test.ts index 400d4a5ec9..5a1d42690b 100644 --- a/query-planner-js/src/__tests__/buildPlan.test.ts +++ b/query-planner-js/src/__tests__/buildPlan.test.ts @@ -6247,7 +6247,6 @@ test('correctly generate plan built from some non-individually optimal branch op // // Anyway, this test make sure we do correctly generate the plan using 1.b and 2.a, and do // not ignore 1.b in favor of 1.a in particular (which a bug did at one point). - const subgraph1 = { name: 'Subgraph1', typeDefs: gql` @@ -6458,3 +6457,90 @@ test('does not error on some complex fetch group dependencies', () => { } `); }); + + +test('does not evaluate plans relying on a key field to fetch that same field', () => { + const subgraph1 = { + name: 'Subgraph1', + typeDefs: gql` + type Query { + t: T + } + + type T @key(fields: "otherId") { + otherId: ID! + } + ` + } + + const subgraph2 = { + name: 'Subgraph2', + typeDefs: gql` + type T @key(fields: "id") @key(fields: "otherId") { + id: ID! + otherId: ID! + } + ` + } + + const subgraph3 = { + name: 'Subgraph3', + typeDefs: gql` + type T @key(fields: "id") { + id: ID! + } + ` + } + + const [api, queryPlanner] = composeAndCreatePlanner(subgraph1, subgraph2, subgraph3); + const operation = operationFromDocument(api, gql` + { + t { + id + } + } + `); + + const plan = queryPlanner.buildQueryPlan(operation); + expect(plan).toMatchInlineSnapshot(` + QueryPlan { + Sequence { + Fetch(service: "Subgraph1") { + { + t { + __typename + otherId + } + } + }, + Flatten(path: "t") { + Fetch(service: "Subgraph2") { + { + ... on T { + __typename + otherId + } + } => + { + ... on T { + id + } + } + }, + }, + }, + } + `); + + // This is the main thing this test exists for: making sure we only evaluate a + // single plan for this example. And while it may be hard to see what other + // plans than the one above could be evaluated, some older version of the planner + // where considering a plan consisting of, from `Subgraph1`, fetching key `id` + // in `Subgraph2` using key `otherId`, and then using that `id` key to fetch + // ... field `id` in `Subgraph3`, not realizing that the `id` is what we ultimately + // want and so there is no point in considering path that use it as key. Anyway + // this test ensure this is not considered anymore (considering that later plan + // was not incorrect, but it was adding to the options to evaluate which in some + // cases could impact query planning performance quite a bit). + expect(queryPlanner.lastGeneratedPlanStatistics()?.evaluatedPlanCount).toBe(1); +}); diff --git a/query-planner-js/src/buildPlan.ts b/query-planner-js/src/buildPlan.ts index 1b12aedd86..6fdcf19b36 100644 --- a/query-planner-js/src/buildPlan.ts +++ b/query-planner-js/src/buildPlan.ts @@ -64,7 +64,7 @@ import { advanceSimultaneousPathsWithOperation, Edge, emptyContext, - ExcludedEdges, + ExcludedDestinations, QueryGraph, GraphPath, isPathContext, @@ -384,7 +384,7 @@ class QueryPlanningTraversal { private readonly rootKind: SchemaRootKind, readonly costFunction: CostFunction, initialContext: PathContext, - excludedEdges: ExcludedEdges = [], + excludedDestinations: ExcludedDestinations = [], excludedConditions: ExcludedConditions = [], ) { const { root, federatedQueryGraph } = parameters; @@ -399,7 +399,7 @@ class QueryPlanningTraversal { initialPath, initialContext, this.conditionResolver, - excludedEdges, + excludedDestinations, excludedConditions, ); this.stack = mapOptionsToSelections(selectionSet, initialOptions); @@ -772,7 +772,7 @@ class QueryPlanningTraversal { : computeNonRootFetchGroups(dependencyGraph, tree, this.rootKind); } - private resolveConditionPlan(edge: Edge, context: PathContext, excludedEdges: ExcludedEdges, excludedConditions: ExcludedConditions): ConditionResolution { + private resolveConditionPlan(edge: Edge, context: PathContext, excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions): ConditionResolution { const bestPlan = new QueryPlanningTraversal( { ...this.parameters, @@ -784,7 +784,7 @@ class QueryPlanningTraversal { 'query', this.costFunction, context, - excludedEdges, + excludedDestinations, addConditionExclusion(excludedConditions, edge.conditions) ).findBestPlan(); // Note that we want to return 'null', not 'undefined', because it's the latter that means "I cannot resolve that @@ -814,6 +814,7 @@ const conditionsMemoizer = (selectionSet: SelectionSet) => ({ conditions: condit class GroupInputs { private readonly perType = new Map(); + onUpdateCallback: () => void = () => {}; constructor( readonly supergraphSchema: Schema, @@ -830,6 +831,7 @@ class GroupInputs { this.perType.set(typeName, typeSelection); } typeSelection.updates().add(selection); + this.onUpdateCallback(); } addAll(other: GroupInputs) { @@ -913,6 +915,7 @@ class FetchGroup { private readonly inputRewrites: FetchDataRewrite[] = []; + private constructor( readonly dependencyGraph: FetchDependencyGraph, public index: number, @@ -928,7 +931,21 @@ class FetchGroup { // key for that. Having it here saves us from re-computing it more than once. readonly subgraphAndMergeAtKey?: string, private cachedCost?: number, + // Cache used to save unecessary recomputation of the `isUseless` method. + private isKnownUseful: boolean = false, ) { + if (this._inputs) { + this._inputs.onUpdateCallback = () => { + // We're trying to avoid the full recomputation of `isUseless` when we're already + // shown that the group is known useful (if it is shown useless, the group is removed, + // so we're not caching that result but it's ok). And `isUseless` basically checks if + // `inputs.contains(selection)`, so if a group is shown useful, it means that there + // is some selections not in the inputs, but as long as we add to selections (and we + // never remove from selections; `MutableSelectionSet` don't have removing methods), + // then this won't change. Only changing inputs may require some recomputation. + this.isKnownUseful = false; + } + } } static create({ @@ -967,6 +984,7 @@ class FetchGroup { ); } + // Clones everything on the group itself, but not it's `_parents` or `_children` links. cloneShallow(newDependencyGraph: FetchDependencyGraph): FetchGroup { return new FetchGroup( newDependencyGraph, @@ -981,6 +999,7 @@ class FetchGroup { this.deferRef, this.subgraphAndMergeAtKey, this.cachedCost, + this.isKnownUseful, ); } @@ -1165,7 +1184,7 @@ class FetchGroup { // If a group is such that everything is fetches is already included in the inputs, then // this group does useless fetches. isUseless(): boolean { - if (!this.inputs || this.mustPreserveSelection) { + if (this.isKnownUseful || !this.inputs || this.mustPreserveSelection) { return false; } @@ -1178,7 +1197,7 @@ class FetchGroup { // But the fetch above _is_ useless, it does only fetch its inputs, and we wouldn't catch this // if we do a raw inclusion check of `selection` into `inputs` // - // We only care about this problem at the top-level of the selections however, so we does that + // We only care about this problem at the top-level of the selections however, so we do that // top-level check manually (instead of just calling `this.inputs.contains(this.selection)`) // but fallback on `contains` for anything deeper. @@ -1199,7 +1218,7 @@ class FetchGroup { const inputSelections = this.inputs.selectionSets().flatMap((s) => s.selections()); // Checks that every selection is contained in the input selections. - return this.selection.selections().every((selection) => { + const isUseless = this.selection.selections().every((selection) => { const conditionInSupergraph = conditionInSupergraphIfInterfaceObject(selection); if (!conditionInSupergraph) { // We're not in the @interfaceObject case described above. We just check that an input selection contains the @@ -1240,6 +1259,9 @@ class FetchGroup { return implementationInputSelections.length > 0 && implementationInputSelections.every((input) => input.selectionSet.contains(subSelectionSet)); }); + + this.isKnownUseful = !isUseless; + return isUseless; } /**