diff --git a/.changeset/friendly-lies-protect.md b/.changeset/friendly-lies-protect.md new file mode 100644 index 0000000000..d70feaf6f5 --- /dev/null +++ b/.changeset/friendly-lies-protect.md @@ -0,0 +1,8 @@ +--- +"@apollo/query-planner": patch +"@apollo/query-graphs": patch +"@apollo/federation-internals": patch +--- + +Improves query planning time in some situations where entities use multiple keys. + \ No newline at end of file diff --git a/composition-js/src/validate.ts b/composition-js/src/validate.ts index 17ae66d351..02494a910a 100644 --- a/composition-js/src/validate.ts +++ b/composition-js/src/validate.ts @@ -43,25 +43,15 @@ import { RootPath, advancePathWithTransition, Transition, - OpGraphPath, - advanceSimultaneousPathsWithOperation, - ExcludedEdges, QueryGraphState, - ExcludedConditions, Unadvanceables, isUnadvanceable, Unadvanceable, noConditionsResolution, - ConditionResolution, - unsatisfiedConditionsResolution, - ConditionResolver, - cachingConditionResolver, - PathContext, - addConditionExclusion, - SimultaneousPathsWithLazyIndirectPaths, - advanceOptionsToString, TransitionPathWithLazyIndirectPaths, RootVertex, + simpleValidationConditionResolver, + ConditionResolver, } from "@apollo/query-graphs"; import { CompositionHint, HINTS } from "./hints"; import { ASTNode, GraphQLError, print } from "graphql"; @@ -330,7 +320,7 @@ export function computeSubgraphPaths( } { try { assert(!supergraphPath.hasAnyEdgeConditions(), () => `A supergraph path should not have edge condition paths (as supergraph edges should not have conditions): ${supergraphPath}`); - const conditionResolver = new ConditionValidationResolver(supergraphSchema, subgraphs); + const conditionResolver = simpleValidationConditionResolver({ supergraph: supergraphSchema, queryGraph: subgraphs, withCaching: true }); const initialState = ValidationState.initial({supergraphAPI: supergraphPath.graph, kind: supergraphPath.root.rootKind, subgraphs, conditionResolver}); const context = new ValidationContext(supergraphSchema); let state = initialState; @@ -431,11 +421,11 @@ export class ValidationState { supergraphAPI: QueryGraph, kind: SchemaRootKind, subgraphs: QueryGraph, - conditionResolver: ConditionValidationResolver, + conditionResolver: ConditionResolver, }) { return new ValidationState( GraphPath.fromGraphRoot(supergraphAPI, kind)!, - initialSubgraphPaths(kind, subgraphs).map((p) => TransitionPathWithLazyIndirectPaths.initial(p, conditionResolver.resolver)), + initialSubgraphPaths(kind, subgraphs).map((p) => TransitionPathWithLazyIndirectPaths.initial(p, conditionResolver)), ); } @@ -586,7 +576,7 @@ function isSupersetOrEqual(maybeSuperset: string[], other: string[]): boolean { } class ValidationTraversal { - private readonly conditionResolver: ConditionValidationResolver; + private readonly conditionResolver: ConditionResolver; // The stack contains all states that aren't terminal. private readonly stack: ValidationState[] = []; @@ -604,7 +594,11 @@ class ValidationTraversal { supergraphAPI: QueryGraph, subgraphs: QueryGraph ) { - this.conditionResolver = new ConditionValidationResolver(supergraphSchema, subgraphs); + this.conditionResolver = simpleValidationConditionResolver({ + supergraph: supergraphSchema, + queryGraph: subgraphs, + withCaching: true, + }); supergraphAPI.rootKinds().forEach((kind) => this.stack.push(ValidationState.initial({ supergraphAPI, kind, @@ -680,87 +674,3 @@ class ValidationTraversal { debug.groupEnd(); } } - -class ConditionValidationState { - constructor( - // Selection that belongs to the condition we're validating. - readonly selection: Selection, - // All the possible "simultaneous paths" we could be in the subgraph when we reach this state selection. - readonly subgraphOptions: SimultaneousPathsWithLazyIndirectPaths[] - ) {} - - toString(): string { - return `${this.selection} <=> ${advanceOptionsToString(this.subgraphOptions)}`; - } -} - -class ConditionValidationResolver { - readonly resolver: ConditionResolver; - - constructor( - private readonly supergraphSchema: Schema, - private readonly federatedQueryGraph: QueryGraph - ) { - this.resolver = cachingConditionResolver( - federatedQueryGraph, - ( - edge: Edge, - context: PathContext, - excludedEdges: ExcludedEdges, - excludedConditions: ExcludedConditions - ) => this.validateConditions(edge, context, excludedEdges, excludedConditions) - ); - } - - private validateConditions( - edge: Edge, - context: PathContext, - excludedEdges: ExcludedEdges, - excludedConditions: ExcludedConditions - ): ConditionResolution { - const conditions = edge.conditions!; - excludedConditions = addConditionExclusion(excludedConditions, conditions); - - const initialPath: OpGraphPath = GraphPath.create(this.federatedQueryGraph, edge.head); - const initialOptions = [new SimultaneousPathsWithLazyIndirectPaths([initialPath], context, this.resolver, excludedEdges, excludedConditions)]; - - const stack: ConditionValidationState[] = []; - for (const selection of conditions.selections()) { - stack.push(new ConditionValidationState(selection, initialOptions)); - } - - while (stack.length > 0) { - const state = stack.pop()!; - const newStates = this.advanceState(state); - if (newStates === null) { - return unsatisfiedConditionsResolution; - } - newStates.forEach(s => stack.push(s)); - } - // If we exhaust the stack, it means we've been able to find "some" path for every possible selection in the condition, so the - // condition is validated. Note that we use a cost of 1 for all conditions as we don't care about efficiency. - return { satisfied: true, cost: 1 }; - } - - private advanceState(state: ConditionValidationState): ConditionValidationState[] | null { - let newOptions: SimultaneousPathsWithLazyIndirectPaths[] = []; - for (const paths of state.subgraphOptions) { - const pathsOptions = advanceSimultaneousPathsWithOperation( - this.supergraphSchema, - paths, - state.selection.element, - ); - if (!pathsOptions) { - continue; - } - newOptions = newOptions.concat(pathsOptions); - } - - // If we got no options, it means that particular selection of the conditions cannot be satisfied, so the - // overall condition cannot. - if (newOptions.length === 0) { - return null; - } - return state.selection.selectionSet ? state.selection.selectionSet.selections().map(s => new ConditionValidationState(s, newOptions)) : []; - } -} diff --git a/internals-js/src/operations.ts b/internals-js/src/operations.ts index cadc3099f4..4762b566bb 100644 --- a/internals-js/src/operations.ts +++ b/internals-js/src/operations.ts @@ -1792,6 +1792,11 @@ export class SelectionSet { : ContainsResult.STRICTLY_CONTAINED; } + containsTopLevelField(field: Field): boolean { + const selection = this._keyedSelections.get(field.key()); + return !!selection && selection.element.equals(field); + } + /** * Returns a selection set that correspond to this selection set but where any of the selections in the * provided selection set have been remove. diff --git a/query-graphs-js/src/__tests__/graphPath.test.ts b/query-graphs-js/src/__tests__/graphPath.test.ts new file mode 100644 index 0000000000..f544daaae0 --- /dev/null +++ b/query-graphs-js/src/__tests__/graphPath.test.ts @@ -0,0 +1,216 @@ +import { + Field, + FieldDefinition, + Schema, + assert, + buildSupergraphSchema, +} from "@apollo/federation-internals"; +import { + GraphPath, + OpGraphPath, + SimultaneousPathsWithLazyIndirectPaths, + advanceSimultaneousPathsWithOperation, + createInitialOptions +} from "../graphPath"; +import { QueryGraph, Vertex, buildFederatedQueryGraph } from "../querygraph"; +import { emptyContext } from "../pathContext"; +import { simpleValidationConditionResolver } from "../conditionsValidation"; + +function parseSupergraph(subgraphs: number, schema: string): { supergraph: Schema, api: Schema, queryGraph: QueryGraph } { + assert(subgraphs >= 1, 'Should have at least 1 subgraph'); + const header = ` + schema + @link(url: "https://specs.apollo.dev/link/v1.0") + @link(url: "https://specs.apollo.dev/join/v0.3", for: EXECUTION) + { + query: Query + } + + directive @join__enumValue(graph: join__Graph!) repeatable on ENUM_VALUE + directive @join__field(graph: join__Graph, requires: join__FieldSet, provides: join__FieldSet, type: String, external: Boolean, override: String, usedOverridden: Boolean) repeatable on FIELD_DEFINITION | INPUT_FIELD_DEFINITION + directive @join__graph(name: String!, url: String!) on ENUM_VALUE + directive @join__implements(graph: join__Graph!, interface: String!) repeatable on OBJECT | INTERFACE + directive @join__type(graph: join__Graph!, key: join__FieldSet, extension: Boolean! = false, resolvable: Boolean! = true, isInterfaceObject: Boolean! = false) repeatable on OBJECT | INTERFACE | UNION | ENUM | INPUT_OBJECT | SCALAR + directive @join__unionMember(graph: join__Graph!, member: String!) repeatable on UNION + directive @link(url: String, as: String, for: link__Purpose, import: [link__Import]) repeatable on SCHEMA + + scalar join__FieldSet + + scalar link__Import + + enum link__Purpose { + SECURITY + EXECUTION + } + + enum join__Graph { + ${[...Array(subgraphs).keys()].map((n) => `S${n+1} @join__graph(name: "S${n+1}", url: "https://S${n+1}")`).join('\n')} + } + + `; + + try { + const supergraph = buildSupergraphSchema(header + schema)[0]; + return { + supergraph, + api: supergraph.toAPISchema(), + queryGraph: buildFederatedQueryGraph(supergraph, true), + }; + } catch (e) { + throw new Error('Error parsing supergraph schema:\n' + e.toString()); + } +} + +function createOptions(supergraph: Schema, queryGraph: QueryGraph): SimultaneousPathsWithLazyIndirectPaths[] { + // We know we only use `Query` in the supergraph, so there is only that as root. + const root = queryGraph.roots()[0]; + const initialPath: OpGraphPath = GraphPath.create(queryGraph, root); + return createInitialOptions( + initialPath, + emptyContext, + simpleValidationConditionResolver({ supergraph, queryGraph }), + [], + [], + ); +} + +function field(schema: Schema, coordinate: string): Field { + const def = schema.elementByCoordinate(coordinate) as FieldDefinition; + return new Field(def); +} + +describe("advanceSimultaneousPathsWithOperation", () => { + test("do not use key `x` to fetch `x`", () => { + const { supergraph, api, queryGraph } = parseSupergraph(3, ` + type Query + @join__type(graph: S1) + { + t: T @join__field(graph: S1) + } + + type T + @join__type(graph: S1) + @join__type(graph: S2, key: "otherId") + @join__type(graph: S2, key: "id") + @join__type(graph: S3, key: "id") + { + otherId: ID! @join__field(graph: S1) @join__field(graph: S2) + id: ID! @join__field(graph: S2) @join__field(graph: S3) + } + `); + + // Picking the first initial, the one going to S1 + const initial = createOptions(supergraph, queryGraph)[0]; + + // Then picking `t`, which should be just the one option of picking it in S1 at this point. + const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t")); + assert(allAfterT, 'Should have advanced correctly'); + expect(allAfterT).toHaveLength(1); + const afterT = allAfterT[0]; + expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`); + + // Checking that, at this point, we technically have 2 options: + // 1. we can go to S2 using `otherId`. + // 2. we can go to S3 using `id`, assuming we first get `id` from S2 (using `otherId`). + const indirect = afterT.indirectOptions(afterT.context, 0); + expect(indirect.paths).toHaveLength(2); + expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) (types: [T])`); + expect(indirect.paths[1].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id } ⊢ key()]--> T(S3) (types: [T])`); + + const allForId = advanceSimultaneousPathsWithOperation(supergraph, afterT, field(api, "T.id")); + assert(allForId, 'Should have advanced correctly'); + + // Here, `id` is a direct path from both of our indirect paths. However, it makes no sense to use the 2nd + // indirect path above, since the condition to get to `S3` was `id`, and this means another indirect path + // is able to get to `id` more directly (the first one in this case). + // So ultimately, we should only keep the 1st option. + expect(allForId).toHaveLength(1); + const forId = allForId[0]; + expect(forId.toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) --[id]--> ID(S2)`); + }); + + test("do not use key containing `x` to fetch `x`", () => { + // Similar to the previous test, but the key used is not exactly the fetch field, it only contains + // it (but the optimisation should still work). + const { supergraph, api, queryGraph } = parseSupergraph(3, ` + type Query + @join__type(graph: S1) + { + t: T @join__field(graph: S1) + } + + type T + @join__type(graph: S1) + @join__type(graph: S2, key: "otherId") + @join__type(graph: S2, key: "id1 id2") + @join__type(graph: S3, key: "id1 id2") + { + otherId: ID! @join__field(graph: S1) @join__field(graph: S2) + id1: ID! @join__field(graph: S2) @join__field(graph: S3) + id2: ID! @join__field(graph: S2) @join__field(graph: S3) + } + `); + + // Picking the first initial, the one going to S1 + const initial = createOptions(supergraph, queryGraph)[0]; + + // Then picking `t`, which should be just the one option of picking it in S1 at this point. + const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t")); + assert(allAfterT, 'Should have advanced correctly'); + expect(allAfterT).toHaveLength(1); + const afterT = allAfterT[0]; + expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`); + + // Checking that, at this point, we technically have 2 options: + // 1. we can go to S2 using `otherId`. + // 2. we can go to S3 using `id1 id2`, assuming we first get `id1 id2` from S2 (using `otherId`). + const indirect = afterT.indirectOptions(afterT.context, 0); + expect(indirect.paths).toHaveLength(2); + expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) (types: [T])`); + expect(indirect.paths[1].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id1 id2 } ⊢ key()]--> T(S3) (types: [T])`); + + const allForId = advanceSimultaneousPathsWithOperation(supergraph, afterT, field(api, "T.id1")); + assert(allForId, 'Should have advanced correctly'); + + // Here, `id1` is a direct path from both of our indirect paths. However, it makes no sense to use the 2nd + // indirect path above, since the condition to get to `S3` was `id1 id2`, which includes `id1`. + expect(allForId).toHaveLength(1); + const forId = allForId[0]; + expect(forId.toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) --[id1]--> ID(S2)`); + }); + + test("avoids indirect path that needs a key to the same subgraph to validate its condition", () => { + const { supergraph, api, queryGraph } = parseSupergraph(2, ` + type Query + @join__type(graph: S1) + { + t: T @join__field(graph: S1) + } + + type T + @join__type(graph: S1) + @join__type(graph: S2, key: "id1") + @join__type(graph: S2, key: "id2") + { + id1: ID! @join__field(graph: S2) + id2: ID! @join__field(graph: S1) @join__field(graph: S2) + } + `); + + // Picking the first initial, the one going to S1 + const initial = createOptions(supergraph, queryGraph)[0]; + + // Then picking `t`, which should be just the one option of picking it in S1 at this point. + const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t")); + assert(allAfterT, 'Should have advanced correctly'); + expect(allAfterT).toHaveLength(1); + const afterT = allAfterT[0]; + expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`); + + // Technically, the `id1` key could be used to go to S2 by first getting `id1` from S2 using `id2`, but + // that's obviously unecessary to consider since we can just use `id2` to go to S2 in the first place. + const indirect = afterT.indirectOptions(afterT.context, 0); + expect(indirect.paths).toHaveLength(1); + expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id2 } ⊢ key()]--> T(S2) (types: [T])`); + }); +}); diff --git a/query-graphs-js/src/conditionsCaching.ts b/query-graphs-js/src/conditionsCaching.ts index 25d80aa240..3168e0492d 100644 --- a/query-graphs-js/src/conditionsCaching.ts +++ b/query-graphs-js/src/conditionsCaching.ts @@ -1,5 +1,5 @@ import { assert } from "@apollo/federation-internals"; -import { ConditionResolution, ConditionResolver, ExcludedConditions, ExcludedEdges, sameExcludedEdges } from "./graphPath"; +import { ConditionResolution, ConditionResolver, ExcludedConditions, ExcludedDestinations, sameExcludedDestinations } from "./graphPath"; import { PathContext } from "./pathContext"; import { Edge, QueryGraph, QueryGraphState } from "./querygraph"; @@ -13,8 +13,8 @@ export function cachingConditionResolver(graph: QueryGraph, resolver: ConditionR // when we have no excluded edges, we'd only ever use the cache for the first key of every type. However, // as the algorithm always try keys in the same order (the order of the edges in the query graph), including // the excluded edges we see on the first ever call is actually the proper thing to do. - const cache = new QueryGraphState(graph); - return (edge: Edge, context: PathContext, excludedEdges: ExcludedEdges, excludedConditions: ExcludedConditions) => { + const cache = new QueryGraphState(graph); + return (edge: Edge, context: PathContext, excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions) => { assert(edge.conditions, 'Should not have been called for edge without conditions'); // We don't cache if there is a context or excluded conditions because those would impact the resolution and @@ -26,18 +26,18 @@ export function cachingConditionResolver(graph: QueryGraph, resolver: ConditionR // cached value `pathTree` when the context is not empty. That said, the context is about active @include/@skip and it's not use // that commonly, so this is probably not an urgent improvement. if (!context.isEmpty() || excludedConditions.length > 0) { - return resolver(edge, context, excludedEdges, excludedConditions); + return resolver(edge, context, excludedDestinations, excludedConditions); } const cachedResolutionAndExcludedEdges = cache.getEdgeState(edge); if (cachedResolutionAndExcludedEdges) { const [cachedResolution, forExcludedEdges] = cachedResolutionAndExcludedEdges; - return sameExcludedEdges(forExcludedEdges, excludedEdges) + return sameExcludedDestinations(forExcludedEdges, excludedDestinations) ? cachedResolution - : resolver(edge, context, excludedEdges, excludedConditions); + : resolver(edge, context, excludedDestinations, excludedConditions); } else { - const resolution = resolver(edge, context, excludedEdges, excludedConditions); - cache.setEdgeState(edge, [resolution, excludedEdges]); + const resolution = resolver(edge, context, excludedDestinations, excludedConditions); + cache.setEdgeState(edge, [resolution, excludedDestinations]); return resolution; } }; diff --git a/query-graphs-js/src/conditionsValidation.ts b/query-graphs-js/src/conditionsValidation.ts new file mode 100644 index 0000000000..31cfa8a0c0 --- /dev/null +++ b/query-graphs-js/src/conditionsValidation.ts @@ -0,0 +1,108 @@ +import { Schema, Selection } from "@apollo/federation-internals"; +import { + ConditionResolution, + ConditionResolver, + ExcludedConditions, + ExcludedDestinations, + GraphPath, + OpGraphPath, + SimultaneousPathsWithLazyIndirectPaths, + addConditionExclusion, + advanceOptionsToString, + advanceSimultaneousPathsWithOperation, + unsatisfiedConditionsResolution +} from "./graphPath"; +import { Edge, QueryGraph } from "./querygraph"; +import { PathContext } from "./pathContext"; +import { cachingConditionResolver } from "./conditionsCaching"; + +class ConditionValidationState { + constructor( + // Selection that belongs to the condition we're validating. + readonly selection: Selection, + // All the possible "simultaneous paths" we could be in the subgraph when we reach this state selection. + readonly subgraphOptions: SimultaneousPathsWithLazyIndirectPaths[] + ) {} + + advance(supergraph: Schema): ConditionValidationState[] | null { + let newOptions: SimultaneousPathsWithLazyIndirectPaths[] = []; + for (const paths of this.subgraphOptions) { + const pathsOptions = advanceSimultaneousPathsWithOperation( + supergraph, + paths, + this.selection.element, + ); + if (!pathsOptions) { + continue; + } + newOptions = newOptions.concat(pathsOptions); + } + + // If we got no options, it means that particular selection of the conditions cannot be satisfied, so the + // overall condition cannot. + if (newOptions.length === 0) { + return null; + } + return this.selection.selectionSet ? this.selection.selectionSet.selections().map(s => new ConditionValidationState(s, newOptions)) : []; + } + + toString(): string { + return `${this.selection} <=> ${advanceOptionsToString(this.subgraphOptions)}`; + } +} + +/** + * Creates a `ConditionResolver` that only validates that the condition can be satisfied, but without + * trying compare/evaluate the potential various ways to validate said conditions. Concretely, the + * `ConditionResolution` values returned by the create resolver will never contain a `pathTree` (or + * an `unsatisfiedConditionReason` for that matter) and the cost will always default to 1 if the + * conditions are satisfied. + */ +export function simpleValidationConditionResolver({ + supergraph, + queryGraph, + withCaching, +}: { + supergraph: Schema, + queryGraph: QueryGraph, + withCaching?: boolean, +}): ConditionResolver { + const resolver = ( + edge: Edge, + context: PathContext, + excludedDestinations: ExcludedDestinations, + excludedConditions: ExcludedConditions, + ): ConditionResolution => { + const conditions = edge.conditions!; + excludedConditions = addConditionExclusion(excludedConditions, conditions); + + const initialPath: OpGraphPath = GraphPath.create(queryGraph, edge.head); + const initialOptions = [ + new SimultaneousPathsWithLazyIndirectPaths( + [initialPath], + context, + simpleValidationConditionResolver({supergraph, queryGraph, withCaching}), + excludedDestinations, + excludedConditions + ) + ]; + + const stack: ConditionValidationState[] = []; + for (const selection of conditions.selections()) { + stack.push(new ConditionValidationState(selection, initialOptions)); + } + + while (stack.length > 0) { + const state = stack.pop()!; + const newStates = state.advance(supergraph); + if (newStates === null) { + return unsatisfiedConditionsResolution; + } + newStates.forEach(s => stack.push(s)); + } + // If we exhaust the stack, it means we've been able to find "some" path for every possible selection in the condition, so the + // condition is validated. Note that we use a cost of 1 for all conditions as we don't care about efficiency. + return { satisfied: true, cost: 1 }; + }; + return withCaching ? cachingConditionResolver(queryGraph, resolver) : resolver; +} diff --git a/query-graphs-js/src/graphPath.ts b/query-graphs-js/src/graphPath.ts index 6b7aad4ccb..ba8ef2ea18 100644 --- a/query-graphs-js/src/graphPath.ts +++ b/query-graphs-js/src/graphPath.ts @@ -791,7 +791,8 @@ export class GraphPath` : ''; - return `${isRoot ? '' : this.root}${pathStr}${deferStr} (types: [${this.props.runtimeTypesOfTail.join(', ')}])`; + const typeStr = this.props.runtimeTypesOfTail.length > 0 ? ` (types: [${this.props.runtimeTypesOfTail.join(', ')}])` : ''; + return `${isRoot ? '' : this.root}${pathStr}${deferStr}${typeStr}`; } } @@ -866,7 +867,7 @@ export function traversePath( // Note that ConditionResolver are guaranteed to be only called for edge with conditions. export type ConditionResolver = - (edge: Edge, context: PathContext, excludedEdges: ExcludedEdges, excludedConditions: ExcludedConditions) => ConditionResolution; + (edge: Edge, context: PathContext, excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions) => ConditionResolution; export type ConditionResolution = { @@ -1169,30 +1170,28 @@ function createLazyTransitionOptions( )); } -// A set of excluded edges, that is a pair of a head vertex index and an edge index (since edge indexes are relative to their vertex). -export type ExcludedEdges = readonly [number, number][]; +// A "set" of excluded destinations, that is subgraph name. Note that we use an array instead of set because this is used +// in pretty hot paths (the whole path computation is CPU intensive) and will basically always be tiny (it's bounded +// by the number of distinct key on a given type, so usually 2-3 max; even in completely unrealistic cases, it's hard bounded +// by the number of subgraph), so array is going to perform a lot better than `Set` in practice. +export type ExcludedDestinations = readonly string[]; -function isEdgeExcluded(edge: Edge, excluded: ExcludedEdges): boolean { - return excluded.some(([vIdx, eIdx]) => edge.head.index === vIdx && edge.index === eIdx); +function isDestinationExcluded(destination: string, excluded: ExcludedDestinations): boolean { + return excluded.includes(destination); } -export function sameExcludedEdges(ex1: ExcludedEdges, ex2: ExcludedEdges): boolean { +export function sameExcludedDestinations(ex1: ExcludedDestinations, ex2: ExcludedDestinations): boolean { if (ex1 === ex2) { return true; } if (ex1.length !== ex2.length) { return false; } - for (let i = 0; i < ex1.length; ++i) { - if (ex1[i][0] !== ex2[i][0] || ex1[i][1] !== ex2[i][1]) { - return false; - } - } - return true; + return ex1.every((d) => ex2.includes(d)); } -function addEdgeExclusion(excluded: ExcludedEdges, newExclusion: Edge): ExcludedEdges { - return excluded.concat([[newExclusion.head.index, newExclusion.index]]); +function addDestinationExclusion(excluded: ExcludedDestinations, destination: string): ExcludedDestinations { + return excluded.includes(destination) ? excluded : excluded.concat(destination); } export type ExcludedConditions = readonly SelectionSet[]; @@ -1233,7 +1232,7 @@ function advancePathWithNonCollectingAndTypePreservingTransitions, context: PathContext, conditionResolver: ConditionResolver, - excludedEdges: ExcludedEdges, + excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions, convertTransitionWithCondition: (transition: Transition, context: PathContext) => TTrigger, triggerToEdge: (graph: QueryGraph, vertex: Vertex, t: TTrigger) => Edge | null | undefined @@ -1299,7 +1298,9 @@ function advancePathWithNonCollectingAndTypePreservingTransitions `From ${toAdvance}:`); for (const edge of nextEdges) { debug.group(() => `Testing edge ${edge}`); - if (isEdgeExcluded(edge, excludedEdges)) { + const target = edge.tail; + + if (isDestinationExcluded(target.source, excludedDestinations)) { debug.groupEnd(`Ignored: edge is excluded`); continue; } @@ -1308,7 +1309,6 @@ function advancePathWithNonCollectingAndTypePreservingTransitions `Validating conditions ${edge.conditions}`); + // As we validate the condition for this edge, it might be necessary to jump to another subgraph, but if for that + // we need to jump to the same subgraph we're trying to get to, then it means there is another, shorter way to + // go to our destination and we can return that shorter path, not the one with the edge we're trying. const conditionResolution = canSatisfyConditions( toAdvance, edge, conditionResolver, context, - addEdgeExclusion(excludedEdges, edge), + addDestinationExclusion(excludedDestinations, target.source), excludedConditions, ); if (conditionResolution.satisfied) { @@ -1715,7 +1718,7 @@ function canSatisfyConditions { readonly paths: SimultaneousPaths, readonly context: PathContext, readonly conditionResolver: ConditionResolver, - readonly excludedNonCollectingEdges: ExcludedEdges = [], + readonly excludedNonCollectingEdges: ExcludedDestinations = [], readonly excludedConditionsOnNonCollectingEdges: ExcludedConditions = [], ) { this.lazilyComputedIndirectPaths = new Array(paths.length); @@ -1842,6 +1845,40 @@ export function advanceOptionsToString(options: (SimultaneousPaths | Simult return '[\n ' + options.map(opt => Array.isArray(opt) ? simultaneousPathsToString(opt, " ") : opt.toString()).join('\n ') + '\n]'; } +// Given a list of just computed indirect paths and a field that we're trying to advance after those paths, this +// method fields any path that should note be considered. +// +// Currently, this handle the case where the key used at the end of the indirect path contains (at top level) the field +// being queried. Or to make this more concrete, if we're trying to collect field `id`, and the path last edge was using +// key `id`, then we can ignore that path because this imply that there is a way to `id` "some other way" (also see +// the `does not evaluate plans relying on a key field to fetch that same field` test in `buildPlan` for more details). +function filterNonCollectingPathsForField( + paths: OpIndirectPaths, + field: Field, +): OpIndirectPaths { + // We only handle leafs. Things are more complex non-leaf. + if (!field.isLeafField()) { + return paths; + } + + const filtered = paths.paths.filter((p) => { + const lastEdge = p.lastEdge(); + if (!lastEdge || lastEdge.transition.kind !== 'KeyResolution') { + return true; + } + + const conditions = lastEdge.conditions; + return !(conditions && conditions.containsTopLevelField(field)); + }); + return filtered.length === paths.paths.length + ? paths + : { + ...paths, + paths: filtered + }; + +} + // Returns undefined if the operation cannot be dealt with/advanced. Otherwise, it returns a list of options we can be in after advancing the operation, each option // being a set of simultaneous paths in the subgraphs (a single path in the simple case, but type exploding may make us explore multiple paths simultaneously). // The lists of options can be empty, which has the special meaning that the operation is guaranteed to have no results (it corresponds to unsatisfiable conditions), @@ -1900,7 +1937,10 @@ export function advanceSimultaneousPathsWithOperation( if (operation.kind === 'Field') { debug.group(`Computing indirect paths:`); // Then adds whatever options can be obtained by taking some non-collecting edges first. - const pathsWithNonCollecting = subgraphSimultaneousPaths.indirectOptions(updatedContext, i); + const pathsWithNonCollecting = filterNonCollectingPathsForField( + subgraphSimultaneousPaths.indirectOptions(updatedContext, i), + operation + ); debug.groupEnd(() => pathsWithNonCollecting.paths.length == 0 ? `no indirect paths` : `${pathsWithNonCollecting.paths.length} indirect paths`); if (pathsWithNonCollecting.paths.length > 0) { debug.group('Validating indirect options:'); @@ -1996,7 +2036,7 @@ export function createInitialOptions( initialPath: OpGraphPath, initialContext: PathContext, conditionResolver: ConditionResolver, - excludedEdges: ExcludedEdges, + excludedEdges: ExcludedDestinations, excludedConditions: ExcludedConditions, ): SimultaneousPathsWithLazyIndirectPaths[] { const lazyInitialPath = new SimultaneousPathsWithLazyIndirectPaths( diff --git a/query-graphs-js/src/index.ts b/query-graphs-js/src/index.ts index 53d6b0c222..3907233a72 100644 --- a/query-graphs-js/src/index.ts +++ b/query-graphs-js/src/index.ts @@ -5,3 +5,4 @@ export * from './graphviz'; export * from './transition'; export * from './pathContext'; export * from './conditionsCaching'; +export * from './conditionsValidation'; diff --git a/query-planner-js/src/__tests__/buildPlan.defer.test.ts b/query-planner-js/src/__tests__/buildPlan.defer.test.ts index 1132631a91..ef807df4e3 100644 --- a/query-planner-js/src/__tests__/buildPlan.defer.test.ts +++ b/query-planner-js/src/__tests__/buildPlan.defer.test.ts @@ -3613,7 +3613,6 @@ test('@defer only the key of an entity', () => { Fetch(service: "Subgraph1") { { t { - __typename v0 id } diff --git a/query-planner-js/src/__tests__/buildPlan.test.ts b/query-planner-js/src/__tests__/buildPlan.test.ts index 400d4a5ec9..5a1d42690b 100644 --- a/query-planner-js/src/__tests__/buildPlan.test.ts +++ b/query-planner-js/src/__tests__/buildPlan.test.ts @@ -6247,7 +6247,6 @@ test('correctly generate plan built from some non-individually optimal branch op // // Anyway, this test make sure we do correctly generate the plan using 1.b and 2.a, and do // not ignore 1.b in favor of 1.a in particular (which a bug did at one point). - const subgraph1 = { name: 'Subgraph1', typeDefs: gql` @@ -6458,3 +6457,90 @@ test('does not error on some complex fetch group dependencies', () => { } `); }); + + +test('does not evaluate plans relying on a key field to fetch that same field', () => { + const subgraph1 = { + name: 'Subgraph1', + typeDefs: gql` + type Query { + t: T + } + + type T @key(fields: "otherId") { + otherId: ID! + } + ` + } + + const subgraph2 = { + name: 'Subgraph2', + typeDefs: gql` + type T @key(fields: "id") @key(fields: "otherId") { + id: ID! + otherId: ID! + } + ` + } + + const subgraph3 = { + name: 'Subgraph3', + typeDefs: gql` + type T @key(fields: "id") { + id: ID! + } + ` + } + + const [api, queryPlanner] = composeAndCreatePlanner(subgraph1, subgraph2, subgraph3); + const operation = operationFromDocument(api, gql` + { + t { + id + } + } + `); + + const plan = queryPlanner.buildQueryPlan(operation); + expect(plan).toMatchInlineSnapshot(` + QueryPlan { + Sequence { + Fetch(service: "Subgraph1") { + { + t { + __typename + otherId + } + } + }, + Flatten(path: "t") { + Fetch(service: "Subgraph2") { + { + ... on T { + __typename + otherId + } + } => + { + ... on T { + id + } + } + }, + }, + }, + } + `); + + // This is the main thing this test exists for: making sure we only evaluate a + // single plan for this example. And while it may be hard to see what other + // plans than the one above could be evaluated, some older version of the planner + // where considering a plan consisting of, from `Subgraph1`, fetching key `id` + // in `Subgraph2` using key `otherId`, and then using that `id` key to fetch + // ... field `id` in `Subgraph3`, not realizing that the `id` is what we ultimately + // want and so there is no point in considering path that use it as key. Anyway + // this test ensure this is not considered anymore (considering that later plan + // was not incorrect, but it was adding to the options to evaluate which in some + // cases could impact query planning performance quite a bit). + expect(queryPlanner.lastGeneratedPlanStatistics()?.evaluatedPlanCount).toBe(1); +}); diff --git a/query-planner-js/src/buildPlan.ts b/query-planner-js/src/buildPlan.ts index 1b12aedd86..6fdcf19b36 100644 --- a/query-planner-js/src/buildPlan.ts +++ b/query-planner-js/src/buildPlan.ts @@ -64,7 +64,7 @@ import { advanceSimultaneousPathsWithOperation, Edge, emptyContext, - ExcludedEdges, + ExcludedDestinations, QueryGraph, GraphPath, isPathContext, @@ -384,7 +384,7 @@ class QueryPlanningTraversal { private readonly rootKind: SchemaRootKind, readonly costFunction: CostFunction, initialContext: PathContext, - excludedEdges: ExcludedEdges = [], + excludedDestinations: ExcludedDestinations = [], excludedConditions: ExcludedConditions = [], ) { const { root, federatedQueryGraph } = parameters; @@ -399,7 +399,7 @@ class QueryPlanningTraversal { initialPath, initialContext, this.conditionResolver, - excludedEdges, + excludedDestinations, excludedConditions, ); this.stack = mapOptionsToSelections(selectionSet, initialOptions); @@ -772,7 +772,7 @@ class QueryPlanningTraversal { : computeNonRootFetchGroups(dependencyGraph, tree, this.rootKind); } - private resolveConditionPlan(edge: Edge, context: PathContext, excludedEdges: ExcludedEdges, excludedConditions: ExcludedConditions): ConditionResolution { + private resolveConditionPlan(edge: Edge, context: PathContext, excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions): ConditionResolution { const bestPlan = new QueryPlanningTraversal( { ...this.parameters, @@ -784,7 +784,7 @@ class QueryPlanningTraversal { 'query', this.costFunction, context, - excludedEdges, + excludedDestinations, addConditionExclusion(excludedConditions, edge.conditions) ).findBestPlan(); // Note that we want to return 'null', not 'undefined', because it's the latter that means "I cannot resolve that @@ -814,6 +814,7 @@ const conditionsMemoizer = (selectionSet: SelectionSet) => ({ conditions: condit class GroupInputs { private readonly perType = new Map(); + onUpdateCallback: () => void = () => {}; constructor( readonly supergraphSchema: Schema, @@ -830,6 +831,7 @@ class GroupInputs { this.perType.set(typeName, typeSelection); } typeSelection.updates().add(selection); + this.onUpdateCallback(); } addAll(other: GroupInputs) { @@ -913,6 +915,7 @@ class FetchGroup { private readonly inputRewrites: FetchDataRewrite[] = []; + private constructor( readonly dependencyGraph: FetchDependencyGraph, public index: number, @@ -928,7 +931,21 @@ class FetchGroup { // key for that. Having it here saves us from re-computing it more than once. readonly subgraphAndMergeAtKey?: string, private cachedCost?: number, + // Cache used to save unecessary recomputation of the `isUseless` method. + private isKnownUseful: boolean = false, ) { + if (this._inputs) { + this._inputs.onUpdateCallback = () => { + // We're trying to avoid the full recomputation of `isUseless` when we're already + // shown that the group is known useful (if it is shown useless, the group is removed, + // so we're not caching that result but it's ok). And `isUseless` basically checks if + // `inputs.contains(selection)`, so if a group is shown useful, it means that there + // is some selections not in the inputs, but as long as we add to selections (and we + // never remove from selections; `MutableSelectionSet` don't have removing methods), + // then this won't change. Only changing inputs may require some recomputation. + this.isKnownUseful = false; + } + } } static create({ @@ -967,6 +984,7 @@ class FetchGroup { ); } + // Clones everything on the group itself, but not it's `_parents` or `_children` links. cloneShallow(newDependencyGraph: FetchDependencyGraph): FetchGroup { return new FetchGroup( newDependencyGraph, @@ -981,6 +999,7 @@ class FetchGroup { this.deferRef, this.subgraphAndMergeAtKey, this.cachedCost, + this.isKnownUseful, ); } @@ -1165,7 +1184,7 @@ class FetchGroup { // If a group is such that everything is fetches is already included in the inputs, then // this group does useless fetches. isUseless(): boolean { - if (!this.inputs || this.mustPreserveSelection) { + if (this.isKnownUseful || !this.inputs || this.mustPreserveSelection) { return false; } @@ -1178,7 +1197,7 @@ class FetchGroup { // But the fetch above _is_ useless, it does only fetch its inputs, and we wouldn't catch this // if we do a raw inclusion check of `selection` into `inputs` // - // We only care about this problem at the top-level of the selections however, so we does that + // We only care about this problem at the top-level of the selections however, so we do that // top-level check manually (instead of just calling `this.inputs.contains(this.selection)`) // but fallback on `contains` for anything deeper. @@ -1199,7 +1218,7 @@ class FetchGroup { const inputSelections = this.inputs.selectionSets().flatMap((s) => s.selections()); // Checks that every selection is contained in the input selections. - return this.selection.selections().every((selection) => { + const isUseless = this.selection.selections().every((selection) => { const conditionInSupergraph = conditionInSupergraphIfInterfaceObject(selection); if (!conditionInSupergraph) { // We're not in the @interfaceObject case described above. We just check that an input selection contains the @@ -1240,6 +1259,9 @@ class FetchGroup { return implementationInputSelections.length > 0 && implementationInputSelections.every((input) => input.selectionSet.contains(subSelectionSet)); }); + + this.isKnownUseful = !isUseless; + return isUseless; } /**