Query planning performance improvements (#2610)

Implements a few performance improvement for query plan computations: 1. profiling of some slow query planning shows `FetchGroup.isUseless` as one of the hot path. This commit caches the result of this method for a group, and only invalid that cache when we know the result may needs to be recomputed. On the planning of some queries, this is shown to provide a 15% improvement to query planning time. 2. when a type has multiple keys, the query planning was sometimes considering an option where some key `x` was used to get field `y` but then key `y` was used to get that same `y` field from another subgraph. This is obviously not very useful, and we know we can ignore those paths as the 1st part of those path already does what we want. But considering those (useless) options, while harmless for correction, was in some case drastically increasing the number of plans that were evaluated, leading to long query planning times.
apollographql · Jun 21, 2023 · 7ac8345 · 7ac8345
1 parent 529ea34
commit 7ac8345
Show file tree

Hide file tree

Showing 11 changed files with 537 additions and 142 deletions.
diff --git a/.changeset/friendly-lies-protect.md b/.changeset/friendly-lies-protect.md
@@ -0,0 +1,8 @@
+---
+"@apollo/query-planner": patch
+"@apollo/query-graphs": patch
+"@apollo/federation-internals": patch
+---
+
+Improves query planning time in some situations where entities use multiple keys.
+
diff --git a/composition-js/src/validate.ts b/composition-js/src/validate.ts
@@ -43,25 +43,15 @@ import {
   RootPath,
   advancePathWithTransition,
   Transition,
-  OpGraphPath,
-  advanceSimultaneousPathsWithOperation,
-  ExcludedEdges,
   QueryGraphState,
-  ExcludedConditions,
   Unadvanceables,
   isUnadvanceable,
   Unadvanceable,
   noConditionsResolution,
-  ConditionResolution,
-  unsatisfiedConditionsResolution,
-  ConditionResolver,
-  cachingConditionResolver,
-  PathContext,
-  addConditionExclusion,
-  SimultaneousPathsWithLazyIndirectPaths,
-  advanceOptionsToString,
   TransitionPathWithLazyIndirectPaths,
   RootVertex,
+  simpleValidationConditionResolver,
+  ConditionResolver,
 } from "@apollo/query-graphs";
 import { CompositionHint, HINTS } from "./hints";
 import { ASTNode, GraphQLError, print } from "graphql";
@@ -330,7 +320,7 @@ export function computeSubgraphPaths(
 } {
   try {
     assert(!supergraphPath.hasAnyEdgeConditions(), () => `A supergraph path should not have edge condition paths (as supergraph edges should not have conditions): ${supergraphPath}`);
-    const conditionResolver = new ConditionValidationResolver(supergraphSchema, subgraphs);
+    const conditionResolver = simpleValidationConditionResolver({ supergraph: supergraphSchema, queryGraph: subgraphs, withCaching: true });
     const initialState = ValidationState.initial({supergraphAPI: supergraphPath.graph, kind: supergraphPath.root.rootKind, subgraphs, conditionResolver});
     const context = new ValidationContext(supergraphSchema);
     let state = initialState;
@@ -431,11 +421,11 @@ export class ValidationState {
     supergraphAPI: QueryGraph,
     kind: SchemaRootKind,
     subgraphs: QueryGraph,
-    conditionResolver: ConditionValidationResolver,
+    conditionResolver: ConditionResolver,
   }) {
     return new ValidationState(
       GraphPath.fromGraphRoot(supergraphAPI, kind)!,
-      initialSubgraphPaths(kind, subgraphs).map((p) => TransitionPathWithLazyIndirectPaths.initial(p, conditionResolver.resolver)),
+      initialSubgraphPaths(kind, subgraphs).map((p) => TransitionPathWithLazyIndirectPaths.initial(p, conditionResolver)),
     );
   }
 
@@ -586,7 +576,7 @@ function isSupersetOrEqual(maybeSuperset: string[], other: string[]): boolean {
 }
 
 class ValidationTraversal {
-  private readonly conditionResolver: ConditionValidationResolver;
+  private readonly conditionResolver: ConditionResolver;
   // The stack contains all states that aren't terminal.
   private readonly stack: ValidationState[] = [];
 
@@ -604,7 +594,11 @@ class ValidationTraversal {
     supergraphAPI: QueryGraph,
     subgraphs: QueryGraph
   ) {
-    this.conditionResolver = new ConditionValidationResolver(supergraphSchema, subgraphs);
+    this.conditionResolver = simpleValidationConditionResolver({
+      supergraph: supergraphSchema,
+      queryGraph: subgraphs,
+      withCaching: true,
+    });
     supergraphAPI.rootKinds().forEach((kind) => this.stack.push(ValidationState.initial({
       supergraphAPI,
       kind,
@@ -680,87 +674,3 @@ class ValidationTraversal {
     debug.groupEnd();
   }
 }
-
-class ConditionValidationState {
-  constructor(
-    // Selection that belongs to the condition we're validating.
-    readonly selection: Selection,
-    // All the possible "simultaneous paths" we could be in the subgraph when we reach this state selection.
-    readonly subgraphOptions: SimultaneousPathsWithLazyIndirectPaths[]
-  ) {}
-
-  toString(): string {
-    return `${this.selection} <=> ${advanceOptionsToString(this.subgraphOptions)}`;
-  }
-}
-
-class ConditionValidationResolver {
-  readonly resolver: ConditionResolver;
-
-  constructor(
-    private readonly supergraphSchema: Schema,
-    private readonly federatedQueryGraph: QueryGraph
-  ) {
-    this.resolver = cachingConditionResolver(
-      federatedQueryGraph,
-      (
-        edge: Edge,
-        context: PathContext,
-        excludedEdges: ExcludedEdges,
-        excludedConditions: ExcludedConditions
-      ) => this.validateConditions(edge, context, excludedEdges, excludedConditions)
-    );
-  }
-
-  private validateConditions(
-    edge: Edge,
-    context: PathContext,
-    excludedEdges: ExcludedEdges,
-    excludedConditions: ExcludedConditions
-  ): ConditionResolution {
-    const conditions = edge.conditions!;
-    excludedConditions = addConditionExclusion(excludedConditions, conditions);
-
-    const initialPath: OpGraphPath = GraphPath.create(this.federatedQueryGraph, edge.head);
-    const initialOptions = [new SimultaneousPathsWithLazyIndirectPaths([initialPath], context, this.resolver, excludedEdges, excludedConditions)];
-
-    const stack: ConditionValidationState[] = [];
-    for (const selection of conditions.selections()) {
-      stack.push(new ConditionValidationState(selection, initialOptions));
-    }
-
-    while (stack.length > 0) {
-      const state = stack.pop()!;
-      const newStates = this.advanceState(state);
-      if (newStates === null) {
-        return unsatisfiedConditionsResolution;
-      }
-      newStates.forEach(s => stack.push(s));
-    }
-    // If we exhaust the stack, it means we've been able to find "some" path for every possible selection in the condition, so the
-    // condition is validated. Note that we use a cost of 1 for all conditions as we don't care about efficiency.
-    return { satisfied: true, cost: 1 };
-  }
-
-  private advanceState(state: ConditionValidationState): ConditionValidationState[] | null {
-    let newOptions: SimultaneousPathsWithLazyIndirectPaths[] = [];
-    for (const paths of state.subgraphOptions) {
-      const pathsOptions = advanceSimultaneousPathsWithOperation(
-        this.supergraphSchema,
-        paths,
-        state.selection.element,
-      );
-      if (!pathsOptions) {
-        continue;
-      }
-      newOptions = newOptions.concat(pathsOptions);
-    }
-
-    // If we got no options, it means that particular selection of the conditions cannot be satisfied, so the
-    // overall condition cannot.
-    if (newOptions.length === 0) {
-      return null;
-    }
-    return state.selection.selectionSet ? state.selection.selectionSet.selections().map(s => new ConditionValidationState(s, newOptions)) : [];
-  }
-}
diff --git a/internals-js/src/operations.ts b/internals-js/src/operations.ts
@@ -1792,6 +1792,11 @@ export class SelectionSet {
       : ContainsResult.STRICTLY_CONTAINED;
   }
 
+  containsTopLevelField(field: Field): boolean {
+    const selection = this._keyedSelections.get(field.key());
+    return !!selection && selection.element.equals(field);
+  }
+
   /**
    * Returns a selection set that correspond to this selection set but where any of the selections in the
    * provided selection set have been remove.

diff --git a/query-graphs-js/src/__tests__/graphPath.test.ts b/query-graphs-js/src/__tests__/graphPath.test.ts
@@ -0,0 +1,216 @@
+import {
+  Field,
+  FieldDefinition,
+  Schema,
+  assert,
+  buildSupergraphSchema,
+} from "@apollo/federation-internals";
+import {
+  GraphPath,
+  OpGraphPath,
+  SimultaneousPathsWithLazyIndirectPaths,
+  advanceSimultaneousPathsWithOperation,
+  createInitialOptions
+} from "../graphPath";
+import { QueryGraph, Vertex, buildFederatedQueryGraph } from "../querygraph";
+import { emptyContext } from "../pathContext";
+import { simpleValidationConditionResolver } from "../conditionsValidation";
+
+function parseSupergraph(subgraphs: number, schema: string): { supergraph: Schema, api: Schema, queryGraph: QueryGraph } {
+  assert(subgraphs >= 1, 'Should have at least 1 subgraph');
+  const header = `
+      schema
+        @link(url: "https://specs.apollo.dev/link/v1.0")
+        @link(url: "https://specs.apollo.dev/join/v0.3", for: EXECUTION)
+      {
+        query: Query
+      }
+
+      directive @join__enumValue(graph: join__Graph!) repeatable on ENUM_VALUE
+      directive @join__field(graph: join__Graph, requires: join__FieldSet, provides: join__FieldSet, type: String, external: Boolean, override: String, usedOverridden: Boolean) repeatable on FIELD_DEFINITION | INPUT_FIELD_DEFINITION
+      directive @join__graph(name: String!, url: String!) on ENUM_VALUE
+      directive @join__implements(graph: join__Graph!, interface: String!) repeatable on OBJECT | INTERFACE
+      directive @join__type(graph: join__Graph!, key: join__FieldSet, extension: Boolean! = false, resolvable: Boolean! = true, isInterfaceObject: Boolean! = false) repeatable on OBJECT | INTERFACE | UNION | ENUM | INPUT_OBJECT | SCALAR
+      directive @join__unionMember(graph: join__Graph!, member: String!) repeatable on UNION
+      directive @link(url: String, as: String, for: link__Purpose, import: [link__Import]) repeatable on SCHEMA
+
+      scalar join__FieldSet
+
+      scalar link__Import
+
+      enum link__Purpose {
+        SECURITY
+        EXECUTION
+      }
+
+      enum join__Graph {
+        ${[...Array(subgraphs).keys()].map((n) => `S${n+1} @join__graph(name: "S${n+1}", url: "https://S${n+1}")`).join('\n')}
+      }
+
+  `;
+
+  try {
+    const supergraph = buildSupergraphSchema(header + schema)[0];
+    return {
+      supergraph,
+      api: supergraph.toAPISchema(),
+      queryGraph: buildFederatedQueryGraph(supergraph, true),
+    };
+  } catch (e) {
+    throw new Error('Error parsing supergraph schema:\n' + e.toString());
+  }
+}
+
+function createOptions(supergraph: Schema, queryGraph: QueryGraph): SimultaneousPathsWithLazyIndirectPaths<Vertex>[] {
+  // We know we only use `Query` in the supergraph, so there is only that as root.
+  const root = queryGraph.roots()[0];
+  const initialPath: OpGraphPath<Vertex> = GraphPath.create(queryGraph, root);
+  return createInitialOptions(
+    initialPath,
+    emptyContext,
+    simpleValidationConditionResolver({ supergraph, queryGraph }),
+    [],
+    [],
+  );
+}
+
+function field(schema: Schema, coordinate: string): Field {
+  const def = schema.elementByCoordinate(coordinate) as FieldDefinition<any>;
+  return new Field(def);
+}
+
+describe("advanceSimultaneousPathsWithOperation", () => {
+  test("do not use key `x` to fetch `x`", () => {
+    const { supergraph, api, queryGraph } = parseSupergraph(3, `
+       type Query
+         @join__type(graph: S1)
+       {
+          t: T @join__field(graph: S1)
+       }
+
+       type T
+         @join__type(graph: S1)
+         @join__type(graph: S2, key: "otherId")
+         @join__type(graph: S2, key: "id")
+         @join__type(graph: S3, key: "id")
+       {
+          otherId: ID! @join__field(graph: S1) @join__field(graph: S2)
+          id: ID!      @join__field(graph: S2) @join__field(graph: S3)
+       }
+    `);
+
+    // Picking the first initial, the one going to S1
+    const initial = createOptions(supergraph, queryGraph)[0];
+
+    // Then picking `t`, which should be just the one option of picking it in S1 at this point.
+    const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t"));
+    assert(allAfterT, 'Should have advanced correctly');
+    expect(allAfterT).toHaveLength(1);
+    const afterT = allAfterT[0];
+    expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`);
+
+    // Checking that, at this point, we technically have 2 options:
+    // 1. we can go to S2 using `otherId`.
+    // 2. we can go to S3 using `id`, assuming we first get `id` from S2 (using `otherId`).
+    const indirect = afterT.indirectOptions(afterT.context, 0);
+    expect(indirect.paths).toHaveLength(2);
+    expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) (types: [T])`);
+    expect(indirect.paths[1].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id } ⊢ key()]--> T(S3) (types: [T])`);
+
+    const allForId = advanceSimultaneousPathsWithOperation(supergraph, afterT, field(api, "T.id"));
+    assert(allForId, 'Should have advanced correctly');
+
+    // Here, `id` is a direct path from both of our indirect paths. However, it makes no sense to use the 2nd
+    // indirect path above, since the condition to get to `S3` was `id`, and this means another indirect path
+    // is able to get to `id` more directly (the first one in this case).
+    // So ultimately, we should only keep the 1st option.
+    expect(allForId).toHaveLength(1);
+    const forId = allForId[0];
+    expect(forId.toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) --[id]--> ID(S2)`);
+  });
+
+  test("do not use key containing `x` to fetch `x`", () => {
+    // Similar to the previous test, but the key used is not exactly the fetch field, it only contains
+    // it (but the optimisation should still work).
+    const { supergraph, api, queryGraph } = parseSupergraph(3, `
+       type Query
+         @join__type(graph: S1)
+       {
+          t: T @join__field(graph: S1)
+       }
+
+       type T
+         @join__type(graph: S1)
+         @join__type(graph: S2, key: "otherId")
+         @join__type(graph: S2, key: "id1 id2")
+         @join__type(graph: S3, key: "id1 id2")
+       {
+          otherId: ID! @join__field(graph: S1) @join__field(graph: S2)
+          id1: ID!      @join__field(graph: S2) @join__field(graph: S3)
+          id2: ID!      @join__field(graph: S2) @join__field(graph: S3)
+       }
+    `);
+
+    // Picking the first initial, the one going to S1
+    const initial = createOptions(supergraph, queryGraph)[0];
+
+    // Then picking `t`, which should be just the one option of picking it in S1 at this point.
+    const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t"));
+    assert(allAfterT, 'Should have advanced correctly');
+    expect(allAfterT).toHaveLength(1);
+    const afterT = allAfterT[0];
+    expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`);
+
+    // Checking that, at this point, we technically have 2 options:
+    // 1. we can go to S2 using `otherId`.
+    // 2. we can go to S3 using `id1 id2`, assuming we first get `id1 id2` from S2 (using `otherId`).
+    const indirect = afterT.indirectOptions(afterT.context, 0);
+    expect(indirect.paths).toHaveLength(2);
+    expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) (types: [T])`);
+    expect(indirect.paths[1].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id1 id2 } ⊢ key()]--> T(S3) (types: [T])`);
+
+    const allForId = advanceSimultaneousPathsWithOperation(supergraph, afterT, field(api, "T.id1"));
+    assert(allForId, 'Should have advanced correctly');
+
+    // Here, `id1` is a direct path from both of our indirect paths. However, it makes no sense to use the 2nd
+    // indirect path above, since the condition to get to `S3` was `id1 id2`, which includes `id1`.
+    expect(allForId).toHaveLength(1);
+    const forId = allForId[0];
+    expect(forId.toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ otherId } ⊢ key()]--> T(S2) --[id1]--> ID(S2)`);
+  });
+
+  test("avoids indirect path that needs a key to the same subgraph to validate its condition", () => {
+    const { supergraph, api, queryGraph } = parseSupergraph(2, `
+       type Query
+         @join__type(graph: S1)
+       {
+          t: T @join__field(graph: S1)
+       }
+
+       type T
+         @join__type(graph: S1)
+         @join__type(graph: S2, key: "id1")
+         @join__type(graph: S2, key: "id2")
+       {
+          id1: ID! @join__field(graph: S2)
+          id2: ID! @join__field(graph: S1) @join__field(graph: S2)
+       }
+    `);
+
+    // Picking the first initial, the one going to S1
+    const initial = createOptions(supergraph, queryGraph)[0];
+
+    // Then picking `t`, which should be just the one option of picking it in S1 at this point.
+    const allAfterT = advanceSimultaneousPathsWithOperation(supergraph, initial, field(api, "Query.t"));
+    assert(allAfterT, 'Should have advanced correctly');
+    expect(allAfterT).toHaveLength(1);
+    const afterT = allAfterT[0];
+    expect(afterT.toString()).toBe(`Query(S1) --[t]--> T(S1) (types: [T])`);
+
+    // Technically, the `id1` key could be used to go to S2 by first getting `id1` from S2 using `id2`, but
+    // that's obviously unecessary to consider since we can just use `id2` to go to S2 in the first place.
+    const indirect = afterT.indirectOptions(afterT.context, 0);
+    expect(indirect.paths).toHaveLength(1);
+    expect(indirect.paths[0].toString()).toBe(`Query(S1) --[t]--> T(S1) --[{ id2 } ⊢ key()]--> T(S2) (types: [T])`);
+  });
+});