Skip to content

Commit

Permalink
Merge #61219
Browse files Browse the repository at this point in the history
61219: opt: index accelerate <@ (contained by) expressions for array inverted indexes r=angelazxu a=angelazxu

Previously, we did not support index acceleration when checking if an indexed
column is <@ (contained by) a constant, or in other words, when the indexed
column is on the right side of a @> (contains) expression. We already perform
index acceleration for @> (contains) expressions where an indexed JSON or Array
column is on the left side of the expression.

This change adds support for using the inverted index with <@ expressions on
Array columns. When an inverted index is available, it is scanned using the
spans derived from the constant value. The original <@ filter is then
re-applied to the scanned rows, because the span expression is never tight: the
spans match any array that shares an element with the constant, including
arrays that also hold elements outside of it.
Support for JSON columns will be added later.

Informs: #59763

Release note (performance improvement): Some additional expressions using the <@ (contained by) and @> (contains) operators now support index-acceleration with the indexed column on either side of the expression.

Co-authored-by: Angela Xu <angelax@cockroachlabs.com>
  • Loading branch information
craig[bot] and angelazxu committed Mar 10, 2021
2 parents 033522b + 7c5154b commit 3bae381
Show file tree
Hide file tree
Showing 10 changed files with 887 additions and 64 deletions.
110 changes: 110 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -994,3 +994,113 @@ query ITT
SELECT * FROM c WHERE foo @> '{1, 2}' ORDER BY id
----
4 {1,2,3} {b,NULL,c}

subtest contained_by_arrays

# Fixture table for the contained-by (<@) tests: one INT[] column and one
# STRING[] column, each covered by its own inverted index (n and w) so the
# queries below can force either index with cb@n / cb@w.
statement ok
CREATE TABLE cb (
id INT PRIMARY KEY,
numbers INT[],
words STRING[],
INVERTED INDEX n (numbers),
INVERTED INDEX w (words)
)

# Fixture rows cover empty arrays, single and multiple elements, duplicate
# elements, NULL elements and the empty string. The column list is spelled out
# so the fixture does not depend on the column order of the table definition.
statement ok
INSERT INTO cb (id, numbers, words) VALUES
(0, ARRAY[], ARRAY[]),
(1, ARRAY[0], ARRAY[NULL]),
(2, ARRAY[1], ARRAY['cat']),
(3, ARRAY[0,1], ARRAY['mouse']),
(4, ARRAY[NULL], ARRAY['cat', 'mouse']),
(5, ARRAY[0,1,2], ARRAY['cat', NULL, 'mouse']),
(6, ARRAY[3,4,5], ARRAY['rat']),
(7, ARRAY[1,2,1], ARRAY['rat', NULL]),
(8, ARRAY[0,1,NULL], ARRAY[''])

# Contained-by (<@) lookups through the INT[] inverted index. The empty array
# is part of every result, and arrays holding a NULL element are never
# returned, even when the constant itself contains NULL. ORDER BY id pins the
# row order so the expected output does not depend on the chosen plan.
query T
SELECT numbers FROM cb@n WHERE numbers <@ ARRAY[]::INT[] ORDER BY id
----
{}

query T
SELECT numbers FROM cb@n WHERE numbers <@ ARRAY[1] ORDER BY id
----
{}
{1}

query T
SELECT numbers FROM cb@n WHERE numbers <@ ARRAY[0,1,2] ORDER BY id
----
{}
{0}
{1}
{0,1}
{0,1,2}
{1,2,1}

query T
SELECT numbers FROM cb@n WHERE numbers <@ ARRAY[1,2,3] ORDER BY id
----
{}
{1}
{1,2,1}

query T
SELECT numbers FROM cb@n WHERE numbers <@ ARRAY[0,1,NULL] ORDER BY id
----
{}
{0}
{1}
{0,1}

query T
SELECT numbers FROM cb@n WHERE numbers <@ ARRAY[NULL]::INT[] ORDER BY id
----
{}

# Contained-by (<@) lookups through the STRING[] inverted index. The empty
# string is an ordinary element (it matches the row holding {""}), while NULL
# elements never match. ORDER BY id pins the row order so the expected output
# does not depend on the chosen plan.
query T
SELECT words FROM cb@w WHERE words <@ ARRAY[]::STRING[] ORDER BY id
----
{}

query T
SELECT words FROM cb@w WHERE words <@ ARRAY['']::STRING[] ORDER BY id
----
{}
{""}

query T
SELECT words FROM cb@w WHERE words <@ ARRAY[NULL]::STRING[] ORDER BY id
----
{}

query T
SELECT words FROM cb@w WHERE words <@ ARRAY['cat'] ORDER BY id
----
{}
{cat}

query T
SELECT words FROM cb@w WHERE words <@ ARRAY['cat', 'mouse'] ORDER BY id
----
{}
{cat}
{mouse}
{cat,mouse}

query T
SELECT words FROM cb@w WHERE words <@ ARRAY['cat', 'mouse', NULL] ORDER BY id
----
{}
{cat}
{mouse}
{cat,mouse}

query T
SELECT words FROM cb@w WHERE words <@ ARRAY[NULL, 'rat'] ORDER BY id
----
{}
{rat}
112 changes: 112 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,118 @@ vectorized: true
right columns: ()
right fixed values: 1 column

# Test that queries with the contained by <@ operator use the inverted index.
# With an empty constant the plan scans e_b_idx over a single span and needs no
# inverted filter node. The predicate is re-applied above the index join
# (printed in its normalized form, constant @> b).
query T
EXPLAIN (VERBOSE) SELECT * FROM e WHERE b <@ ARRAY[]::INT[]
----
distribution: local
vectorized: true
·
• filter
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ filter: ARRAY[] @> b
└── • index join
│ columns: (a, b)
│ estimated row count: 111 (missing stats)
│ table: e@primary
│ key columns: a
└── • scan
columns: (a)
estimated row count: 111 (missing stats)
table: e@e_b_idx
spans: /[]-/"D"

# A non-empty constant adds a second span covering its elements (/0-/3 for
# 0, 1 and 2) next to the span used for the empty constant, and the two spans
# are combined by an inverted filter below the index join.
query T
EXPLAIN (VERBOSE) SELECT * FROM e WHERE b <@ ARRAY[0,1,2]
----
distribution: local
vectorized: true
·
• filter
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ filter: ARRAY[0,1,2] @> b
└── • index join
│ columns: (a, b)
│ estimated row count: 111 (missing stats)
│ table: e@primary
│ key columns: a
└── • project
│ columns: (a)
│ estimated row count: 111 (missing stats)
└── • inverted filter
│ columns: (a, b_inverted_key)
│ inverted column: b_inverted_key
│ num spans: 2
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: e@e_b_idx
spans: /[]-/"D" /0-/3

# A constant holding only NULL produces the same single-span scan as the empty
# constant: the NULL element adds no span of its own.
query T
EXPLAIN (VERBOSE) SELECT * FROM e WHERE b <@ ARRAY[NULL]::INT[]
----
distribution: local
vectorized: true
·
• filter
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ filter: ARRAY[NULL] @> b
└── • index join
│ columns: (a, b)
│ estimated row count: 111 (missing stats)
│ table: e@primary
│ key columns: a
└── • scan
columns: (a)
estimated row count: 111 (missing stats)
table: e@e_b_idx
spans: /[]-/"D"

# NULL elements mixed with real values are ignored when building spans: only
# 0 and 1 are covered (/0-/2), next to the span used for the empty constant.
query T
EXPLAIN (VERBOSE) SELECT * FROM e WHERE b <@ ARRAY[0,1,NULL]
----
distribution: local
vectorized: true
·
• filter
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ filter: ARRAY[0,1,NULL] @> b
└── • index join
│ columns: (a, b)
│ estimated row count: 111 (missing stats)
│ table: e@primary
│ key columns: a
└── • project
│ columns: (a)
│ estimated row count: 111 (missing stats)
└── • inverted filter
│ columns: (a, b_inverted_key)
│ inverted column: b_inverted_key
│ num spans: 2
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: e@e_b_idx
spans: /[]-/"D" /0-/2


# Ensure that an inverted index with a composite primary key still encodes
# the primary key data in the composite value.
statement ok
Expand Down
70 changes: 47 additions & 23 deletions pkg/sql/opt/invertedidx/json_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,30 @@ func (j *jsonOrArrayJoinPlanner) canExtractJSONOrArrayJoinCondition(
return true
}

// getInvertedExprForJSONOrArrayIndex gets an inverted.Expression that
// constrains a json or array index according to the given constant.
func getInvertedExprForJSONOrArrayIndex(
// getInvertedExprForJSONOrArrayIndexForContaining gets an inverted.Expression that
// constrains a JSON or Array index according to the given constant.
// This results in a span expression representing the intersection of all paths
// through the JSON or Array. This function is used when checking if an indexed
// column contains (@>) a constant.
func getInvertedExprForJSONOrArrayIndexForContaining(
evalCtx *tree.EvalContext, d tree.Datum,
) inverted.Expression {
var b []byte
invertedExpr, err := rowenc.EncodeContainingInvertedIndexSpans(
evalCtx, d, b, descpb.EmptyArraysInInvertedIndexesVersion,
)
invertedExpr, err := rowenc.EncodeContainingInvertedIndexSpans(evalCtx, d)
if err != nil {
// This function returns only an inverted.Expression and has no error
// result, so an encoding failure is raised as a panic.
panic(err)
}
return invertedExpr
}

// getInvertedExprForJSONOrArrayIndexForContainedBy gets an inverted.Expression
// that constrains a JSON or Array index according to the given constant.
// This results in a span expression representing the union of all paths
// through the JSON or Array. This function is only used when checking if an
// indexed column is contained by (<@) a constant.
func getInvertedExprForJSONOrArrayIndexForContainedBy(
evalCtx *tree.EvalContext, d tree.Datum,
) inverted.Expression {
invertedExpr, err := rowenc.EncodeContainedInvertedIndexSpans(evalCtx, d)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -181,7 +196,7 @@ func NewJSONOrArrayDatumsToInvertedExpr(
// it for every row.
var spanExpr *inverted.SpanExpression
if d, ok := nonIndexParam.(tree.Datum); ok {
invertedExpr := getInvertedExprForJSONOrArrayIndex(evalCtx, d)
invertedExpr := getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, d)
spanExpr, _ = invertedExpr.(*inverted.SpanExpression)
}

Expand Down Expand Up @@ -226,7 +241,7 @@ func (g *jsonOrArrayDatumsToInvertedExpr) Convert(
if d == tree.DNull {
return nil, nil
}
return getInvertedExprForJSONOrArrayIndex(g.evalCtx, d), nil
return getInvertedExprForJSONOrArrayIndexForContaining(g.evalCtx, d), nil

default:
return nil, fmt.Errorf("unsupported expression %v", t)
Expand Down Expand Up @@ -309,27 +324,36 @@ func (j *jsonOrArrayFilterPlanner) extractInvertedFilterConditionFromLeaf(
func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition(
evalCtx *tree.EvalContext, left, right opt.ScalarExpr,
) inverted.Expression {
// The first argument should be a variable or expression corresponding to
// the index column.
if !isIndexColumn(j.tabID, j.index, left, j.computedColumns) {
return inverted.NonInvertedColExpression{}
}

// The second argument should be a constant.
if !memo.CanExtractConstDatum(right) {
var indexColumn, constantVal opt.ScalarExpr
containedBy := false
if isIndexColumn(j.tabID, j.index, left, j.computedColumns) && memo.CanExtractConstDatum(right) {
// When the first argument is a variable or expression corresponding to the
// index column and the second argument is a constant, we get the
// InvertedExpression for left @> right.
indexColumn, constantVal = left, right
} else if isIndexColumn(j.tabID, j.index, right, j.computedColumns) && memo.CanExtractConstDatum(left) {
// When the second argument is a variable or expression corresponding to
// the index column and the first argument is a constant, we get the
// equivalent InvertedExpression for right <@ left.
indexColumn, constantVal = right, left
containedBy = true
} else {
// If neither condition is met, we cannot create an InvertedExpression.
return inverted.NonInvertedColExpression{}
}
d := memo.ExtractConstDatum(right)
if left.DataType().Family() == types.ArrayFamily &&
d := memo.ExtractConstDatum(constantVal)
if indexColumn.DataType().Family() == types.ArrayFamily &&
j.index.Version() < descpb.EmptyArraysInInvertedIndexesVersion {
if arr, ok := d.(*tree.DArray); ok && arr.Len() == 0 {
if arr, ok := d.(*tree.DArray); ok && (containedBy || arr.Len() == 0) {
// We cannot constrain array indexes that do not include keys for
// empty arrays. A contains (@>) lookup only needs those keys when
// the constant itself is empty, but a contained-by (<@) lookup
// needs them for every constant, because an empty array is
// contained by any array and must always be returned.
return inverted.NonInvertedColExpression{}
}
}

return getInvertedExprForJSONOrArrayIndex(evalCtx, d)
if containedBy {
return getInvertedExprForJSONOrArrayIndexForContainedBy(evalCtx, d)
}
return getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, d)
}

// extractJSONFetchValEqCondition extracts an InvertedExpression representing an
Expand Down Expand Up @@ -426,7 +450,7 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition(
obj = b.Build()
}

invertedExpr := getInvertedExprForJSONOrArrayIndex(evalCtx, tree.NewDJSON(obj))
invertedExpr := getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(obj))

// When the right side is an array or object, the InvertedExpression
// generated is not tight. We must indicate it is non-tight so an additional
Expand Down

0 comments on commit 3bae381

Please sign in to comment.