Skip to content

Commit

Permalink
Use indexes to filter WHERE queries (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
muglug committed May 12, 2023
1 parent 4f7ed4b commit b7e3e2a
Show file tree
Hide file tree
Showing 17 changed files with 223 additions and 20 deletions.
5 changes: 5 additions & 0 deletions src/Expressions/BetweenOperatorExpression.php
Expand Up @@ -50,6 +50,11 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): bool {
return ($this->negated ? !$eval : $eval) ? true : false;
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

<<__Override>>
public function negate(): void {
$this->negated = true;
Expand Down
51 changes: 43 additions & 8 deletions src/Expressions/BinaryOperatorExpression.php
Expand Up @@ -100,7 +100,9 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {

if ($left is RowExpression) {
if (!$right is RowExpression) {
throw new SQLFakeRuntimeException('Expected row expression on RHS of '.(string)$this->operator.' operand');
throw new SQLFakeRuntimeException(
'Expected row expression on RHS of '.(string)$this->operator.' operand',
);
}

// oh fun! a row comparison, e.g. (col1, col2, col3) > (1, 2, 3)
Expand Down Expand Up @@ -164,7 +166,8 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
}
case Operator::GREATER_THAN:
if ($as_string) {
return (bool)((((Str\compare((string)$l_value, (string)$r_value)) > 0) ? 1 : 0) ^ $this->negatedInt);
return
(bool)((((Str\compare((string)$l_value, (string)$r_value)) > 0) ? 1 : 0) ^ $this->negatedInt);
} else {
return (bool)(((float)$l_value > (float)$r_value) ? 1 : 0 ^ $this->negatedInt);
}
Expand All @@ -177,7 +180,8 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
}
case Operator::LESS_THAN:
if ($as_string) {
return (bool)((((Str\compare((string)$l_value, (string)$r_value)) < 0) ? 1 : 0) ^ $this->negatedInt);
return
(bool)((((Str\compare((string)$l_value, (string)$r_value)) < 0) ? 1 : 0) ^ $this->negatedInt);
} else {
return (bool)(((float)$l_value < (float)$r_value) ? 1 : 0 ^ $this->negatedInt);
}
Expand Down Expand Up @@ -222,7 +226,9 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
case Operator::DOUBLE_GREATER_THAN:
return (int)$left_number >> (int)$right_number;
default:
throw new SQLFakeRuntimeException('Operator '.(string)$this->operator.' recognized but not implemented');
throw new SQLFakeRuntimeException(
'Operator '.(string)$this->operator.' recognized but not implemented',
);
}
case Operator::LIKE:
$left_string = (string)$left->evaluate($row, $conn);
Expand Down Expand Up @@ -303,22 +309,51 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
}
}

private static function getColumnNamesFromBinop(BinaryOperatorExpression $expr): dict<string, mixed> {
/**
 * Collect the column => constant pairs from this expression that
 * Query::applyWhere can use to look rows up through an index.
 *
 * Returns null when no candidates can safely be offered: either this
 * expression is negated, or a negated EQUALS/AND sits somewhere under its
 * AND operands. Otherwise returns whatever getColumnNamesFromBinop() gathers
 * (possibly an empty dict).
 *
 * Throws SQLFakeRuntimeException if the operator has not been set.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $columns): ?dict<string, mixed> {
  $op = $this->operator;
  if ($op === null) {
    // an operator should only be in this state in the middle of parsing, never when evaluating
    throw new SQLFakeRuntimeException('Attempted to evaluate BinaryOperatorExpression with empty operator');
  }

  // getColumnNamesFromBinop() recurses through AND operands without checking
  // their negated flag. A negated equality (or negated AND) in that subtree
  // would yield candidates that select exactly the rows the clause excludes,
  // so give up on index use instead.
  if ($this->negated || self::hasNegatedIndexOperand($this)) {
    return null;
  }

  return self::getColumnNamesFromBinop($this, $columns);
}

/**
 * True if $expr, or any expression reachable from it through non-negated AND
 * operands, is a negated EQUALS or a negated AND.
 */
private static function hasNegatedIndexOperand(BinaryOperatorExpression $expr): bool {
  $op = $expr->operator;
  if ($op !== Operator::EQUALS && $op !== Operator::AND) {
    // only EQUALS and AND feed getColumnNamesFromBinop(), so other operators
    // cannot contribute a misleading candidate, negated or not
    return false;
  }

  if ($expr->negated) {
    return true;
  }

  if ($op === Operator::EQUALS) {
    return false;
  }

  $left = $expr->left;
  if ($left is BinaryOperatorExpression && self::hasNegatedIndexOperand($left)) {
    return true;
  }

  $right = $expr->right;
  return $right is BinaryOperatorExpression && self::hasNegatedIndexOperand($right);
}

private static function getColumnNamesFromBinop(
BinaryOperatorExpression $expr,
dict<string, Column> $columns,
): dict<string, mixed> {
$column_names = dict[];

if ($expr->operator === Operator::EQUALS) {
if ($expr->left is ColumnExpression && $expr->left->name !== '*' && $expr->right is ConstantExpression) {
$column_names[$expr->left->name] = $expr->right->value;
$table_name = $expr->left->tableName;
$column_name = $expr->left->name;
if ($table_name is nonnull) {
$column_name = $table_name.'.'.$column_name;
}
$value = $expr->right->value;
if (isset($columns[$column_name])) {
if ($columns[$column_name]->hack_type === 'int') {
$value = (int)$value;
}
}
$column_names[$column_name] = $value;
}
}

if ($expr->operator === Operator::AND) {
if ($expr->left is BinaryOperatorExpression) {
$column_names = self::getColumnNamesFromBinop($expr->left);
$column_names = self::getColumnNamesFromBinop($expr->left, $columns);
}

if ($expr->right is BinaryOperatorExpression) {
$column_names = Dict\merge($column_names, self::getColumnNamesFromBinop($expr->right));
$column_names = Dict\merge($column_names, self::getColumnNamesFromBinop($expr->right, $columns));
}
}

Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/CaseOperatorExpression.php
Expand Up @@ -44,6 +44,11 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
return $this->else->evaluate($row, $conn);
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

<<__Override>>
public function isWellFormed(): bool {
return $this->wellFormed;
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/ColumnExpression.php
Expand Up @@ -77,6 +77,11 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $_conn): mixed {
}
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

/**
* for use in ORDER BY... allow evaluating the expression
* to fall through to the full row if the column is not found fully qualified.
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/ConstantExpression.php
Expand Up @@ -42,6 +42,11 @@ public function evaluateImpl(row $_row, AsyncMysqlConnection $_conn): mixed {
return $this->value;
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

<<__Override>>
public function isWellFormed(): bool {
return true;
Expand Down
2 changes: 2 additions & 0 deletions src/Expressions/Expression.php
Expand Up @@ -70,6 +70,8 @@ final public function evaluate(
return $result;
}

/**
 * Report the column => value pairs this expression pins down, for use in
 * index lookups.
 *
 * Query::applyWhere compares the returned keys with each index's fields and
 * builds the lookup key from the returned values. A null (or empty) result
 * means no index is consulted and the WHERE clause is simply evaluated
 * against every row.
 *
 * $columns is keyed by column name; implementations may use it to coerce
 * values to the column's type.
 */
public abstract function getIndexCandidates(dict<string, Column> $columns): ?dict<string, mixed>;

/**
* a lot of times you just want the value
*/
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/FunctionExpression.php
Expand Up @@ -68,6 +68,11 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
}
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

public function isAggregate(): bool {
return C\contains_key(keyset['COUNT', 'SUM', 'MIN', 'MAX', 'AVG'], $this->functionName);
}
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/InOperatorExpression.php
Expand Up @@ -69,6 +69,11 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): bool {
return $this->negated;
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

<<__Override>>
public function negate(): void {
$this->negated = true;
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/JSONFunctionExpression.hack
Expand Up @@ -45,6 +45,11 @@ final class JSONFunctionExpression extends BaseFunctionExpression {
throw new SQLFakeRuntimeException('Function '.$this->functionName.' not implemented yet');
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

private function sqlJSONValid(row $row, AsyncMysqlConnection $conn): ?bool {
$row = $this->maybeUnrollGroupedDataset($row);
$args = $this->args;
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/PlaceholderExpression.php
Expand Up @@ -20,6 +20,11 @@ public function evaluateImpl(row $_row, AsyncMysqlConnection $_conn): mixed {
throw new SQLFakeRuntimeException('Attempted to evaluate placeholder expression!');
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

<<__Override>>
public function isWellFormed(): bool {
return false;
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/RowExpression.php
Expand Up @@ -25,6 +25,11 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
return $result;
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

<<__Override>>
public function isWellFormed(): bool {
return true;
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/SubqueryExpression.php
Expand Up @@ -22,6 +22,11 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): dataset {
return $this->query->execute($conn, $row);
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

<<__Override>>
public function isWellFormed(): bool {
return true;
Expand Down
5 changes: 5 additions & 0 deletions src/Expressions/UnaryExpression.php
Expand Up @@ -42,6 +42,11 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
return $val;
}

/**
 * This expression type offers no index candidates: $_columns is ignored and
 * the result is always null. When this expression is the WHERE clause,
 * Query::applyWhere therefore skips its index lookup and evaluates the
 * clause against every row.
 */
<<__Override>>
public function getIndexCandidates(dict<string, Column> $_columns): ?dict<string, mixed> {
return null;
}

<<__Override>>
public function setNextChild(Expression $expr, bool $overwrite = false): void {
if ($this->subject is nonnull && !$overwrite) {
Expand Down
24 changes: 14 additions & 10 deletions src/Query/DeleteQuery.php
Expand Up @@ -17,7 +17,16 @@ public function execute(AsyncMysqlConnection $conn): int {

Metrics::trackQuery(QueryType::DELETE, $conn->getServer()->name, $table_name, $this->sql);

return $this->applyWhere($conn, $data[0])
$columns = null;

if ($schema?->fields is nonnull) {
$columns = dict[];
foreach ($schema?->fields as $field) {
$columns[$field->name] = $field;
}
}

return $this->applyWhere($conn, $data[0], $data[1], $data[2], $columns, $schema?->indexes)
|> $this->applyOrderBy($conn, $$)
|> $this->applyLimit($$)
|> $this->applyDelete($conn, $database, $table_name, $$, $data[0], $data[1], $data[2], $schema);
Expand All @@ -37,19 +46,14 @@ protected function applyDelete(
?TableSchema $table_schema,
): int {
$rows_to_delete = Keyset\keys($filtered_rows);
$remaining_rows = Dict\filter_with_key(
$original_table,
($row_num, $_) ==> !C\contains_key($rows_to_delete, $row_num),
);
$remaining_rows =
Dict\filter_with_key($original_table, ($row_num, $_) ==> !C\contains_key($rows_to_delete, $row_num));
$rows_affected = C\count($original_table) - C\count($remaining_rows);

if ($table_schema is nonnull) {
foreach ($filtered_rows as $row_id => $row_to_delete) {
list($unique_index_ref_deletes, $index_ref_deletes) = self::getIndexRemovalsForRow(
$table_schema->indexes,
$row_id,
$row_to_delete,
);
list($unique_index_ref_deletes, $index_ref_deletes) =
self::getIndexRemovalsForRow($table_schema->indexes, $row_id, $row_to_delete);

foreach ($unique_index_ref_deletes as list($index_name, $index_key)) {
unset($unique_index_refs[$index_name][$index_key]);
Expand Down
98 changes: 98 additions & 0 deletions src/Query/Query.php
Expand Up @@ -26,16 +26,114 @@ abstract class Query {
/**
 * Filter $data down to the rows that satisfy this query's WHERE clause.
 *
 * With no WHERE clause, $data is returned untouched. Otherwise, when column
 * and index metadata are supplied, the equality constants reported by the
 * clause's getIndexCandidates() are used to pre-narrow $data through the
 * index covering the most constrained fields. The clause is then evaluated
 * against every remaining row, so the index step only reduces the number of
 * rows that have to be evaluated.
 *
 * The index step is skipped (falling back to a full evaluation) when no index
 * is fully covered by the candidates, or when a candidate value cannot be
 * used as an index key (e.g. null or a float).
 */
protected function applyWhere(
  AsyncMysqlConnection $conn,
  dataset $data,
  unique_index_refs $unique_index_refs,
  index_refs $index_refs,
  ?dict<string, Column> $columns,
  ?vec<Index> $indexes,
): dataset {
  $where = $this->whereClause;
  if ($where === null) {
    // no where clause? cool! just return the given data
    return $data;
  }

  if ($columns is nonnull && $indexes) {
    $candidates = $where->getIndexCandidates($columns);
    if ($candidates) {
      $candidate_count = C\count($candidates);
      $matched_fields = 0;
      $matched_index = null;
      foreach ($indexes as $index) {
        $index_field_count = C\count($index->fields);
        if ($index_field_count <= $matched_fields) {
          // cannot beat the best index found so far
          continue;
        }

        // Test membership field by field rather than comparing keysets for
        // identity, so the order in which the WHERE clause lists its
        // conditions does not decide whether an index is usable.
        if (!C\every($index->fields, $field ==> C\contains_key($candidates, $field))) {
          continue;
        }

        $matched_fields = $index_field_count;
        $matched_index = $index;

        if ($index_field_count === $candidate_count) {
          // every candidate is covered; no other index can do better
          break;
        }
      }

      if ($matched_index is nonnull) {
        $candidate_key = self::buildIndexLookupKey($matched_index, $candidates);
        if ($candidate_key is nonnull) {
          $data = self::filterDataWithMatchedIndex(
            $data,
            $unique_index_refs,
            $index_refs,
            $matched_index,
            $candidate_key,
          );
        }
      }
    }
  }

  return Dict\filter($data, $row ==> (bool)$where->evaluate($row, $conn));
}

/**
 * Build the key used to look rows up in $index from the WHERE-clause
 * constants in $candidates, walking the fields in the index's own order.
 *
 * A single-field lookup uses the constant itself; a multi-field lookup
 * concatenates each constant followed by '||'. Returns null when any
 * constant is not an arraykey, in which case the caller should not use
 * the index at all.
 */
private static function buildIndexLookupKey(Index $index, dict<string, mixed> $candidates): ?arraykey {
  $values = vec[];
  foreach ($index->fields as $field) {
    $value = $candidates[$field];
    if (!($value is arraykey)) {
      return null;
    }
    $values[] = $value;
  }

  if (C\count($values) === 1) {
    return $values[0];
  }

  $key = '';
  foreach ($values as $value) {
    $key .= $value.'||';
  }

  return $key;
}

/**
 * Narrow $data to the rows that $matched_index associates with $candidate_key.
 *
 * - PRIMARY: $candidate_key is looked up directly as a key of $data.
 * - UNIQUE:  $unique_index_refs[index name][$candidate_key] gives one row id.
 * - INDEX:   $index_refs[index name][$candidate_key] gives a collection of
 *            row ids.
 *
 * A missing entry anywhere along the lookup yields an empty dataset.
 * Throws \Exception when the index type is none of the three above.
 */
private static function filterDataWithMatchedIndex(
  dataset $data,
  unique_index_refs $unique_index_refs,
  index_refs $index_refs,
  Index $matched_index,
  arraykey $candidate_key,
): dataset {
  $index_type = $matched_index->type;

  if ($index_type === 'PRIMARY') {
    return C\contains_key($data, $candidate_key)
      ? dict[$candidate_key => $data[$candidate_key]]
      : dict[];
  }

  if ($index_type === 'UNIQUE') {
    $index_name = $matched_index->name;
    if (!C\contains_key($unique_index_refs, $index_name)) {
      return dict[];
    }

    $key_to_row_id = $unique_index_refs[$index_name];
    if (!C\contains_key($key_to_row_id, $candidate_key)) {
      return dict[];
    }

    $row_id = $key_to_row_id[$candidate_key];
    return C\contains_key($data, $row_id)
      ? dict[$row_id => $data[$row_id]]
      : dict[];
  }

  if ($index_type !== 'INDEX') {
    throw new \Exception('Unrecognised index');
  }

  $key_to_row_ids = $index_refs[$matched_index->name] ?? null;
  $row_ids = $key_to_row_ids is nonnull ? ($key_to_row_ids[$candidate_key] ?? null) : null;
  if ($row_ids is null) {
    return dict[];
  }

  // keep only the rows whose id the index lists for this key
  return Dict\filter_with_key($data, ($id, $_) ==> C\contains_key($row_ids, $id));
}

/**
* Apply the ORDER BY clause to sort the rows
*/
Expand Down

0 comments on commit b7e3e2a

Please sign in to comment.