Calculate indexes for all rows (#101)
* Use indexes for fast retrieval

* Remove WHERE index calculation for now
muglug committed May 11, 2023
1 parent e27e311 commit 91d5dd1
Showing 23 changed files with 652 additions and 251 deletions.
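
The commit summary above is terse, so here is a rough illustration (not part of this commit) of what "use indexes for fast retrieval" can mean: per-table maps from an index key to row ids let an exact-match lookup skip a full scan of the row dict. The type shapes and helper below are assumptions inferred from how unique_index_refs and index_refs are consumed in DeleteQuery::applyDelete further down; they are not declarations from the library.

// Hedged sketch only; names suffixed with _sketch are invented for illustration.
// Assumed shapes: a unique index maps name => key => row id; a non-unique index
// maps name => key => set of row ids.
type unique_index_refs_sketch = dict<string, dict<arraykey, arraykey>>;
type index_refs_sketch = dict<string, dict<arraykey, keyset<arraykey>>>;

function candidate_row_ids_sketch(
  unique_index_refs_sketch $unique_index_refs,
  index_refs_sketch $index_refs,
  string $index_name,
  arraykey $index_key,
): keyset<arraykey> {
  // A unique index key maps to at most one row id.
  $unique_by_key = $unique_index_refs[$index_name] ?? dict[];
  $unique_hit = $unique_by_key[$index_key] ?? null;
  if ($unique_hit is nonnull) {
    return keyset[$unique_hit];
  }
  // A non-unique index key maps to a set of row ids; a missing key means no candidates.
  $by_key = $index_refs[$index_name] ?? dict[];
  return $by_key[$index_key] ?? keyset[];
}

A caller would then evaluate the rest of the WHERE clause against only those candidate rows instead of the whole table.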
4 changes: 2 additions & 2 deletions src/AsyncMysql/AsyncMysqlQueryResult.php
@@ -10,9 +10,9 @@
final class AsyncMysqlQueryResult extends \AsyncMysqlQueryResult {

/* HH_IGNORE_ERROR[3012] I don't want to call parent::construct */
public function __construct(private dataset $rows, private int $rows_affected = 0, private int $last_insert_id = 0) {}
public function __construct(private vec<dict<string, mixed>> $rows, private int $rows_affected = 0, private int $last_insert_id = 0) {}

public function rows(): dataset {
public function rows(): vec<dict<string, mixed>> {
return $this->rows;
}

30 changes: 8 additions & 22 deletions src/DataIntegrity.php
@@ -78,31 +78,19 @@ public static function ensureFieldsPresent(dict<string, mixed> $row, TableSchema
$field_unsigned = $field->unsigned ?? false;

if (!C\contains_key($row, $field_name)) {
$row[$field_name] = self::getDefaultValueForField(
$field_type,
$field_nullable,
$field_default,
$field_name,
$schema->name,
);
$row[$field_name] =
self::getDefaultValueForField($field_type, $field_nullable, $field_default, $field_name, $schema->name);
} else if ($row[$field_name] === null) {
if ($field_nullable) {
// explicit null value and nulls are allowed, let it through
continue;
} else if (QueryContext::$strictSQLMode) {
// if we got this far the column has no default and isn't nullable, strict would throw
// but default MySQL mode would coerce to a valid value
throw new SQLFakeRuntimeException(
"Column '{$field_name}' on '{$schema->name}' does not allow null values",
);
throw new SQLFakeRuntimeException("Column '{$field_name}' on '{$schema->name}' does not allow null values");
} else {
$row[$field_name] = self::getDefaultValueForField(
$field_type,
$field_nullable,
$field_default,
$field_name,
$schema->name,
);
$row[$field_name] =
self::getDefaultValueForField($field_type, $field_nullable, $field_default, $field_name, $schema->name);
}
} else {
// TODO more integrity constraints, check field length for varchars, check timestamps
@@ -315,7 +303,7 @@ public static function checkUniqueConstraints(
dict<string, mixed> $row,
TableSchema $schema,
?arraykey $update_row_id = null,
): ?(string, int) {
): ?(string, arraykey) {

// gather all unique keys
$unique_keys = dict[];
@@ -343,10 +331,8 @@ public static function checkUniqueConstraints(
if (C\every($unique_key, $field ==> $r[$field] === $row[$field])) {
$dupe_unique_key_value = Vec\map($unique_key, $field ==> (string)$row[$field])
|> Str\join($$, ', ');
return tuple(
"Duplicate entry '{$dupe_unique_key_value}' for key '{$name}' in table '{$schema->name}'",
$row_id,
);
return
tuple("Duplicate entry '{$dupe_unique_key_value}' for key '{$name}' in table '{$schema->name}'", $row_id);
}
}
}
24 changes: 23 additions & 1 deletion src/Expressions/BinaryOperatorExpression.php
@@ -4,7 +4,7 @@

namespace Slack\SQLFake;

use namespace HH\Lib\{C, Regex, Str, Vec};
use namespace HH\Lib\{C, Dict, Regex, Str, Vec};

/**
* any operator that takes arguments on the left and right side, like +, -, *, AND, OR...
@@ -303,6 +303,28 @@ public function evaluateImpl(row $row, AsyncMysqlConnection $conn): mixed {
}
}

private static function getColumnNamesFromBinop(BinaryOperatorExpression $expr): dict<string, mixed> {
$column_names = dict[];

if ($expr->operator === Operator::EQUALS) {
if ($expr->left is ColumnExpression && $expr->left->name !== '*' && $expr->right is ConstantExpression) {
$column_names[$expr->left->name] = $expr->right->value;
}
}

if ($expr->operator === Operator::AND) {
if ($expr->left is BinaryOperatorExpression) {
$column_names = self::getColumnNamesFromBinop($expr->left);
}

if ($expr->right is BinaryOperatorExpression) {
$column_names = Dict\merge($column_names, self::getColumnNamesFromBinop($expr->right));
}
}

return $column_names;
}
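
The helper above collects column/constant pairs from `=` comparisons joined by AND, so a clause such as a = 1 AND b = 2 yields dict['a' => 1, 'b' => 2]. The commit message notes that WHERE-driven index calculation was removed "for now"; the sketch below is not code from this commit, and its '||' separator is invented, but it shows one way such a map could later be matched against an index definition to build a lookup key.

use namespace HH\Lib\{C, Str};

// Hypothetical caller for illustration only.
function index_key_from_columns_sketch(
  dict<string, mixed> $column_names,
  vec<string> $index_columns,
): ?string {
  $parts = vec[];
  foreach ($index_columns as $column) {
    if (!C\contains_key($column_names, $column)) {
      // The WHERE clause does not constrain every column of this index,
      // so it cannot be used for an exact-match lookup.
      return null;
    }
    $parts[] = (string)$column_names[$column];
  }
  return Str\join($parts, '||');
}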

/**
* Coerce a mixed value to a num,
* but also handle sub-expressions that return a dataset containing a num
37 changes: 18 additions & 19 deletions src/Expressions/JSONFunctionExpression.hack
@@ -331,9 +331,8 @@ final class JSONFunctionExpression extends BaseFunctionExpression {

$argCount = C\count($args);
if ($argCount !== 1) {
throw new SQLFakeRuntimeException(
'MySQL JSON_DEPTH() function must be called with 1 JSON document argument',
);
throw
new SQLFakeRuntimeException('MySQL JSON_DEPTH() function must be called with 1 JSON document argument');
}

$json = $args[0]->evaluate($row, $conn);
@@ -403,43 +402,43 @@ final class JSONFunctionExpression extends BaseFunctionExpression {
}

$term = (new JSONPath\JSONObject($term))->get('$');
if ($term is null || $term->value is null || !($term->value is vec<_>)) {
if ($term is null || $term->value is null || !($term->value is vec<_>)) {
throw new SQLFakeRuntimeException('MySQL JSON_CONTAINS() function given invalid json');
}
$term = $term->value[0];

if ($json is vec<_>) {
// If $json is a vec then we have an array and will test if the array contains the given value
if ($term is dict<_,_>) {
if ($term is dict<_, _>) {
return C\count(Vec\filter($json, $val ==> {
if ($val is dict<_,_>) {
if ($val is dict<_, _>) {
return Dict\equal($val, $term);
}
return false;
})) > 0;
}
else {
})) >
0;
} else {
return C\contains($json, $term);
}
}
else if ($json is dict<_,_>) {
} else if ($json is dict<_, _>) {
// If $json is a dict then we have an object and will test that either (1) $json and $term are the same or
// (2) one of $json's members is the same as $term
if ($term is dict<_,_>) {
if (Dict\equal($json, $term)) { return true; }
if ($term is dict<_, _>) {
if (Dict\equal($json, $term)) {
return true;
}

return C\count(Dict\filter($json, $val ==> {
if ($val is dict<_,_>) {
if ($val is dict<_, _>) {
return Dict\equal($val, $term);
}
return false;
})) > 0;
}
else {
})) >
0;
} else {
return C\count(Dict\filter($json, $val ==> $term == $val)) > 0;
}
}
else {
} else {
return $json == $term;
}

6 changes: 3 additions & 3 deletions src/Logger.php
@@ -43,7 +43,7 @@ protected static function write(string $message): void {
* 1 row from cluster1
*
*/
public static function logResult(string $server, dataset $data, int $rows_affected): void {
public static function logResult(string $server, vec<dict<string, mixed>> $data, int $rows_affected): void {
if (QueryContext::$verbosity >= Verbosity::RESULTS) {
if ($rows_affected > 0) {
self::write("{$rows_affected} rows affected\n");
@@ -56,7 +56,7 @@ public static function logResult(string $server, dataset $data, int $rows_affect
}
}

private static function formatData(dataset $rows, string $server): string {
private static function formatData(vec<dict<string, mixed>> $rows, string $server): string {
$count = C\count($rows);

$tbl_columns = static::formatColumns($rows);
@@ -83,7 +83,7 @@ private static function formatData(dataset $rows, string $server): string {
/**
* Determine maximum string length of column names or values
*/
protected static function formatColumns(dataset $data): dict<string, int> {
protected static function formatColumns(vec<dict<string, mixed>> $data): dict<string, int> {

$columns = dict[];
foreach ($data as $row) {
20 changes: 10 additions & 10 deletions src/Parser/CreateTableParser.php
@@ -310,21 +310,21 @@ private function parseCreateTable(vec<string> $tokens, string $sql): parsed_tabl
//

$fields = vec[];
$indexes = vec[];
$index_refs = vec[];

if ($this->nextTokenIs($tokens, '(')) {
$tokens = Vec\drop($tokens, 1);
$ret = $this->parseCreateDefinition(inout $tokens);
$fields = $ret['fields'];
$indexes = $ret['indexes'];
$index_refs = $ret['indexes'];
}

$props = $this->parseTableProps(inout $tokens);

$table = shape(
'name' => $name,
'fields' => $fields,
'indexes' => $indexes,
'indexes' => $index_refs,
'props' => $props,
'sql' => $sql,
);
@@ -342,26 +342,26 @@ private function parseCreateDefinition(inout vec<string> $tokens): shape(
) {

$fields = vec[];
$indexes = vec[];
$index_refs = vec[];

while ($tokens[0] !== ')') {
$these_tokens = $this->sliceUntilNextField(inout $tokens);

$this->parseFieldOrKey(inout $these_tokens, inout $fields, inout $indexes);
$this->parseFieldOrKey(inout $these_tokens, inout $fields, inout $index_refs);
}

$tokens = Vec\drop($tokens, 1); // closing paren

return shape(
'fields' => $fields,
'indexes' => $indexes,
'indexes' => $index_refs,
);
}

private function parseFieldOrKey(
inout vec<string> $tokens,
inout vec<parsed_field> $fields,
inout vec<parsed_index> $indexes,
inout vec<parsed_index> $index_refs,
): void {

//
@@ -424,7 +424,7 @@ private function parseFieldOrKey(
if (C\count($tokens)) {
$index['more'] = $tokens;
}
$indexes[] = $index;
$index_refs[] = $index;
return;

//
@@ -447,7 +447,7 @@ private function parseFieldOrKey(
if (C\count($tokens)) {
$index['more'] = $tokens;
}
$indexes[] = $index;
$index_refs[] = $index;
return;

// FULLTEXT [index_name] (index_col_name,...) [index_option] ...
@@ -487,7 +487,7 @@ private function parseFieldOrKey(
if (C\count($tokens)) {
$index['more'] = $tokens;
}
$indexes[] = $index;
$index_refs[] = $index;
return;

// older stuff
43 changes: 32 additions & 11 deletions src/Query/DeleteQuery.php
@@ -2,7 +2,7 @@

namespace Slack\SQLFake;

use namespace HH\Lib\{C, Keyset, Vec};
use namespace HH\Lib\{C, Dict, Keyset};

final class DeleteQuery extends Query {
public ?from_table $fromClause = null;
@@ -12,13 +12,15 @@ public function __construct(public string $sql) {}
public function execute(AsyncMysqlConnection $conn): int {
$this->fromClause as nonnull;
list($database, $table_name) = Query::parseTableName($conn, $this->fromClause['name']);
$data = $conn->getServer()->getTable($database, $table_name) ?? vec[];
$data = $conn->getServer()->getTableData($database, $table_name) ?? tuple(dict[], dict[], dict[]);
$schema = QueryContext::getSchema($database, $table_name);

Metrics::trackQuery(QueryType::DELETE, $conn->getServer()->name, $table_name, $this->sql);

return $this->applyWhere($conn, $data)
return $this->applyWhere($conn, $data[0])
|> $this->applyOrderBy($conn, $$)
|> $this->applyLimit($$)
|> $this->applyDelete($conn, $database, $table_name, $$, $data);
|> $this->applyDelete($conn, $database, $table_name, $$, $data[0], $data[1], $data[2], $schema);
}

/**
@@ -30,18 +32,37 @@ protected function applyDelete(
string $table_name,
dataset $filtered_rows,
dataset $original_table,
unique_index_refs $unique_index_refs,
index_refs $index_refs,
?TableSchema $table_schema,
): int {

// if this isn't a dict keyed by the original ids in the row, it could delete the wrong rows
$filtered_rows as dict<_, _>;

$rows_to_delete = Keyset\keys($filtered_rows);
$remaining_rows =
Vec\filter_with_key($original_table, ($row_num, $_) ==> !C\contains_key($rows_to_delete, $row_num));
$remaining_rows = Dict\filter_with_key(
$original_table,
($row_num, $_) ==> !C\contains_key($rows_to_delete, $row_num),
);
$rows_affected = C\count($original_table) - C\count($remaining_rows);

if ($table_schema is nonnull) {
foreach ($filtered_rows as $row_id => $row_to_delete) {
list($unique_index_ref_deletes, $index_ref_deletes) = self::getIndexRemovalsForRow(
$table_schema->indexes,
$row_id,
$row_to_delete,
);

foreach ($unique_index_ref_deletes as list($index_name, $index_key)) {
unset($unique_index_refs[$index_name][$index_key]);
}

foreach ($index_ref_deletes as list($index_name, $index_key, $_)) {
unset($index_refs[$index_name][$index_key][$row_id]);
}
}
}

// write it back to the database
$conn->getServer()->saveTable($database, $table_name, $remaining_rows);
$conn->getServer()->saveTable($database, $table_name, $remaining_rows, $unique_index_refs, $index_refs);
return $rows_affected;
}
}
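
The reworked applyDelete above now prunes index entries for each deleted row before saving the table. getIndexRemovalsForRow itself is not shown in this diff; judging only from how its results are consumed above, it appears to yield (index name, key) pairs for unique indexes and (index name, key, row id) triples for non-unique ones. The sketch below is a guess at that behaviour with a simplified index shape and an invented '||' key separator, not the repository's implementation.

use namespace HH\Lib\{Str, Vec};

// Simplified stand-in for the real index metadata; illustration only.
type index_sketch = shape('name' => string, 'fields' => vec<string>, 'unique' => bool);

function index_removals_for_row_sketch(
  vec<index_sketch> $indexes,
  arraykey $row_id,
  dict<string, mixed> $row,
): (vec<(string, arraykey)>, vec<(string, arraykey, arraykey)>) {
  $unique_deletes = vec[];
  $non_unique_deletes = vec[];
  foreach ($indexes as $index) {
    // Serialize the row's values for this index's columns into one key.
    $key = Vec\map($index['fields'], $field ==> (string)($row[$field] ?? ''))
      |> Str\join($$, '||');
    if ($index['unique']) {
      $unique_deletes[] = tuple($index['name'], $key);
    } else {
      $non_unique_deletes[] = tuple($index['name'], $key, $row_id);
    }
  }
  return tuple($unique_deletes, $non_unique_deletes);
}

applyDelete then unsets exactly those entries from $unique_index_refs and $index_refs, as the diff above shows, so the on-disk index maps never reference deleted rows.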
