Skip to content

Commit

Permalink
fix: handle count multiple PK & edge cases more gracefully (#6870)
Browse files Browse the repository at this point in the history
currently we use concatenation of multiple primary keys and a
COUNT DISTINCT of that to figure out how many records we have
matched in a query.

however, that fails if the records have keys when the keys
are ambigious when concatenated (`"A", "AA"` & `"AA", "A"`)

the fact that we do a distinct can also be a performance impact
that isn't needed when we aren't doing joins

as such, in MySQL & Postgres we can use the built in counting
of multiple distinct values to resolve some of the issues,
and in other environments we can make it SLIGHTLY better by
adding delimiters between the concatenated values.  It is not
perfect because it technically could run into the same issue
if the delimiters are in the primary keys but it's BETTER
in most cases.

also, in cases where we do not perform any joins we can
short circuit all of this and do a much more performant
`COUNT(1)` operation

fixes #5989
fixes #5314
fixes #4550
  • Loading branch information
imnotjames committed Oct 9, 2020
1 parent 5c407de commit 4abfb34
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 34 deletions.
93 changes: 62 additions & 31 deletions src/query-builder/SelectQueryBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1774,52 +1774,83 @@ export class SelectQueryBuilder<Entity> extends QueryBuilder<Entity> implements
});
}

protected async executeCountQuery(queryRunner: QueryRunner): Promise<number> {

private computeCountExpression() {
const mainAlias = this.expressionMap.mainAlias!.name; // todo: will this work with "fromTableName"?
const metadata = this.expressionMap.mainAlias!.metadata;

const primaryColumns = metadata.primaryColumns;
const distinctAlias = this.escape(mainAlias);
let countSql: string = "";
if (metadata.hasMultiplePrimaryKeys) {
if (this.connection.driver instanceof AbstractSqliteDriver) {
countSql = `COUNT(DISTINCT(` + metadata.primaryColumns.map((primaryColumn, index) => {
const propertyName = this.escape(primaryColumn.databaseName);
return `${distinctAlias}.${propertyName}`;
}).join(" || ") + ")) as \"cnt\"";

} else if (this.connection.driver instanceof CockroachDriver) {
countSql = `COUNT(DISTINCT(CONCAT(` + metadata.primaryColumns.map((primaryColumn, index) => {
const propertyName = this.escape(primaryColumn.databaseName);
return `${distinctAlias}.${propertyName}::text`;
}).join(", ") + "))) as \"cnt\"";
} else if (this.connection.driver instanceof OracleDriver) {
countSql = `COUNT(DISTINCT(` + metadata.primaryColumns.map((primaryColumn, index) => {
const propertyName = this.escape(primaryColumn.databaseName);
return `${distinctAlias}.${propertyName}`;
}).join(" || ") + ")) as \"cnt\"";
} else {
countSql = `COUNT(DISTINCT(CONCAT(` + metadata.primaryColumns.map((primaryColumn, index) => {
const propertyName = this.escape(primaryColumn.databaseName);
return `${distinctAlias}.${propertyName}`;
}).join(", ") + "))) as \"cnt\"";

// If we aren't doing anything that will create a join, we can use a simpler `COUNT` instead
// so we prevent poor query patterns in the most likely cases
if (
this.expressionMap.joinAttributes.length === 0 &&
this.expressionMap.relationIdAttributes.length === 0 &&
this.expressionMap.relationCountAttributes.length === 0
) {
return "COUNT(1)";
}

// For everything else, we'll need to do some hackery to get the correct count values.

if (this.connection.driver instanceof CockroachDriver || this.connection.driver instanceof PostgresDriver) {
// Postgres and CockroachDB can pass multiple parameters to the `DISTINCT` function
// https://www.postgresql.org/docs/9.5/sql-select.html#SQL-DISTINCT
return "COUNT(DISTINCT(" +
primaryColumns.map(c => `${distinctAlias}.${this.escape(c.databaseName)}`).join(", ") +
"))";
}

if (this.connection.driver instanceof MysqlDriver) {
// MySQL & MariaDB can pass multiple parameters to the `DISTINCT` language construct
// https://mariadb.com/kb/en/count-distinct/
return "COUNT(DISTINCT " +
primaryColumns.map(c => `${distinctAlias}.${this.escape(c.databaseName)}`).join(", ") +
")";
}

if (this.connection.driver instanceof SqlServerDriver) {
// SQL Server has gotta be different from everyone else. They don't support
// distinct counting multiple columns & they don't have the same operator
// characteristic for concatenating, so we gotta use the `CONCAT` function.
// However, If it's exactly 1 column we can omit the `CONCAT` for better performance.

const columnsExpression = primaryColumns.map(
primaryColumn => `${distinctAlias}.${this.escape(primaryColumn.databaseName)}`
).join(", '|;|', ");

if (primaryColumns.length === 1) {

return `COUNT(DISTINCT(${columnsExpression}))`;
}

} else {
countSql = `COUNT(DISTINCT(` + metadata.primaryColumns.map((primaryColumn, index) => {
const propertyName = this.escape(primaryColumn.databaseName);
return `${distinctAlias}.${propertyName}`;
}).join(", ") + ")) as \"cnt\"";
return `COUNT(DISTINCT(CONCAT(${columnsExpression})))`;

}

// If all else fails, fall back to a `COUNT` and `DISTINCT` across all the primary columns concatenated.
// Per the SQL spec, this is the canonical string concatenation mechanism which is most
// likely to work across servers implementing the SQL standard.

// Please note, if there is only one primary column that the concatenation does not occur in this
// query and the query is a standard `COUNT DISTINCT` in that case.

return `COUNT(DISTINCT(` +
primaryColumns.map(c => `${distinctAlias}.${this.escape(c.databaseName)}`).join(" || '|;|' || ") +
"))";
}

protected async executeCountQuery(queryRunner: QueryRunner): Promise<number> {
const countSql = this.computeCountExpression();

const results = await this.clone()
.orderBy()
.groupBy()
.offset(undefined)
.limit(undefined)
.skip(undefined)
.take(undefined)
.select(countSql)
.select(countSql, "cnt")
.setOption("disable-global-order")
.loadRawResults(queryRunner);

Expand Down
19 changes: 19 additions & 0 deletions test/functional/query-builder/count/entity/AmbigiousPrimaryKey.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import {Entity, PrimaryColumn} from "../../../../../src";

@Entity("ambig_primary_key")
export class AmbigiousPrimaryKey {
@PrimaryColumn()
a: string;

@PrimaryColumn()
b: string;

static make({ a, b }: { a: string, b: string }): AmbigiousPrimaryKey {
const apk = new AmbigiousPrimaryKey();
apk.a = a;
apk.b = b;

return apk;
}

}
73 changes: 70 additions & 3 deletions test/functional/query-builder/count/query-builder-count.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,88 @@ import {closeTestingConnections, createTestingConnections, reloadTestingDatabase
import {Connection} from "../../../../src/connection/Connection";
import {expect} from "chai";
import {Test} from "./entity/Test";
import {AmbigiousPrimaryKey} from "./entity/AmbigiousPrimaryKey";

describe("query builder > count", () => {

let connections: Connection[];
before(async () => connections = await createTestingConnections({
entities: [Test],
entities: [Test, AmbigiousPrimaryKey],
schemaCreate: true,
dropSchema: true,
}));
beforeEach(() => reloadTestingDatabases(connections));
after(() => closeTestingConnections(connections));

it("Count query should be completed successfully", () => Promise.all(connections.map(async connection => {
const count = await connection.getRepository(Test).count();
it("Count query should of empty table should be 0", () => Promise.all(connections.map(async connection => {
const repo = connection.getRepository(Test);

const count = await repo.count();
expect(count).to.be.equal(0);
})));

it("Count query should count database values", () => Promise.all(connections.map(async connection => {
const repo = connection.getRepository(Test);

await Promise.all([
repo.save({ varcharField: 'ok', uuidField: '123e4567-e89b-12d3-a456-426614174000', intField: 4}),
repo.save({ varcharField: 'ok', uuidField: '123e4567-e89b-12d3-a456-426614174001', intField: 4}),
]);

const count = await repo.count();
expect(count).to.be.equal(2);
})));

it("Count query should handle ambiguous values", () => Promise.all(connections.map(async connection => {
const repo = connection.getRepository(AmbigiousPrimaryKey);

await Promise.all([
repo.save({ a: 'A', b: 'AAA' }),
repo.save({ a: 'AAA', b: 'A' }),
repo.save({ a: 'AA', b: 'AA' }),
repo.save({ a: 'BB', b: 'BB' }),
repo.save({ a: 'B', b: 'BBB' }),
repo.save({ a: 'BBB', b: 'B' })
]);

const count = await repo.count();
expect(count).to.be.equal(6, connection.name);
})));

it("counting joined query should count database values", () => Promise.all(connections.map(async connection => {
const repo = connection.getRepository(Test);

await Promise.all([
repo.save({ varcharField: 'ok', uuidField: '123e4567-e89b-12d3-a456-426614174000', intField: 4}),
repo.save({ varcharField: 'ok', uuidField: '123e4567-e89b-12d3-a456-426614174001', intField: 4}),
]);

const count = await repo.createQueryBuilder()
.from(Test, 'main')
.leftJoin(Test, 'self', 'self.intField = main.intField')
.getCount();

expect(count).to.be.equal(2);
})));

it("counting joined queries should handle ambiguous values", () => Promise.all(connections.map(async connection => {
const repo = connection.getRepository(AmbigiousPrimaryKey);

await Promise.all([
repo.save({ a: 'A', b: 'AAA' }),
repo.save({ a: 'AAA', b: 'A' }),
repo.save({ a: 'AA', b: 'AA' }),
repo.save({ a: 'BB', b: 'BB' }),
repo.save({ a: 'B', b: 'BBB' }),
repo.save({ a: 'BBB', b: 'B' })
]);

const count = await repo.createQueryBuilder()
.from(AmbigiousPrimaryKey, 'main')
.leftJoin(AmbigiousPrimaryKey, 'self', 'self.a = main.a')
.getCount();

expect(count).to.be.equal(6, connection.name);
})));

});

0 comments on commit 4abfb34

Please sign in to comment.