Skip to content

Commit

Permalink
Optimised email stats aggregation query for typical column usage
Browse files Browse the repository at this point in the history
ref https://linear.app/tryghost/issue/ENG-790/remove-use-of-sub-queries-in-email-analytics

- the `delivered_at` column is typically entirely/nearly entirely filled with values meaning the `IS NOT NULL` query matches a huge number of rows that MySQL has to fetch from the index to count
- using `IS NULL` switches that behaviour around as it will now match very few rows which has been shown in testing to be considerably quicker
- after switching to `IS NULL` the query returns an "undelivered" count rather than a "delivered" count, in order to keep the rest of the system behaviour the same we can calculate the delivered count by subtracting the query result from the total number of emails sent which we can fetch using a very fast primary key lookup query on the `emails` table
  • Loading branch information
kevinansfield committed Apr 3, 2024
1 parent 4f5a7b4 commit bd93bf0
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
Expand Up @@ -41,12 +41,14 @@ module.exports = {
},

async aggregateEmailStats(emailId) {
const [deliveredCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND delivered_at IS NOT NULL', [emailId]);
const {totalCount} = await db.knex('emails').select(db.knex.raw('email_count as totalCount')).where('id', emailId).first() || {totalCount: 0};
// use IS NULL here because that will typically match far fewer rows than IS NOT NULL making the query faster
const [undeliveredCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND delivered_at IS NULL', [emailId]);
const [openedCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND opened_at IS NOT NULL', [emailId]);
const [failedCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND failed_at IS NOT NULL', [emailId]);

await db.knex('emails').update({
delivered_count: deliveredCount.count,
delivered_count: totalCount - undeliveredCount.count,
opened_count: openedCount.count,
failed_count: failedCount.count
}).where('id', emailId);
Expand Down
Expand Up @@ -491,7 +491,7 @@ Object {
Object {
"created_at": StringMatching /\\\\d\\{4\\}-\\\\d\\{2\\}-\\\\d\\{2\\}T\\\\d\\{2\\}:\\\\d\\{2\\}:\\\\d\\{2\\}\\\\\\.000Z/,
"delivered_count": 1,
"email_count": 2,
"email_count": 6,
"error": null,
"error_data": null,
"failed_count": 1,
Expand All @@ -517,7 +517,7 @@ Object {
},
Object {
"created_at": StringMatching /\\\\d\\{4\\}-\\\\d\\{2\\}-\\\\d\\{2\\}T\\\\d\\{2\\}:\\\\d\\{2\\}:\\\\d\\{2\\}\\\\\\.000Z/,
"delivered_count": 0,
"delivered_count": 3,
"email_count": 3,
"error": "Everything went south",
"error_data": null,
Expand Down Expand Up @@ -690,7 +690,7 @@ Object {
Object {
"created_at": StringMatching /\\\\d\\{4\\}-\\\\d\\{2\\}-\\\\d\\{2\\}T\\\\d\\{2\\}:\\\\d\\{2\\}:\\\\d\\{2\\}\\\\\\.000Z/,
"delivered_count": 1,
"email_count": 2,
"email_count": 6,
"error": null,
"error_data": null,
"failed_count": 1,
Expand Down Expand Up @@ -736,7 +736,7 @@ Object {
"emails": Array [
Object {
"created_at": StringMatching /\\\\d\\{4\\}-\\\\d\\{2\\}-\\\\d\\{2\\}T\\\\d\\{2\\}:\\\\d\\{2\\}:\\\\d\\{2\\}\\\\\\.000Z/,
"delivered_count": 0,
"delivered_count": 3,
"email_count": 3,
"error": "Everything went south",
"error_data": null,
Expand Down
4 changes: 2 additions & 2 deletions ghost/core/test/utils/fixtures/data-generator.js
Expand Up @@ -731,7 +731,7 @@ DataGenerator.Content = {
id: ObjectId().toHexString(),
uuid: '6b6afda6-4b5e-4893-bff6-f16859e8349a',
status: 'submitted',
email_count: 2,
email_count: 6, // match the number of email_recipients relations below
recipient_filter: 'all',
subject: 'You got mailed!',
html: '<p>Look! I\'m an email</p>',
Expand All @@ -745,7 +745,7 @@ DataGenerator.Content = {
uuid: '365daa11-4bf0-4614-ad43-6346387ffa00',
status: 'failed',
error: 'Everything went south',
email_count: 3,
email_count: 3, // doesn't match the number of email_recipients relations below, some calculations may be off
subject: 'You got mailed! Again!',
html: '<p>What\'s that? Another email!</p>',
plaintext: 'yes this is an email',
Expand Down

0 comments on commit bd93bf0

Please sign in to comment.