From c8eba37afc3e6f362cc8b2d1a714700a47bbbccf Mon Sep 17 00:00:00 2001 From: Kevin Ansfield Date: Mon, 25 Mar 2024 16:39:38 +0000 Subject: [PATCH 1/2] Removed use of subqueries in email analytics queries closes https://linear.app/tryghost/issue/ENG-790/remove-use-of-sub-queries-in-email-analytics Avoiding sub queries means we don't have a process tied up for longer than necessary and we can more easily see if one of the queries is non-performant. - extracted the count queries into separate queries and used the retrieved values in the final update query - removed a query by moving the email open rate calculation into JS as we've already fetched the necessary data before that point --- .../services/email-analytics/lib/queries.js | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/ghost/core/core/server/services/email-analytics/lib/queries.js b/ghost/core/core/server/services/email-analytics/lib/queries.js index 1504e4b1220e..c641496db8a6 100644 --- a/ghost/core/core/server/services/email-analytics/lib/queries.js +++ b/ghost/core/core/server/services/email-analytics/lib/queries.js @@ -41,10 +41,14 @@ module.exports = { }, async aggregateEmailStats(emailId) { + const [deliveredCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND delivered_at IS NOT NULL', [emailId]); + const [openedCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND opened_at IS NOT NULL', [emailId]); + const [failedCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND failed_at IS NOT NULL', [emailId]); + await db.knex('emails').update({ - delivered_count: db.knex.raw(`(SELECT COUNT(id) FROM email_recipients WHERE email_id = ? AND delivered_at IS NOT NULL)`, [emailId]), - opened_count: db.knex.raw(`(SELECT COUNT(id) FROM email_recipients WHERE email_id = ? AND opened_at IS NOT NULL)`, [emailId]), - failed_count: db.knex.raw(`(SELECT COUNT(id) FROM email_recipients WHERE email_id = ? AND failed_at IS NOT NULL)`, [emailId]) + delivered_count: deliveredCount.count, + opened_count: openedCount.count, + failed_count: failedCount.count }).where('id', emailId); }, @@ -56,15 +60,16 @@ module.exports = { .where('emails.track_opens', true) .first() || {}; + const [emailCount] = await db.knex('email_recipients').count('id as count').whereRaw('member_id = ?', [memberId]); + const [emailOpenedCount] = await db.knex('email_recipients').count('id as count').whereRaw('member_id = ? AND opened_at IS NOT NULL', [memberId]); + const updateQuery = { - email_count: db.knex.raw('(SELECT COUNT(id) FROM email_recipients WHERE member_id = ?)', [memberId]), - email_opened_count: db.knex.raw('(SELECT COUNT(id) FROM email_recipients WHERE member_id = ? AND opened_at IS NOT NULL)', [memberId]) + email_count: emailCount.count, + email_opened_count: emailOpenedCount.count }; if (trackedEmailCount >= MIN_EMAIL_COUNT_FOR_OPEN_RATE) { - updateQuery.email_open_rate = db.knex.raw(` - ROUND(((SELECT COUNT(id) FROM email_recipients WHERE member_id = ? AND opened_at IS NOT NULL) * 1.0 / ? * 100), 0) - `, [memberId, trackedEmailCount]); + updateQuery.email_open_rate = Math.round(emailOpenedCount.count / trackedEmailCount * 100); } await db.knex('members') From 056937955ba174624730918360b3c383b068fdcc Mon Sep 17 00:00:00 2001 From: Kevin Ansfield Date: Tue, 2 Apr 2024 13:10:02 +0100 Subject: [PATCH 2/2] Optimised email stats aggregation query for typical column usage ref https://linear.app/tryghost/issue/ENG-790/remove-use-of-sub-queries-in-email-analytics - the `delivered_at` column is typically entirely/nearly entirely filled with values meaning the `IS NOT NULL` query matches a huge number of rows that MySQL has to fetch from the index to count - using `IS NULL` switches that behaviour around as it will now match very few rows which has been shown in testing to be considerably quicker - after switching to `IS NULL` the query returns an "undelivered" count rather than a "delivered" count, in order to keep the rest of the system behaviour the same we can calculate the delivered count by subtracting the query result from the total number of emails sent which we can fetch using a very fast primary key lookup query on the `emails` table --- .../core/server/services/email-analytics/lib/queries.js | 6 ++++-- .../test/e2e-api/admin/__snapshots__/emails.test.js.snap | 8 ++++---- ghost/core/test/utils/fixtures/data-generator.js | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ghost/core/core/server/services/email-analytics/lib/queries.js b/ghost/core/core/server/services/email-analytics/lib/queries.js index c641496db8a6..fbe2019fdc3a 100644 --- a/ghost/core/core/server/services/email-analytics/lib/queries.js +++ b/ghost/core/core/server/services/email-analytics/lib/queries.js @@ -41,12 +41,14 @@ module.exports = { }, async aggregateEmailStats(emailId) { - const [deliveredCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND delivered_at IS NOT NULL', [emailId]); + const {totalCount} = await db.knex('emails').select(db.knex.raw('email_count as totalCount')).where('id', emailId).first() || {totalCount: 0}; + // use IS NULL here because that will typically match far fewer rows than IS NOT NULL making the query faster + const [undeliveredCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND delivered_at IS NULL', [emailId]); const [openedCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND opened_at IS NOT NULL', [emailId]); const [failedCount] = await db.knex('email_recipients').count('id as count').whereRaw('email_id = ? AND failed_at IS NOT NULL', [emailId]); await db.knex('emails').update({ - delivered_count: deliveredCount.count, + delivered_count: totalCount - undeliveredCount.count, opened_count: openedCount.count, failed_count: failedCount.count }).where('id', emailId); diff --git a/ghost/core/test/e2e-api/admin/__snapshots__/emails.test.js.snap b/ghost/core/test/e2e-api/admin/__snapshots__/emails.test.js.snap index 37f8f6f0aabd..c6f86209e5ad 100644 --- a/ghost/core/test/e2e-api/admin/__snapshots__/emails.test.js.snap +++ b/ghost/core/test/e2e-api/admin/__snapshots__/emails.test.js.snap @@ -491,7 +491,7 @@ Object { Object { "created_at": StringMatching /\\\\d\\{4\\}-\\\\d\\{2\\}-\\\\d\\{2\\}T\\\\d\\{2\\}:\\\\d\\{2\\}:\\\\d\\{2\\}\\\\\\.000Z/, "delivered_count": 1, - "email_count": 2, + "email_count": 6, "error": null, "error_data": null, "failed_count": 1, @@ -517,7 +517,7 @@ Object { }, Object { "created_at": StringMatching /\\\\d\\{4\\}-\\\\d\\{2\\}-\\\\d\\{2\\}T\\\\d\\{2\\}:\\\\d\\{2\\}:\\\\d\\{2\\}\\\\\\.000Z/, - "delivered_count": 0, + "delivered_count": 3, "email_count": 3, "error": "Everything went south", "error_data": null, @@ -690,7 +690,7 @@ Object { Object { "created_at": StringMatching /\\\\d\\{4\\}-\\\\d\\{2\\}-\\\\d\\{2\\}T\\\\d\\{2\\}:\\\\d\\{2\\}:\\\\d\\{2\\}\\\\\\.000Z/, "delivered_count": 1, - "email_count": 2, + "email_count": 6, "error": null, "error_data": null, "failed_count": 1, @@ -736,7 +736,7 @@ Object { "emails": Array [ Object { "created_at": StringMatching /\\\\d\\{4\\}-\\\\d\\{2\\}-\\\\d\\{2\\}T\\\\d\\{2\\}:\\\\d\\{2\\}:\\\\d\\{2\\}\\\\\\.000Z/, - "delivered_count": 0, + "delivered_count": 3, "email_count": 3, "error": "Everything went south", "error_data": null, diff --git a/ghost/core/test/utils/fixtures/data-generator.js b/ghost/core/test/utils/fixtures/data-generator.js index d671503b014b..be1a079d808d 100644 --- a/ghost/core/test/utils/fixtures/data-generator.js +++ b/ghost/core/test/utils/fixtures/data-generator.js @@ -731,7 +731,7 @@ DataGenerator.Content = { id: ObjectId().toHexString(), uuid: '6b6afda6-4b5e-4893-bff6-f16859e8349a', status: 'submitted', - email_count: 2, + email_count: 6, // match the number of email_recipients relations below recipient_filter: 'all', subject: 'You got mailed!', html: '

Look! I\'m an email

', @@ -745,7 +745,7 @@ DataGenerator.Content = { uuid: '365daa11-4bf0-4614-ad43-6346387ffa00', status: 'failed', error: 'Everything went south', - email_count: 3, + email_count: 3, // doesn't match the number of email_recipients relations below, some calculations may be off subject: 'You got mailed! Again!', html: '

What\'s that? Another email!

', plaintext: 'yes this is an email',