Skip to content

Commit 95b8da9

Browse files
authoredDec 10, 2021
fix(logs): log retention fails with OperationAbortedException (#17688)
Fixes: #17546 This adds to the fix in #16083 that was addressing the issue where the LogRetention Lambda can be executed concurrently and create a race condition where multiple invocations are trying to create or modify the same log group. The previous fix addressed the issue if it occurred during log group creation, in the `createLogGroupSafe` method, but did not account for the same problem happening when modifying a log group's retention period in the `setRetentionPolicy` method. This fix applies the same logic from the last fix to the other method. ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent 3d64f9b commit 95b8da9

File tree

2 files changed

+233
-13
lines changed

2 files changed

+233
-13
lines changed
 

‎packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts

+32-10
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
4646
throw new Error('Out of attempts to create a logGroup');
4747
}
4848
}
49-
// Any other error
50-
console.error(error);
5149
throw error;
5250
}
5351
} while (true); // exit happens on retry count check
@@ -62,12 +60,36 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
6260
* @param retentionInDays the number of days to retain the log events in the specified log group.
6361
*/
6462
async function setRetentionPolicy(logGroupName: string, region?: string, options?: SdkRetryOptions, retentionInDays?: number) {
65-
const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options });
66-
if (!retentionInDays) {
67-
await cloudwatchlogs.deleteRetentionPolicy({ logGroupName }).promise();
68-
} else {
69-
await cloudwatchlogs.putRetentionPolicy({ logGroupName, retentionInDays }).promise();
70-
}
63+
// The same as in createLogGroupSafe(), here we could end up with the race
64+
// condition where a log group is either already being created or its retention
65+
// policy is being updated. This would result in an OperationAbortedException,
66+
// which we will try to catch and retry the command a number of times before failing
67+
let retryCount = options?.maxRetries == undefined ? 10 : options.maxRetries;
68+
const delay = options?.retryOptions?.base == undefined ? 10 : options.retryOptions.base;
69+
do {
70+
try {
71+
const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options });
72+
if (!retentionInDays) {
73+
await cloudwatchlogs.deleteRetentionPolicy({ logGroupName }).promise();
74+
} else {
75+
await cloudwatchlogs.putRetentionPolicy({ logGroupName, retentionInDays }).promise();
76+
}
77+
return;
78+
79+
} catch (error) {
80+
if (error.code === 'OperationAbortedException') {
81+
if (retryCount > 0) {
82+
retryCount--;
83+
await new Promise(resolve => setTimeout(resolve, delay));
84+
continue;
85+
} else {
86+
// The log group is still being created by another execution but we are out of retries
87+
throw new Error('Out of attempts to create a logGroup');
88+
}
89+
}
90+
throw error;
91+
}
92+
} while (true); // exit happens on retry count check
7193
}
7294

7395
export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent, context: AWSLambda.Context) {
@@ -92,10 +114,10 @@ export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent
92114
// Set a retention policy of 1 day on the logs of this very function.
93115
// Due to the async nature of the log group creation, the log group for this function might
94116
// still be not created yet at this point. Therefore we attempt to create it.
95-
// In case it is being created, createLogGroupSafe will handle the conflic.
117+
// In case it is being created, createLogGroupSafe will handle the conflict.
96118
const region = process.env.AWS_REGION;
97119
await createLogGroupSafe(`/aws/lambda/${context.functionName}`, region, retryOptions);
98-
// If createLogGroupSafe fails, the log group is not created even after multiple attempts
120+
// If createLogGroupSafe fails, the log group is not created even after multiple attempts.
99121
// In this case we have nothing to set the retention policy on but an exception will skip
100122
// the next line.
101123
await setRetentionPolicy(`/aws/lambda/${context.functionName}`, region, retryOptions, 1);

‎packages/@aws-cdk/aws-logs/test/log-retention-provider.test.ts

+201-3
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ describe('log retention provider', () => {
238238

239239
});
240240

241-
test('does not if when operations on provider log group fails', async () => {
241+
test('succeeds when createLogGroup for provider log group returns OperationAbortedException twice', async () => {
242242
let attempt = 2;
243243
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
244244
if (params.logGroupName === '/aws/lambda/provider') {
@@ -280,7 +280,7 @@ describe('log retention provider', () => {
280280

281281
});
282282

283-
test('does not fail if operations on CDK lambda log group fails twice', async () => {
283+
test('succeeds when createLogGroup for CDK lambda log group returns OperationAbortedException twice', async () => {
284284
let attempt = 2;
285285
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
286286
if (params.logGroupName === 'group') {
@@ -322,7 +322,7 @@ describe('log retention provider', () => {
322322

323323
});
324324

325-
test('does fail if operations on CDK lambda log group fails indefinitely', async () => {
325+
test('fails when createLogGroup for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
326326
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
327327
if (params.logGroupName === 'group') {
328328
return Promise.reject(new MyError(
@@ -356,6 +356,204 @@ describe('log retention provider', () => {
356356
expect(request.isDone()).toEqual(true);
357357

358358

359+
});
360+
361+
test('succeeds when putRetentionPolicy for provider log group returns OperationAbortedException twice', async () => {
362+
let attempt = 2;
363+
const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
364+
if (params.logGroupName === '/aws/lambda/provider') {
365+
if (attempt > 0) {
366+
attempt--;
367+
return Promise.reject(new MyError(
368+
'A conflicting operation is currently in progress against this resource. Please try again.',
369+
'OperationAbortedException'));
370+
} else {
371+
return Promise.resolve({});
372+
}
373+
}
374+
return Promise.resolve({});
375+
};
376+
377+
const createLogGroupFake = sinon.fake.resolves({});
378+
const deleteRetentionPolicyFake = sinon.fake.resolves({});
379+
380+
AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
381+
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
382+
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
383+
384+
const event = {
385+
...eventCommon,
386+
RequestType: 'Create',
387+
ResourceProperties: {
388+
ServiceToken: 'token',
389+
RetentionInDays: '30',
390+
LogGroupName: 'group',
391+
},
392+
};
393+
394+
const request = createRequest('SUCCESS');
395+
396+
await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
397+
398+
expect(request.isDone()).toEqual(true);
399+
400+
401+
});
402+
403+
test('succeeds when putRetentionPolicy for CDK lambda log group returns OperationAbortedException twice', async () => {
404+
let attempt = 2;
405+
const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
406+
if (params.logGroupName === 'group') {
407+
if (attempt > 0) {
408+
attempt--;
409+
return Promise.reject(new MyError(
410+
'A conflicting operation is currently in progress against this resource. Please try again.',
411+
'OperationAbortedException'));
412+
} else {
413+
return Promise.resolve({});
414+
}
415+
}
416+
return Promise.resolve({});
417+
};
418+
419+
const createLogGroupFake = sinon.fake.resolves({});
420+
const deleteRetentionPolicyFake = sinon.fake.resolves({});
421+
422+
AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
423+
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
424+
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
425+
426+
const event = {
427+
...eventCommon,
428+
RequestType: 'Create',
429+
ResourceProperties: {
430+
ServiceToken: 'token',
431+
RetentionInDays: '30',
432+
LogGroupName: 'group',
433+
},
434+
};
435+
436+
const request = createRequest('SUCCESS');
437+
438+
await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
439+
440+
expect(request.isDone()).toEqual(true);
441+
442+
443+
});
444+
445+
test('fails when putRetentionPolicy for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
446+
const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
447+
if (params.logGroupName === 'group') {
448+
return Promise.reject(new MyError(
449+
'A conflicting operation is currently in progress against this resource. Please try again.',
450+
'OperationAbortedException'));
451+
}
452+
return Promise.resolve({});
453+
};
454+
455+
const createLogGroupFake = sinon.fake.resolves({});
456+
const deleteRetentionPolicyFake = sinon.fake.resolves({});
457+
458+
AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
459+
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
460+
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
461+
462+
const event = {
463+
...eventCommon,
464+
RequestType: 'Create',
465+
ResourceProperties: {
466+
ServiceToken: 'token',
467+
RetentionInDays: '30',
468+
LogGroupName: 'group',
469+
},
470+
};
471+
472+
const request = createRequest('FAILED');
473+
474+
await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
475+
476+
expect(request.isDone()).toEqual(true);
477+
478+
479+
});
480+
481+
test('succeeds when deleteRetentionPolicy for provider log group returns OperationAbortedException twice', async () => {
482+
let attempt = 2;
483+
const deleteRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
484+
if (params.logGroupName === '/aws/lambda/provider') {
485+
if (attempt > 0) {
486+
attempt--;
487+
return Promise.reject(new MyError(
488+
'A conflicting operation is currently in progress against this resource. Please try again.',
489+
'OperationAbortedException'));
490+
} else {
491+
return Promise.resolve({});
492+
}
493+
}
494+
return Promise.resolve({});
495+
};
496+
497+
const createLogGroupFake = sinon.fake.resolves({});
498+
const putRetentionPolicyFake = sinon.fake.resolves({});
499+
500+
AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
501+
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
502+
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
503+
504+
const event = {
505+
...eventCommon,
506+
RequestType: 'Create',
507+
ResourceProperties: {
508+
ServiceToken: 'token',
509+
RetentionInDays: '0', // Setting this to 0 triggers the call to deleteRetentionPolicy
510+
LogGroupName: 'group',
511+
},
512+
};
513+
514+
const request = createRequest('SUCCESS');
515+
516+
await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
517+
518+
expect(request.isDone()).toEqual(true);
519+
520+
521+
});
522+
523+
test('fails when deleteRetentionPolicy for provider log group fails with OperationAbortedException indefinitely', async () => {
524+
const deleteRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
525+
if (params.logGroupName === 'group') {
526+
return Promise.reject(new MyError(
527+
'A conflicting operation is currently in progress against this resource. Please try again.',
528+
'OperationAbortedException'));
529+
}
530+
return Promise.resolve({});
531+
};
532+
533+
const createLogGroupFake = sinon.fake.resolves({});
534+
const putRetentionPolicyFake = sinon.fake.resolves({});
535+
536+
AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
537+
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
538+
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
539+
540+
const event = {
541+
...eventCommon,
542+
RequestType: 'Create',
543+
ResourceProperties: {
544+
ServiceToken: 'token',
545+
RetentionInDays: '0', // Setting this to 0 triggers the call to deleteRetentionPolicy
546+
LogGroupName: 'group',
547+
},
548+
};
549+
550+
const request = createRequest('FAILED');
551+
552+
await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
553+
554+
expect(request.isDone()).toEqual(true);
555+
556+
359557
});
360558

361559
test('response data contains the log group name', async () => {

0 commit comments

Comments
 (0)
Please sign in to comment.