-
Notifications
You must be signed in to change notification settings - Fork 903
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Delete Container Registry images left after Functions deployment #3439
Changes from 2 commits
74d8960
ee75643
dba3a0d
b5c65ad
3fb4424
e18b3d4
fb5a5d7
b245331
8b9b383
72b4267
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
// This code is very aggressive about running requests in parallel and does not use | ||
// a task queue, because the quota limits for GCR.io are absurdly high. At the time | ||
// of writing, we can make 50K requests per 10m. | ||
// https://cloud.google.com/container-registry/quotas | ||
|
||
import * as clc from "cli-color"; | ||
|
||
import { logger } from "../../logger"; | ||
import * as gcr from "../../gcp/containerregistry"; | ||
import * as backend from "./backend"; | ||
import * as utils from "../../utils"; | ||
|
||
// A flattening of container_registry_hosts and | ||
// region_multiregion_map from regionconfig.borg | ||
const SUBDOMAIN_MAPPING: Record<string, string> = { | ||
"us-west2": "us", | ||
"us-west3": "us", | ||
"us-west4": "us", | ||
"us-central1": "us", | ||
"us-central2": "us", | ||
"us-east1": "us", | ||
"us-east4": "us", | ||
"northamerica-northeast1": "us", | ||
"southamerica-east1": "us", | ||
"europe-west1": "eu", | ||
"europe-west2": "eu", | ||
"europe-west3": "eu", | ||
"europe-west5": "eu", | ||
"europe-west6": "eu", | ||
"europe-central2": "eu", | ||
"asia-east1": "asia", | ||
"asia-east2": "asia", | ||
"asia-northeast1": "asia", | ||
"asia-northeast2": "asia", | ||
"asia-northeast3": "asia", | ||
"asia-south1": "asia", | ||
"asia-southeast2": "asia", | ||
"australia-southeast1": "asia", | ||
}; | ||
|
||
export async function cleanupBuildImages(functions: backend.FunctionSpec[]): Promise<void> { | ||
utils.logBullet(clc.bold.cyan("functions: ") + "cleaning up build files..."); | ||
const gcrCleaner = new ContainerRegistryCleaner(); | ||
try { | ||
await Promise.all(functions.map((func) => gcrCleaner.cleanupFunction(func))); | ||
} catch (err) { | ||
logger.debug("Failed to delete container registry artifacts with error", err); | ||
utils.logLabeledWarning( | ||
"functions", | ||
"Unhnandled error cleaning up build files. This could result in a small monthly bill if not corrected" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo in unhandled There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed |
||
); | ||
} | ||
|
||
// TODO: clean up Artifact Registry images as well. | ||
} | ||
|
||
export class ContainerRegistryCleaner { | ||
readonly helpers: Record<string, ContainerRegistryHelper> = {}; | ||
|
||
private helper(location: string): ContainerRegistryHelper { | ||
const subdomain = SUBDOMAIN_MAPPING[location] || "us"; | ||
if (!this.helpers[subdomain]) { | ||
this.helpers[subdomain] = new ContainerRegistryHelper(subdomain); | ||
} | ||
return this.helpers[subdomain]; | ||
} | ||
|
||
// GCFv1 has the directory structure: | ||
// gcf/ | ||
// +- <region>/ | ||
// +- <uuid> | ||
// +- <hash> (tags: <FuncName>_version-<#>) | ||
// +- cache/ (Only present in first deploy of region) | ||
// | +- <hash> (tags: latest) | ||
// +- worker/ (Only present in first deploy of region) | ||
// +- <hash> (tags: latest) | ||
// | ||
// We'll parallel search for the valid <uuid> and their children | ||
// until we find one with the right tag for the function name. | ||
// The underlying Helper's caching should make this expensive for | ||
// the first function and free for the next functions in the same | ||
// region. | ||
async cleanupFunction(func: backend.FunctionSpec): Promise<void> { | ||
const helper = this.helper(func.region); | ||
const uuids = (await helper.ls(`${func.project}/gcf/${func.region}`)).children; | ||
|
||
const uuidTags: Record<string, string[]> = {}; | ||
const loadUuidTags: Promise<void>[] = []; | ||
for (const uuid of uuids) { | ||
loadUuidTags.push( | ||
(async () => { | ||
const path = `${func.project}/gcf/${func.region}/${uuid}`; | ||
const tags = (await helper.ls(path)).tags; | ||
uuidTags[path] = tags; | ||
})() | ||
); | ||
} | ||
await Promise.all(loadUuidTags); | ||
|
||
const extractFunction = /^(.*)_version-\d+$/; | ||
const entry = Object.entries(uuidTags).find(([, tags]) => { | ||
joehan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return tags.find((tag) => { | ||
const match = tag.match(extractFunction); | ||
return match && match[1] === func.id; | ||
}); | ||
}); | ||
|
||
if (!entry) { | ||
logger.debug("Could not find image for function", backend.functionName(func)); | ||
return; | ||
} | ||
await helper.rm(entry[0]); | ||
} | ||
} | ||
|
||
export interface Stat { | ||
children: string[]; | ||
digests: gcr.Digest[]; | ||
tags: gcr.Tag[]; | ||
} | ||
|
||
export class ContainerRegistryHelper { | ||
readonly client: gcr.Client; | ||
readonly cache: Record<string, Stat> = {}; | ||
|
||
constructor(subdomain: string) { | ||
this.client = new gcr.Client(subdomain); | ||
} | ||
|
||
async ls(path: string): Promise<Stat> { | ||
if (!this.cache[path]) { | ||
const raw = await this.client.listTags(path); | ||
this.cache[path] = { | ||
tags: raw.tags, | ||
digests: Object.keys(raw.manifest), | ||
children: raw.child, | ||
}; | ||
} | ||
return this.cache[path]; | ||
} | ||
|
||
async rm(path: string): Promise<void> { | ||
const stat = await this.ls(path); | ||
const deleteChildren: Promise<void>[] = []; | ||
const recursive = stat.children.map((child) => this.rm(`${path}/${child}`)); | ||
// Let children ("directories") be cleaned up in parallel while we clean | ||
// up the "files" in this location. | ||
|
||
const deleteTags = stat.tags.map((tag) => this.client.deleteTag(path, tag)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to delete all tags before starting to delete the images? If not, we could combine these await Promise.all()'s and do them in parallel There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm also a little concerned about how this behaves if just one call fails - for example, if the first Promise.all rejects, this errors and we never try to deleteImages... but we already made our recursive call, so that has started but will get 'cut off' at some arbitrary point. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good to me - this is definitely a tricky one to handle and it seems likely that if one call fails, the rest are high likelihood to as well. I feel better about this strategy tho. |
||
await Promise.all(deleteTags); | ||
stat.tags = []; | ||
|
||
const deleteImages = stat.digests.map((digest) => this.client.deleteImage(path, digest)); | ||
await Promise.all(deleteImages); | ||
stat.digests = []; | ||
|
||
await Promise.all(recursive); | ||
stat.children = []; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
// Note: unlike Google APIs, the documentation for the GCR API is | ||
// actually the Docker REST API. This can be found at | ||
// https://docs.docker.com/registry/spec/api/ | ||
// This API is _very_ complex in its entirety and is very subtle (e.g. tags and digests | ||
// are both strings and can both be put in the same route to get completely different | ||
// response document types). | ||
// This file will only implement a minimal subset as needed. | ||
import { FirebaseError } from "../error"; | ||
import { containerRegistryDomain } from "../api"; | ||
import * as api from "../apiv2"; | ||
|
||
// A Digest is a string in the format <algorithm>:<hex>. For example: | ||
// sha256:146d8c9dff0344fb01417ef28673ed196e38215f3c94837ae733d3b064ba439e | ||
export type Digest = string; | ||
export type Tag = string; | ||
|
||
export interface Tags { | ||
name: string; | ||
tags: string[]; | ||
|
||
// These fields are not documented in the Docker API but are | ||
// present in the GCR API. | ||
manifest: Record<Digest, ImageInfo>; | ||
child: string[]; | ||
} | ||
|
||
export interface ImageInfo { | ||
// times are string milliseconds | ||
timeCreatedMs: string; | ||
timeUploadedMs: string; | ||
tag: string[]; | ||
mediaType: string; | ||
imageSizeBytes: string; | ||
layerId: string; | ||
} | ||
|
||
interface ErrorsResponse { | ||
errors?: { | ||
code: string; | ||
message: string; | ||
details: unknown; | ||
}[]; | ||
} | ||
|
||
function isErrors(response: unknown): response is ErrorsResponse { | ||
return Object.prototype.hasOwnProperty.bind(response)("errors"); | ||
} | ||
|
||
const API_VERSION = "v2"; | ||
|
||
export class Client { | ||
readonly client: api.Client; | ||
|
||
constructor(subdomain?: string) { | ||
let origin: string; | ||
if (subdomain) { | ||
origin = `https://${subdomain}.${containerRegistryDomain}`; | ||
} else { | ||
origin = `https://${containerRegistryDomain}`; | ||
} | ||
this.client = new api.Client({ | ||
apiVersion: API_VERSION, | ||
auth: true, | ||
urlPrefix: origin, | ||
}); | ||
} | ||
|
||
async listTags(path: string): Promise<Tags> { | ||
const response = await this.client.get<Tags | ErrorsResponse>(`${path}/tags/list`); | ||
if (isErrors(response.body)) { | ||
throw new FirebaseError(`Failed to list GCR tags at ${path}`, { | ||
children: response.body.errors, | ||
}); | ||
} | ||
return response.body; | ||
} | ||
|
||
async deleteTag(path: string, tag: Tag): Promise<void> { | ||
const response = await this.client.delete<ErrorsResponse>(`${path}/manifests/${tag}`); | ||
if (response.body.errors?.length != 0) { | ||
throw new FirebaseError(`Failed to delete tag ${tag} at path ${path}`, { | ||
children: response.body.errors, | ||
}); | ||
} | ||
} | ||
|
||
async deleteImage(path: string, digest: Digest): Promise<void> { | ||
const response = await this.client.delete<ErrorsResponse>(`${path}/manifests/${digest}`); | ||
if (response.body.errors?.length != 0) { | ||
throw new FirebaseError(`Failed to delete image ${digest} at path ${path}`, { | ||
children: response.body.errors, | ||
}); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be great to include a console link to where you can clean these up, plus a list of the files that were not cleaned up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. I've split the difference between what I think you're asking for and what I think I can reliably deliver (since I can't do a recursive LS to find all files that need to be deleted when I can't trust API calls to succeed).
I've made the code try to resume from as many errors as possible by catching and throwing at the end of each process. Since this can lead to multiple errors, I throw a random error but log all errors. When there are any errors, I print the top-level directory for GCF's images in that multi-region. I have a list format for 2 or more regions and an inline format for one region. I've manually tested a case where there was: