Skip to content

Commit

Permalink
Utility for listing the Bug IDs for a source in Datastore (#2212)
Browse files Browse the repository at this point in the history
Helpful for large-scale data operations and comparing between staging
and production.

Borrowed heavily from reimport_gcs_record.py
  • Loading branch information
andrewpollock committed May 19, 2024
1 parent 42b2c3b commit df6bd72
Showing 1 changed file with 57 additions and 0 deletions.
57 changes: 57 additions & 0 deletions tools/datafix/list_ids_for_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""Utility to list the Bug IDs for all of the PROCESSED Bugs in Cloud Datastore.
Cloud Datastore lacks a CLI for issuing queries, and sometimes it's helpful to
reason about all of the Bug IDs present for a given source.
"""

from google.cloud import datastore
from google.cloud.datastore.query import PropertyFilter

import argparse

MAX_BATCH_SIZE = 500


def main() -> None:
    """List the IDs of Bugs stored in Datastore for a single source.

    Parses the command line, queries the ``Bug`` kind in the selected GCP
    project for entities whose ``source`` equals ``--source_id`` and whose
    ``status`` equals 1 (described by this tool as the PROCESSED state),
    then prints a summary line followed by each entity's ``db_id``, one per
    line.
    """
    parser = argparse.ArgumentParser(
        description="Query the identifiers for the Bugs in Datastore "
        "in a PROCESSED state for a given source")
    # NOTE(review): --verbose is accepted for CLI compatibility, but nothing
    # in this function reads args.verbose yet.
    parser.add_argument(
        "--verbose",
        action=argparse.BooleanOptionalAction,
        dest="verbose",
        default=False,
        help="Be more verbose")
    parser.add_argument(
        "--project",
        action="store",
        dest="project",
        default="oss-vdb-test",
        help="GCP project to operate on")
    parser.add_argument(
        "--source_id",
        action="store",
        dest="source_id",
        default="cve-osv",
        help="the source_id to filter on")
    args = parser.parse_args()

    ds_client = datastore.Client(project=args.project)

    query = ds_client.query(kind="Bug")
    query.add_filter(filter=PropertyFilter("source", "=", args.source_id))
    query.add_filter(filter=PropertyFilter("status", "=", 1))
    print(f"Running query {query.filters} "
          f"on {query.kind} (in {query.project})...")
    # Materialise the results up front so the total can be reported before
    # any IDs are printed.
    bugs = list(query.fetch())
    print(f"Retrieved {len(bugs)} bugs")

    # Printing is purely local and read-only, so the results are iterated
    # directly rather than being sliced into fixed-size batches.
    for bug in bugs:
        print(f"{bug['db_id']}")


# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 comments on commit df6bd72

Please sign in to comment.