Skip to content

Commit

Permalink
Fix milestone check script to paginate through large result sets.
Browse files Browse the repository at this point in the history
We still cannot retrieve more than 250 commits for a PR, so exceptionally large PRs such as jupyterlab#5508 will still need some special-casing.
  • Loading branch information
jasongrout committed Feb 2, 2019
1 parent 7b54559 commit e29339e
Showing 1 changed file with 107 additions and 30 deletions.
137 changes: 107 additions & 30 deletions scripts/milestone_check.py
Expand Up @@ -15,10 +15,12 @@
print('Error: set the environment variable GITHUB_TOKEN to a GitHub authentication token (see https://github.com/settings/tokens)')
exit(1)

# Milestone to check; must be a key of `ranges` below.
MILESTONE = '1.0'

# Maps a milestone identifier to the `git log` revision range holding the
# commits that shipped in that milestone.
ranges = {
    18: 'origin/0.35.0 --not origin/0.34.x',  # 0.35.0
    20: 'origin/0.35.x --not v0.35.0',  # 0.35.x
    '1.0': 'origin/master --not origin/0.35.x',
}

# List every commit in the selected milestone's revision range, one per line,
# formatted as "<commit hash>,<committer e-mail>,<subject>".
# The command is passed as a single shell string (shell=True); the range that
# is spliced in comes from the hard-coded `ranges` table, not from user input.
out = subprocess.run("git log {} --format='%H,%cE,%s'".format(ranges[MILESTONE]), shell=True, encoding='utf8', stdout=subprocess.PIPE)
Expand All @@ -27,47 +29,122 @@

url = 'https://api.github.com/graphql'

# Search query returning one page (50 results) of the merged PRs tagged with
# MILESTONE. For each PR it asks for the merge commit and the first 100
# commits; `totalCount` lets the caller detect PRs with more commits than that.
# The `cursor` variable is filled in by the pagination loop before each request
# (None requests the first page).
json = {
    'query': """
query test($cursor: String) {
search(first: 50, after: $cursor, type: ISSUE, query: "repo:jupyterlab/jupyterlab milestone:%s is:pr is:merged ") {
issueCount
pageInfo {
endCursor
hasNextPage
}
nodes {
... on PullRequest {
title
number
mergeCommit {
oid
}
commits(first: 100) {
totalCount
nodes {
commit {
oid
}
}
}
}
}
}
}
""" % MILESTONE,
    'variables': {
        'cursor': None,
    },
}

# Every API request authenticates with the token held in `api_token`
# (presumably read from the GITHUB_TOKEN environment variable earlier in the
# script -- confirm against the lines above this hunk).
headers = {'Authorization': 'token %s' % api_token}

# Commit-to-PR bookkeeping: maps each PR number to
# {'mergeCommit': <oid>, 'commits': <set of commit oids>}.
# It starts empty and is filled in by the paginated queries that follow.
prs = {}

# Page through the search results for the milestone, 50 PRs per request.
# PRs whose commits fit in the single page of 100 returned by the search are
# recorded in `prs` right away; bigger ones are only queued in `large_prs`
# so that their commits can be fetched separately afterwards.
large_prs = []
cursor = None
has_next_page = True
while has_next_page:
    json['variables']['cursor'] = cursor
    response = requests.post(url=url, json=json, headers=headers)
    results = response.json()['data']['search']
    total_prs = results['issueCount']

    for pr in results['nodes']:
        commit_page = pr['commits']
        if commit_page['totalCount'] <= 100:
            prs[pr['number']] = {
                'mergeCommit': pr['mergeCommit']['oid'],
                'commits': {node['commit']['oid'] for node in commit_page['nodes']},
            }
        else:
            large_prs.append(pr['number'])
            print('Large PR, fetching commits individually: %s'%pr['number'])

    # Remember where this page ended so the next request continues from there.
    page_info = results['pageInfo']
    has_next_page = page_info['hasNextPage']
    cursor = page_info['endCursor']

# GraphQL document that fetches a single pull request ($pr) together with one
# page of up to 100 of its commits, starting after $cursor.
_PR_COMMITS_QUERY = """
query test($pr:Int!, $cursor: String) {
repository(owner: "jupyterlab", name: "jupyterlab") {
pullRequest(number: $pr) {
title
number
mergeCommit {
oid
}
commits(first: 100, after: $cursor) {
totalCount
pageInfo {
endCursor
hasNextPage
}
nodes {
commit {
oid
}
}
}
}
}
}
"""

# Request payload for the per-PR commit query; both variables are placeholders
# that are set before each request is sent.
prjson = {
    'query': _PR_COMMITS_QUERY,
    'variables': {
        'pr': None,
        'cursor': None,
    },
}

# Fetch the complete commit list of every PR that had more commits than the
# search query returns in one page, paging through 100 commits at a time.
for prnumber in large_prs:
    prjson['variables']['pr'] = prnumber
    # Start each PR from its first page. A cursor left over from the previous
    # PR points into a different commit list and must not be reused.
    prjson['variables']['cursor'] = None
    pr_commits = set()
    while True:
        r = requests.post(url=url, json=prjson, headers=headers)
        pr = r.json()['data']['repository']['pullRequest']
        assert pr['number'] == prnumber
        total_commits = pr['commits']['totalCount']
        pr_commits.update(i['commit']['oid'] for i in pr['commits']['nodes'])

        # Pagination state comes from this PR's own commit connection.
        page_info = pr['commits']['pageInfo']
        if not page_info['hasNextPage']:
            break
        prjson['variables']['cursor'] = page_info['endCursor']

    prs[prnumber] = {'mergeCommit': pr['mergeCommit']['oid'],
                     'commits': pr_commits}
    # The API can report more commits than it actually hands out, in which
    # case the PR's commit set is incomplete; make that visible.
    if total_commits > len(pr_commits):
        print("WARNING: PR %d has %d commits, but GitHub is only giving us %d of them"%(prnumber, total_commits, len(pr_commits)))



# Check we got all PRs: every PR counted by the search must have an entry.
assert len(prs) == total_prs

# Reverse dictionary
commits_to_prs={}
for key,value in prs.items():
Expand All @@ -86,7 +163,7 @@

# PR numbers collected for the milestone that are not in `good`
# (`good` is built in a part of the script not shown in this hunk).
prs_not_represented = set(prs) - good

# Summary line: the milestone identifier and the number of merged PRs that
# the search reported for it.
print("Milestone: %s, %d merged PRs"%(MILESTONE, total_prs))
print("""
PRs that are in the milestone, but have no commits in the version range.
These PRs probably belong in a different milestone.
Expand Down

0 comments on commit e29339e

Please sign in to comment.