-
Notifications
You must be signed in to change notification settings - Fork 2
/
make_issues_csvs.py
117 lines (81 loc) · 4.14 KB
/
make_issues_csvs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import snyk
import datetime
import os
import pandas as pd
def process_org(org, client, date_path, the_date):
    """Cache the issues of every project in one Snyk organization.

    Ensures the folder ``<date_path>/org-<org id>`` exists, then calls
    ``update_project_issues`` once for each project of the organization.

    Args:
        org: Organization object; only its ``id`` attribute is read.
        client: Snyk client used to list the organization's projects and
            handed on for the per-project issue requests.
        date_path: Directory that receives the per-organization folder.
        the_date: Reporting date string, forwarded unchanged.
    """
    org_id = org.id
    org_path = os.path.join(date_path, f'org-{org_id}')
    # makedirs(exist_ok=True) tolerates an already existing folder and also
    # creates missing parents, which a separate isdir() check followed by
    # mkdir() does not.
    os.makedirs(org_path, exist_ok=True)
    # This currently just gets all projects in an org, but the snyk library
    # supports filtering by tags
    # https://github.com/snyk-labs/pysnyk#projects
    projects = client.organizations.get(org_id).projects.all()
    for p in projects:
        update_project_issues(p.id, client, org_id, org_path, the_date)
def update_project_issues(p_id, client, org_id, o_path, the_date):
    """Download a project's issues unless its CSV file is already present.

    The cache file is ``<o_path>/<p_id>.csv``. When that file exists only a
    log line is printed; otherwise ``save_project_issues`` is called to
    fetch the issues and write the file.

    Args:
        p_id: Project id; also used as the CSV file name.
        client: Snyk client passed through to ``save_project_issues``.
        org_id: Id of the organization owning the project.
        o_path: Folder holding the organization's CSV files.
        the_date: Reporting date string, forwarded unchanged.
    """
    csv_path = os.path.join(o_path, f'{p_id}.csv')
    needs_download = not os.path.isfile(csv_path)

    if needs_download:
        print(f'{datetime.datetime.now()}: Getting issues for {p_id} from {org_id}')
        save_project_issues(p_id, client, org_id, csv_path, the_date)
        return

    # A file on disk means an earlier run already fetched this project.
    print(f'{datetime.datetime.now()}: Issues already cached for {p_id} from {org_id}')
def _fetch_issue_page(client, i_filter, the_date, page, per_page):
    """Request one page of the reporting issues list and return its JSON."""
    # this is using the low level client of the snyk library
    # https://github.com/snyk-labs/pysnyk#low-level-client
    # combined with our reporting api endpoint:
    # https://snyk.docs.apiary.io/#reference/reporting-api/issues/get-list-of-issues
    response = client.post(
        f'reporting/issues/?from={the_date}&to={the_date}&page={page}'
        f'&perPage={per_page}&sortBy=issueTitle&order=asc&groupBy=issue',
        i_filter,
    )
    return response.json()


def _flatten_issue(row):
    """Return a flat copy of one result row, leaving ``row`` untouched.

    The first entry of ``projects`` becomes ``project.*`` columns (plus
    ``project.repo``, the project name up to the first ``:``), the ``issue``
    mapping becomes ``issue.*`` columns, and ``isFixed`` / ``introducedDate``
    are renamed to ``issue.isFixed`` / ``issue.introducedDate``.
    """
    nested_keys = ('projects', 'issue', 'isFixed', 'introducedDate')
    project = row['projects'][0]
    # Untouched keys stay first so the column order of the CSV is stable.
    flat = {k: v for k, v in row.items() if k not in nested_keys}
    flat['project.repo'] = project['name'].split(':')[0]
    flat.update({f'project.{k}': v for k, v in project.items()})
    flat.update({f'issue.{k}': v for k, v in row['issue'].items()})
    flat['issue.isFixed'] = row['isFixed']
    flat['issue.introducedDate'] = row['introducedDate']
    return flat


def save_project_issues(p_id, client, org_id, project_csv, the_date):
    """Fetch all issues of one project for ``the_date`` and write them to CSV.

    A first request with ``perPage=1`` reads the total issue count, which is
    used to work out how many pages of 250 issues to request. Every result
    row is flattened and the rows are written to ``project_csv``. Nothing is
    written when there are no issues.

    Args:
        p_id: Project id used in the request filter.
        client: Snyk client; its ``post(path, body)`` method is called and
            the response's ``json()`` is read.
        org_id: Organization id used in the request filter.
        project_csv: Path of the CSV file to write.
        the_date: Date string used for both ``from`` and ``to``.
    """
    i_filter = {'filters': {'orgs': [org_id], 'projects': [p_id]}}

    # this lets us get a total issue count so we can do pagination
    total = _fetch_issue_page(client, i_filter, the_date, 1, 1)['total']
    per_page = 250
    # Ceiling division: an exact multiple of per_page (including zero) must
    # not trigger an extra request for a page that is known to be empty.
    page_count = (total + per_page - 1) // per_page

    # Rows are collected in a list and the frame is built once;
    # DataFrame.append was removed in pandas 2.0.
    rows = []
    for page in range(1, page_count + 1):
        results = _fetch_issue_page(client, i_filter, the_date, page, per_page)['results']
        rows.extend(_flatten_issue(result) for result in results)

    df = pd.DataFrame(rows)
    if df.empty:
        print(f'{datetime.datetime.now()}: No issues for {p_id} from {org_id}')
    else:
        print(f'{datetime.datetime.now()}: Saving issues from {p_id} to {project_csv}')
        print(df)
        df.to_csv(project_csv, index=False)
# Script body: gathers yesterday's issues for every organization of one Snyk
# group into <output dir>/<date>/group-<group id>/org-<org id>/<project>.csv.
# Both environment variables are required; a missing one raises KeyError.
snyktoken = os.environ['SNYK_TOKEN']
snykgroup = os.environ['SNYK_GROUP']

# this sets the session to include retries in case of api timeouts etc
client = snyk.SnykClient(snyktoken, tries=3, delay=1, backoff=1)

# The report covers the day before the script runs (local clock).
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
the_date = yesterday.strftime('%Y-%m-%d')

print(f'{datetime.datetime.now()}: Gathering all issues for all orgs with group ID: {snykgroup} for ({the_date})')

# Gets current working directory
cwd_path = os.getcwd()

# SNYK_OUTPUT_DIR overrides the default ./output folder.
output_dir = os.environ.get('SNYK_OUTPUT_DIR', os.path.join(cwd_path, 'output'))
date_path = os.path.join(output_dir, the_date)
group_path = os.path.join(date_path, f'group-{snykgroup}')

# A single makedirs call creates output_dir, date_path and group_path as
# needed, works when SNYK_OUTPUT_DIR has missing parents, and does not fail
# if the folders already exist.
os.makedirs(group_path, exist_ok=True)

# remove the phantom orgs that are really the groups (their group has no id)
# and the orgs that don't match the snykgroup
orgs = [
    o for o in client.organizations.all()
    if hasattr(o.group, 'id') and o.group.id == snykgroup
]

for org in orgs:
    process_org(org, client, group_path, the_date)

print(f'{datetime.datetime.now()}: Completed gathering issues into folder: {group_path}')
print(f'{datetime.datetime.now()}: To combine into one CSV run `python join_csv.py {group_path}`')