Skip to content

Commit

Permalink
Reenable download-less dataset generation
Browse files (browse the repository at this point in the history)
New filerepo query provides MD5 sums.
  • Loading branch information
mih committed Apr 26, 2022
1 parent a5b6437 commit 129a448
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 18 deletions.
2 changes: 1 addition & 1 deletion datalad_ebrains/kg2ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def process_revision(ds, rev_id, rev_record, auth_token):
# 'ebrains_last_modified={last_modified}',
# 'ebrain_last_modification_userid={last_modifier}',
),
#fast=True,
fast=True,
save=False,
result_renderer='disabled',
return_type='generator',
Expand Down
44 changes: 27 additions & 17 deletions datalad_ebrains/kg_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,22 +236,32 @@ def _filerec2annexrec(rec, baseurl):
# filerepo 'prefix' and any subdirectories. Both are important to
# get the internal dataset organization right

# make sure to strip any leading '/' to ensure a relative path
# TODO double-check windows FS semantics
rpath = iri[len(baseurl):].lstrip('/')

return dict(
props = dict(
url=iri,
name=rpath,
# there is no such thing in the query ATM
#md5sum=rec['https://schema.hbp.eu/myQuery/hash'],
# TODO confirm unit rec['size']['unit'] == 'byte'
size=rec['size']['value'],
# TODO seems to give a mime type, confirm
content_type=rec['format']['fullName']
# TODO can we get a modification time?
# can we get the entity that last modified this file record
#last_modifier=rec[
# 'https://schema.hbp.eu/myQuery/lastModificationUserId'],
#last_modified=rec['https://schema.hbp.eu/myQuery/last_modified'],
# make sure to strip any leading '/' to ensure a relative path
# TODO double-check windows FS semantics
name=iri[len(baseurl):].lstrip('/'),
)
md5sum = None
for hsh in rec.get('hash', {}):
if hsh.get('algorithm') == 'MD5':
md5sum = hsh.get('digest')
if md5sum:
props['md5sum'] = md5sum

size = rec.get('size', {}).get('value') \
if rec.get('size', {}).get('unit') == 'byte' else None
if size is not None:
props['size'] = size

content_type = rec.get('format', {}).get('fullName')
if content_type:
props['content_type'] = content_type

# TODO can we get a modification time?
# can we get the entity that last modified this file record
#last_modifier=rec[
# 'https://schema.hbp.eu/myQuery/lastModificationUserId'],
#last_modified=rec['https://schema.hbp.eu/myQuery/last_modified'],

return props

0 comments on commit 129a448

Please sign in to comment.