diff options
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/cord19_fatcat_enrich.py (renamed from scripts/who_enrich.py) | 20 | ||||
| -rwxr-xr-x | scripts/deliver_file2disk.py | 10 | 
2 files changed, 7 insertions, 23 deletions
diff --git a/scripts/who_enrich.py b/scripts/cord19_fatcat_enrich.py index b445927..5d3a554 100755 --- a/scripts/who_enrich.py +++ b/scripts/cord19_fatcat_enrich.py @@ -1,17 +1,8 @@  #!/usr/bin/env python3  """ -This script takes a "Paper" MAG TSV file which has been joined with (at most) a -single "PaperExtendedAttributes", parses it into JSON, and does fatcat fetches -to "enrich" the output. Outputs a single JSON object per line with attributes: - -    mag_id -    mag_paper -    release_id -    fatcat_release - -Input columns: - +Takes a JSON-transformed CORD-19 *metadata* file and enriches it with fatcat +metadata.  """  import sys @@ -50,6 +41,9 @@ def do_line(row, args):      doi = row.get('doi') or None      fatcat_release = None +    if doi == '0.1126/science.abb7331': +        doi = '10.1126/science.abb7331' +      if not fatcat_release and pmcid:          resp = args.session.get('https://api.fatcat.wiki/v0/release/lookup',              params={ @@ -79,7 +73,7 @@ def do_line(row, args):              fatcat_release = resp.json()      obj = dict( -        who_paper=row, +        cord19_paper=row,      )      if fatcat_release:          obj['fatcat_release'] = fatcat_release @@ -96,7 +90,7 @@ def main():      parser = argparse.ArgumentParser(          formatter_class=argparse.ArgumentDefaultsHelpFormatter)      parser.add_argument('json_file', -        help="WHO/S2 parsed JSON file", +        help="CORD-19 parsed JSON file",          type=argparse.FileType('r'))      subparsers = parser.add_subparsers() diff --git a/scripts/deliver_file2disk.py b/scripts/deliver_file2disk.py index d661acc..9ec234a 100755 --- a/scripts/deliver_file2disk.py +++ b/scripts/deliver_file2disk.py @@ -10,16 +10,6 @@ Behavior:      - try downloading from any archive.org or web.archive.org URLs      - verify SHA-1      - write out to disk - -TODO: -x blob_path(sha1hex) -> returns relative/local path file would be saved to -x filter_files(files) -> list of files to try -- fetch_release(release) -> tries to download PDF bytes -- fetch_file(file) -> returns bytes of fetched file -- fetch_content(url) -> tries to download PDF bytes - -LATER: -- GRBOID XML as well, from minio?  """  # XXX: some broken MRO thing going on in here due to python3 object wrangling  | 
