From 826c7538e091fac14d987a3cd654975da964e240 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 27 Oct 2021 18:50:17 -0700 Subject: make fmt (black 21.9b0) --- python/scripts/unpaywall2ingestrequest.py | 63 +++++++++++++++---------------- 1 file changed, 31 insertions(+), 32 deletions(-) (limited to 'python/scripts/unpaywall2ingestrequest.py') diff --git a/python/scripts/unpaywall2ingestrequest.py b/python/scripts/unpaywall2ingestrequest.py index b79f316..ad5353b 100755 --- a/python/scripts/unpaywall2ingestrequest.py +++ b/python/scripts/unpaywall2ingestrequest.py @@ -11,7 +11,6 @@ import urlcanon DOMAIN_BLOCKLIST = [ # large OA publishers (we get via DOI) - # large repos and aggregators (we crawl directly) "://arxiv.org/", "://europepmc.org/", @@ -25,11 +24,11 @@ DOMAIN_BLOCKLIST = [ ] RELEASE_STAGE_MAP = { - 'draftVersion': 'draft', - 'submittedVersion': 'submitted', - 'acceptedVersion': 'accepted', - 'publishedVersion': 'published', - 'updatedVersion': 'updated', + "draftVersion": "draft", + "submittedVersion": "submitted", + "acceptedVersion": "accepted", + "publishedVersion": "published", + "updatedVersion": "updated", } @@ -45,44 +44,44 @@ def transform(obj): """ requests = [] - if not obj['doi'].startswith('10.'): + if not obj["doi"].startswith("10."): return requests - if not obj['oa_locations']: + if not obj["oa_locations"]: return requests - for location in obj['oa_locations']: - if not location['url_for_pdf']: + for location in obj["oa_locations"]: + if not location["url_for_pdf"]: continue skip = False for domain in DOMAIN_BLOCKLIST: - if domain in location['url_for_pdf']: + if domain in location["url_for_pdf"]: skip = True if skip: continue try: - base_url = canon(location['url_for_pdf']) + base_url = canon(location["url_for_pdf"]) except UnicodeEncodeError: continue request = { - 'base_url': base_url, - 'ingest_type': 'pdf', - 'link_source': 'unpaywall', - 'link_source_id': obj['doi'].lower(), - 'ingest_request_source': 'unpaywall', - 'release_stage': RELEASE_STAGE_MAP.get(location['version']), - 'rel': location['host_type'], - 'ext_ids': { - 'doi': obj['doi'].lower(), + "base_url": base_url, + "ingest_type": "pdf", + "link_source": "unpaywall", + "link_source_id": obj["doi"].lower(), + "ingest_request_source": "unpaywall", + "release_stage": RELEASE_STAGE_MAP.get(location["version"]), + "rel": location["host_type"], + "ext_ids": { + "doi": obj["doi"].lower(), }, - 'edit_extra': {}, + "edit_extra": {}, } - if obj.get('oa_status'): - request['edit_extra']['oa_status'] = obj['oa_status'] - if location.get('evidence'): - request['edit_extra']['evidence'] = location['evidence'] - if location['pmh_id']: - request['ext_ids']['pmh_id'] = location['pmh_id'] + if obj.get("oa_status"): + request["edit_extra"]["oa_status"] = obj["oa_status"] + if location.get("evidence"): + request["edit_extra"]["evidence"] = location["evidence"] + if location["pmh_id"]: + request["ext_ids"]["pmh_id"] = location["pmh_id"] requests.append(request) return requests @@ -101,9 +100,9 @@ def run(args): def main(): parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('json_file', - help="unpaywall dump file to use", - type=argparse.FileType('r')) + parser.add_argument( + "json_file", help="unpaywall dump file to use", type=argparse.FileType("r") + ) subparsers = parser.add_subparsers() args = parser.parse_args() @@ -111,5 +110,5 @@ def main(): run(args) -if __name__ == '__main__': +if __name__ == "__main__": main() -- cgit v1.2.3