diff options
-rwxr-xr-x | python/fatcat_export.py | 13 | ||||
-rwxr-xr-x | python/fatcat_harvest.py | 5 | ||||
-rwxr-xr-x | python/fatcat_import.py | 49 | ||||
-rw-r--r-- | python/fatcat_tools/__init__.py | 1 | ||||
-rw-r--r-- | python/fatcat_tools/api_auth.py | 40 | ||||
-rwxr-xr-x | python/fatcat_worker.py | 11 |
6 files changed, 96 insertions, 23 deletions
diff --git a/python/fatcat_export.py b/python/fatcat_export.py index 6a5395de..a59fcc0b 100755 --- a/python/fatcat_export.py +++ b/python/fatcat_export.py @@ -15,11 +15,9 @@ from fatcat_client.rest import ApiException from fatcat_client import ReleaseEntity from fatcat_tools import uuid2fcid, entity_from_json, release_to_elasticsearch -def run_export_releases(args): - conf = fatcat_client.Configuration() - conf.host = args.host_url - api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) +def run_export_releases(args): + api = args.api for line in args.ident_file: ident = uuid2fcid(line.split()[0]) release = api.get_release(id=ident, expand="all") @@ -35,10 +33,7 @@ def run_transform_releases(args): json.dumps(release_to_elasticsearch(release)) + '\n') def run_export_changelog(args): - conf = fatcat_client.Configuration() - conf.host = args.host_url - api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) - + api = args.api end = args.end if end is None: latest = api.get_changelog(limit=1)[0] @@ -92,6 +87,8 @@ def main(): if not args.__dict__.get("func"): print("tell me what to do!") sys.exit(-1) + + args.api = public_api(args.host_url) args.func(args) if __name__ == '__main__': diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py index 5f6f471b..e28c9b08 100755 --- a/python/fatcat_harvest.py +++ b/python/fatcat_harvest.py @@ -1,12 +1,17 @@ #!/usr/bin/env python3 import sys +import raven import argparse import datetime from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\ HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\ HarvestDoajJournalWorker +# Yep, a global. 
Gets DSN from `SENTRY_DSN` environment variable +sentry_client = raven.Client() + + def run_crossref(args): worker = HarvestCrossrefWorker( kafka_hosts=args.kafka_hosts, diff --git a/python/fatcat_import.py b/python/fatcat_import.py index fe5b24a6..0e176b2c 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -1,14 +1,18 @@ #!/usr/bin/env python3 -import sys -import argparse +""" +""" + +import os, sys, argparse +from fatcat_tools import authenticated_api from fatcat_tools.importers import CrossrefImporter, OrcidImporter, \ IssnImporter, MatchedImporter, GrobidMetadataImporter, make_kafka_consumer def run_crossref(args): - fci = CrossrefImporter(args.host_url, args.issn_map_file, - args.extid_map_file, create_containers=(not args.no_create_containers), + fci = CrossrefImporter(args.api, args.issn_map_file, + extid_map_file=args.extid_map_file, + create_containers=(not args.no_create_containers), check_existing=(not args.no_release_updates)) if args.kafka_mode: consumer = make_kafka_consumer( @@ -19,23 +23,23 @@ def run_crossref(args): fci.describe_run() def run_orcid(args): - foi = OrcidImporter(args.host_url) + foi = OrcidImporter(args.api) foi.process_batch(args.json_file, size=args.batch_size) foi.describe_run() def run_issn(args): - fii = IssnImporter(args.host_url) + fii = IssnImporter(args.api) fii.process_csv_batch(args.csv_file, size=args.batch_size) fii.describe_run() def run_matched(args): - fmi = MatchedImporter(args.host_url, + fmi = MatchedImporter(args.api, skip_file_updates=args.no_file_updates) fmi.process_batch(args.json_file, size=args.batch_size) fmi.describe_run() def run_grobid_metadata(args): - fmi = GrobidMetadataImporter(args.host_url) + fmi = GrobidMetadataImporter(args.api) fmi.process_source(args.tsv_file, group_size=args.group_size) fmi.describe_run() @@ -56,7 +60,10 @@ def main(): subparsers = parser.add_subparsers() sub_crossref = subparsers.add_parser('crossref') - sub_crossref.set_defaults(func=run_crossref) + 
sub_crossref.set_defaults( + func=run_crossref, + auth_var="FATCAT_AUTH_WORKER_CROSSREF", + ) sub_crossref.add_argument('json_file', help="crossref JSON file to import from", default=sys.stdin, type=argparse.FileType('r')) @@ -80,7 +87,10 @@ def main(): help="don't lookup existing DOIs, just insert (only for bootstrap)") sub_orcid = subparsers.add_parser('orcid') - sub_orcid.set_defaults(func=run_orcid) + sub_orcid.set_defaults( + func=run_orcid, + auth_var="FATCAT_AUTH_WORKER_ORCID" + ) sub_orcid.add_argument('json_file', help="orcid JSON file to import from (or stdin)", default=sys.stdin, type=argparse.FileType('r')) @@ -89,7 +99,10 @@ def main(): default=50, type=int) sub_issn = subparsers.add_parser('issn') - sub_issn.set_defaults(func=run_issn) + sub_issn.set_defaults( + func=run_issn, + auth_var="FATCAT_AUTH_WORKER_ISSN", + ) sub_issn.add_argument('csv_file', help="Journal ISSN CSV metadata file to import from (or stdin)", default=sys.stdin, type=argparse.FileType('r')) @@ -98,7 +111,10 @@ def main(): default=50, type=int) sub_matched = subparsers.add_parser('matched') - sub_matched.set_defaults(func=run_matched) + sub_matched.set_defaults( + func=run_matched, + auth_var="FATCAT_AUTH_WORKER_MATCHED", + ) sub_matched.add_argument('json_file', help="JSON file to import from (or stdin)", default=sys.stdin, type=argparse.FileType('r')) @@ -110,7 +126,10 @@ def main(): default=50, type=int) sub_grobid_metadata = subparsers.add_parser('grobid-metadata') - sub_grobid_metadata.set_defaults(func=run_grobid_metadata) + sub_grobid_metadata.set_defaults( + func=run_grobid_metadata, + auth_var="FATCAT_AUTH_WORKER_GROBID_METADATA", + ) sub_grobid_metadata.add_argument('tsv_file', help="TSV file to import from (or stdin)", default=sys.stdin, type=argparse.FileType('r')) @@ -122,6 +141,10 @@ def main(): if not args.__dict__.get("func"): print("tell me what to do!") sys.exit(-1) + + args.api = authenticated_api( + args.host_url, + token=os.environ.get(args.auth_var)) 
args.func(args) if __name__ == '__main__': diff --git a/python/fatcat_tools/__init__.py b/python/fatcat_tools/__init__.py index 0bb42ab5..06e59c14 100644 --- a/python/fatcat_tools/__init__.py +++ b/python/fatcat_tools/__init__.py @@ -1,3 +1,4 @@ +from .api_auth import authenticated_api, public_api from .fcid import fcid2uuid, uuid2fcid from .transforms import entity_to_json, entity_from_json, release_to_elasticsearch diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py new file mode 100644 index 00000000..b36d467c --- /dev/null +++ b/python/fatcat_tools/api_auth.py @@ -0,0 +1,40 @@ + +import os, sys +import fatcat_client +from fatcat_client.rest import ApiException + + +def public_api(host_uri): + """ + Note: unlike the authenticated variant, this helper might get called even + if the API isn't going to be used, so it's important that it doesn't try to + actually connect to the API host or something. + """ + conf = fatcat_client.Configuration() + conf.host = host_uri + return fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) + +def authenticated_api(host_uri, token=None): + """ + Note: if this helper is called, it's implied that an actual API connection + is needed, so it does try to connect and verify credentials.
+ """ + + conf = fatcat_client.Configuration() + conf.host = host_uri + if not token: + token = os.environ.get('FATCAT_API_AUTH_TOKEN') + if not token: + sys.stderr.write( + 'This client requires a fatcat API token (eg, in env var FATCAT_API_AUTH_TOKEN)\n') + sys.exit(-1) + + conf.api_key["Authorization"] = token + conf.api_key_prefix["Authorization"] = "Bearer" + api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) + + # verify up front that auth is working + api.check_auth() + + return api + + diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py index e0ac48d8..3c4cacc1 100755 --- a/python/fatcat_worker.py +++ b/python/fatcat_worker.py @@ -1,21 +1,26 @@ #!/usr/bin/env python3 import sys +import raven import argparse import datetime +from fatcat_tools import public_api from fatcat_tools.workers import ChangelogWorker, EntityUpdatesWorker, ElasticsearchReleaseWorker +# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable +sentry_client = raven.Client() + def run_changelog(args): topic = "fatcat-{}.changelog".format(args.env) - worker = ChangelogWorker(args.api_host_url, args.kafka_hosts, topic, + worker = ChangelogWorker(args.api, args.kafka_hosts, topic, args.poll_interval) worker.run() def run_entity_updates(args): changelog_topic = "fatcat-{}.changelog".format(args.env) release_topic = "fatcat-{}.release-updates".format(args.env) - worker = EntityUpdatesWorker(args.api_host_url, args.kafka_hosts, + worker = EntityUpdatesWorker(args.api, args.kafka_hosts, changelog_topic, release_topic) worker.run() @@ -64,6 +69,8 @@ def main(): if not args.__dict__.get("func"): print("tell me what to do!") sys.exit(-1) + + args.api = public_api(args.api_host_url) args.func(args) if __name__ == '__main__': |