aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xpython/fatcat_export.py13
-rwxr-xr-xpython/fatcat_harvest.py5
-rwxr-xr-xpython/fatcat_import.py49
-rw-r--r--python/fatcat_tools/__init__.py1
-rw-r--r--python/fatcat_tools/api_auth.py40
-rwxr-xr-xpython/fatcat_worker.py11
6 files changed, 96 insertions, 23 deletions
diff --git a/python/fatcat_export.py b/python/fatcat_export.py
index 6a5395de..a59fcc0b 100755
--- a/python/fatcat_export.py
+++ b/python/fatcat_export.py
@@ -15,11 +15,9 @@ from fatcat_client.rest import ApiException
from fatcat_client import ReleaseEntity
from fatcat_tools import uuid2fcid, entity_from_json, release_to_elasticsearch
-def run_export_releases(args):
- conf = fatcat_client.Configuration()
- conf.host = args.host_url
- api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
+def run_export_releases(args):
+ api = args.api
for line in args.ident_file:
ident = uuid2fcid(line.split()[0])
release = api.get_release(id=ident, expand="all")
@@ -35,10 +33,7 @@ def run_transform_releases(args):
json.dumps(release_to_elasticsearch(release)) + '\n')
def run_export_changelog(args):
- conf = fatcat_client.Configuration()
- conf.host = args.host_url
- api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
-
+ api = args.api
end = args.end
if end is None:
latest = api.get_changelog(limit=1)[0]
@@ -92,6 +87,8 @@ def main():
if not args.__dict__.get("func"):
print("tell me what to do!")
sys.exit(-1)
+
+ args.api = public_api(args.host_url)
args.func(args)
if __name__ == '__main__':
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index 5f6f471b..e28c9b08 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -1,12 +1,17 @@
#!/usr/bin/env python3
import sys
+import raven
import argparse
import datetime
from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\
HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\
HarvestDoajJournalWorker
+# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
+sentry_client = raven.Client()
+
+
def run_crossref(args):
worker = HarvestCrossrefWorker(
kafka_hosts=args.kafka_hosts,
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index fe5b24a6..0e176b2c 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -1,14 +1,18 @@
#!/usr/bin/env python3
-import sys
-import argparse
+"""
+"""
+
+import os, sys, argparse
+from fatcat_tools import authenticated_api
from fatcat_tools.importers import CrossrefImporter, OrcidImporter, \
IssnImporter, MatchedImporter, GrobidMetadataImporter, make_kafka_consumer
def run_crossref(args):
- fci = CrossrefImporter(args.host_url, args.issn_map_file,
- args.extid_map_file, create_containers=(not args.no_create_containers),
+ fci = CrossrefImporter(args.api, args.issn_map_file,
+ extid_map_file=args.extid_map_file,
+ create_containers=(not args.no_create_containers),
check_existing=(not args.no_release_updates))
if args.kafka_mode:
consumer = make_kafka_consumer(
@@ -19,23 +23,23 @@ def run_crossref(args):
fci.describe_run()
def run_orcid(args):
- foi = OrcidImporter(args.host_url)
+ foi = OrcidImporter(args.api)
foi.process_batch(args.json_file, size=args.batch_size)
foi.describe_run()
def run_issn(args):
- fii = IssnImporter(args.host_url)
+ fii = IssnImporter(args.api)
fii.process_csv_batch(args.csv_file, size=args.batch_size)
fii.describe_run()
def run_matched(args):
- fmi = MatchedImporter(args.host_url,
+ fmi = MatchedImporter(args.api,
skip_file_updates=args.no_file_updates)
fmi.process_batch(args.json_file, size=args.batch_size)
fmi.describe_run()
def run_grobid_metadata(args):
- fmi = GrobidMetadataImporter(args.host_url)
+ fmi = GrobidMetadataImporter(args.api)
fmi.process_source(args.tsv_file, group_size=args.group_size)
fmi.describe_run()
@@ -56,7 +60,10 @@ def main():
subparsers = parser.add_subparsers()
sub_crossref = subparsers.add_parser('crossref')
- sub_crossref.set_defaults(func=run_crossref)
+ sub_crossref.set_defaults(
+ func=run_crossref,
+ auth_var="FATCAT_AUTH_WORKER_CROSSREF",
+ )
sub_crossref.add_argument('json_file',
help="crossref JSON file to import from",
default=sys.stdin, type=argparse.FileType('r'))
@@ -80,7 +87,10 @@ def main():
help="don't lookup existing DOIs, just insert (only for bootstrap)")
sub_orcid = subparsers.add_parser('orcid')
- sub_orcid.set_defaults(func=run_orcid)
+ sub_orcid.set_defaults(
+ func=run_orcid,
+ auth_var="FATCAT_AUTH_WORKER_ORCID"
+ )
sub_orcid.add_argument('json_file',
help="orcid JSON file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
@@ -89,7 +99,10 @@ def main():
default=50, type=int)
sub_issn = subparsers.add_parser('issn')
- sub_issn.set_defaults(func=run_issn)
+ sub_issn.set_defaults(
+ func=run_issn,
+ auth_var="FATCAT_AUTH_WORKER_ISSN",
+ )
sub_issn.add_argument('csv_file',
help="Journal ISSN CSV metadata file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
@@ -98,7 +111,10 @@ def main():
default=50, type=int)
sub_matched = subparsers.add_parser('matched')
- sub_matched.set_defaults(func=run_matched)
+ sub_matched.set_defaults(
+ func=run_matched,
+ auth_var="FATCAT_AUTH_WORKER_MATCHED",
+ )
sub_matched.add_argument('json_file',
help="JSON file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
@@ -110,7 +126,10 @@ def main():
default=50, type=int)
sub_grobid_metadata = subparsers.add_parser('grobid-metadata')
- sub_grobid_metadata.set_defaults(func=run_grobid_metadata)
+ sub_grobid_metadata.set_defaults(
+ func=run_grobid_metadata,
+ auth_var="FATCAT_AUTH_WORKER_GROBID_METADATA",
+ )
sub_grobid_metadata.add_argument('tsv_file',
help="TSV file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
@@ -122,6 +141,10 @@ def main():
if not args.__dict__.get("func"):
print("tell me what to do!")
sys.exit(-1)
+
+ args.api = authenticated_api(
+ args.host_url,
+ token=os.environ.get(args.auth_var))
args.func(args)
if __name__ == '__main__':
diff --git a/python/fatcat_tools/__init__.py b/python/fatcat_tools/__init__.py
index 0bb42ab5..06e59c14 100644
--- a/python/fatcat_tools/__init__.py
+++ b/python/fatcat_tools/__init__.py
@@ -1,3 +1,4 @@
+from .api_auth import authenticated_api, public_api
from .fcid import fcid2uuid, uuid2fcid
from .transforms import entity_to_json, entity_from_json, release_to_elasticsearch
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
new file mode 100644
index 00000000..b36d467c
--- /dev/null
+++ b/python/fatcat_tools/api_auth.py
@@ -0,0 +1,40 @@
+
+import sys
+import fatcat_client
+from fatcat_client.rest import ApiException
+
+
+def public_api(host_uri):
+ """
+ Return an unauthenticated DefaultApi client configured for `host_uri`.
+ Note: unlike authenticated_api(), this may get called even if the API is
+ never used, so it must not try to actually connect to the API host.
+ """
+ conf = fatcat_client.Configuration()
+ conf.host = host_uri
+ return fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
+
+def authenticated_api(host_uri, token=None):
+    """
+    Build an API client for `host_uri` using bearer `token`, falling back to
+    env var FATCAT_API_AUTH_TOKEN; exits with status -1 if neither is set.
+
+    Note: if this helper is called, it's implied that an actual API connection
+    is needed, so it does try to connect and verify credentials.
+    """
+    import os  # local import: this module's header only imports `sys`
+    conf = fatcat_client.Configuration()
+    conf.host = host_uri
+    if not token:
+        # `sys.env` does not exist; use os.environ.get() so an unset variable
+        # yields None and falls through to the friendly error below
+        token = os.environ.get('FATCAT_API_AUTH_TOKEN')
+    if not token:
+        sys.stderr.write(
+            'This client requires a fatcat API token (eg, in env var FATCAT_API_AUTH_TOKEN)\n')
+        sys.exit(-1)
+    conf.api_key["Authorization"] = token
+    conf.api_key_prefix["Authorization"] = "Bearer"
+    api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
+    api.check_auth()  # verify up front that auth is working
+    return api
+
diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py
index e0ac48d8..3c4cacc1 100755
--- a/python/fatcat_worker.py
+++ b/python/fatcat_worker.py
@@ -1,21 +1,26 @@
#!/usr/bin/env python3
import sys
+import raven
import argparse
import datetime
+from fatcat_tools import public_api
from fatcat_tools.workers import ChangelogWorker, EntityUpdatesWorker, ElasticsearchReleaseWorker
+# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
+sentry_client = raven.Client()
+
def run_changelog(args):
topic = "fatcat-{}.changelog".format(args.env)
- worker = ChangelogWorker(args.api_host_url, args.kafka_hosts, topic,
+ worker = ChangelogWorker(args.api, args.kafka_hosts, topic,
args.poll_interval)
worker.run()
def run_entity_updates(args):
changelog_topic = "fatcat-{}.changelog".format(args.env)
release_topic = "fatcat-{}.release-updates".format(args.env)
- worker = EntityUpdatesWorker(args.api_host_url, args.kafka_hosts,
+ worker = EntityUpdatesWorker(args.api, args.kafka_hosts,
changelog_topic, release_topic)
worker.run()
@@ -64,6 +69,8 @@ def main():
if not args.__dict__.get("func"):
print("tell me what to do!")
sys.exit(-1)
+
+ args.api = public_api(args.api_host_url)
args.func(args)
if __name__ == '__main__':