summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-01-08 14:35:46 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-01-08 14:35:46 -0800
commit d1862ea4a6dcb781713ffc54bdc4bd7d7cdbdb9e (patch)
tree 7e84e4848ecbe2d2c5e013c2f16eb19ada634b13
parent fc74ae5843d78fd072fbdce483db4608577a4794 (diff)
download fatcat-d1862ea4a6dcb781713ffc54bdc4bd7d7cdbdb9e.tar.gz
download fatcat-d1862ea4a6dcb781713ffc54bdc4bd7d7cdbdb9e.zip
start updating importer auth with crossref importer
-rw-r--r-- python/fatcat_tools/api_auth.py 6
-rw-r--r-- python/fatcat_tools/importers/common.py 32
-rw-r--r-- python/fatcat_tools/importers/crossref.py 16
-rw-r--r-- python/tests/fixtures.py 5
-rw-r--r-- python/tests/import_crossref.py 16
5 files changed, 56 insertions, 19 deletions
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index b36d467c..c49051f6 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -1,5 +1,5 @@
-import sys
+import os, sys
import fatcat_client
from fatcat_client.rest import ApiException
@@ -23,7 +23,7 @@ def authenticated_api(host_uri, token=None):
conf = fatcat_client.Configuration()
conf.host = host_uri
if not token:
- token = sys.env['FATCAT_API_AUTH_TOKEN']
+ token = os.environ['FATCAT_API_AUTH_TOKEN']
if not token:
sys.stderr.write(
'This client requires a fatcat API token (eg, in env var FATCAT_API_AUTH_TOKEN)\n')
@@ -34,7 +34,7 @@ def authenticated_api(host_uri, token=None):
api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
# verify up front that auth is working
- api.check_auth()
+ api.auth_check()
return api
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 40c7abc0..5c33ebc9 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -4,6 +4,7 @@ import sys
import csv
import json
import itertools
+import subprocess
from collections import Counter
import pykafka
@@ -37,19 +38,32 @@ class FatcatImporter:
Base class for fatcat importers
"""
- def __init__(self, host_url, issn_map_file=None):
- conf = fatcat_client.Configuration()
- conf.host = host_url
- self.api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
+ def __init__(self, api, **kwargs):
+
+ eg_extra = kwargs.get('editgroup_extra', dict())
+ eg_extra['git_rev'] = eg_extra.get('git_rev',
+ subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8')
+
+ self.api = api
+ self._editgroup_description = kwargs.get('editgroup_description')
+ self._editgroup_extra = kwargs.get('editgroup_extra')
+ issn_map_file = kwargs.get('issn_map_file')
+
self._issnl_id_map = dict()
self._orcid_id_map = dict()
self._doi_id_map = dict()
- self._issn_issnl_map = None
- self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")
if issn_map_file:
self.read_issn_map_file(issn_map_file)
+ self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")
self.counts = Counter({'insert': 0, 'update': 0, 'processed_lines': 0})
+ def _editgroup(self):
+ eg = fatcat_client.Editgroup(
+ description=self._editgroup_description,
+ extra=self._editgroup_extra,
+ )
+ return self.api.create_editgroup(eg)
+
def describe_run(self):
print("Processed {} lines, inserted {}, updated {}.".format(
self.counts['processed_lines'], self.counts['insert'], self.counts['update']))
@@ -64,13 +78,13 @@ class FatcatImporter:
def process_source(self, source, group_size=100):
"""Creates and auto-accepts editgroup every group_size rows"""
- eg = self.api.create_editgroup(fatcat_client.Editgroup())
+ eg = self._editgroup()
i = 0
for i, row in enumerate(source):
self.create_row(row, editgroup_id=eg.editgroup_id)
if i > 0 and (i % group_size) == 0:
self.api.accept_editgroup(eg.editgroup_id)
- eg = self.api.create_editgroup(fatcat_client.Editgroup())
+ eg = self._editgroup()
self.counts['processed_lines'] += 1
if i == 0 or (i % group_size) != 0:
self.api.accept_editgroup(eg.editgroup_id)
@@ -81,7 +95,7 @@ class FatcatImporter:
if decode_kafka:
rows = [msg.value.decode('utf-8') for msg in rows]
self.counts['processed_lines'] += len(rows)
- eg = self.api.create_editgroup(fatcat_client.Editgroup())
+ eg = self._editgroup()
self.create_batch(rows, editgroup_id=eg.editgroup_id)
def process_csv_source(self, source, group_size=100, delimiter=','):
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 05543590..4f7faf59 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -4,6 +4,7 @@ import json
import sqlite3
import datetime
import itertools
+import subprocess
import fatcat_client
from .common import FatcatImporter
@@ -40,8 +41,19 @@ class CrossrefImporter(FatcatImporter):
See https://github.com/CrossRef/rest-api-doc for JSON schema notes
"""
- def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True, check_existing=True):
- super().__init__(host_url, issn_map_file)
+ def __init__(self, api, issn_map_file, **kwargs):
+
+ eg_desc = kwargs.get('editgroup_description',
+ "Automated import of Crossref DOI metadata, harvested from REST API")
+ eg_extra = kwargs.get('editgroup_extra', dict())
+ eg_extra['agent'] = eg_extra.get('agent', 'CrossrefImporter')
+ super().__init__(api,
+ issn_map_file=issn_map_file,
+ editgroup_description=eg_desc,
+ editgroup_extra=eg_extra)
+ extid_map_file = kwargs.get('extid_map_file')
+ create_containers = kwargs.get('create_containers')
+ check_existing = kwargs.get('check_existing')
self.extid_map_db = None
if extid_map_file:
db_uri = "file:{}?mode=ro".format(extid_map_file)
diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py
index c415abef..6a880c48 100644
--- a/python/tests/fixtures.py
+++ b/python/tests/fixtures.py
@@ -4,12 +4,14 @@ import time
import json
import signal
import pytest
+from dotenv import load_dotenv
import fatcat_web
import fatcat_client
@pytest.fixture
def full_app():
+ load_dotenv(dotenv_path="./env.example")
fatcat_web.app.testing = True
fatcat_web.app.debug = False
return fatcat_web.app
@@ -20,9 +22,10 @@ def app(full_app):
@pytest.fixture
def api():
+ load_dotenv(dotenv_path="./env.example")
conf = fatcat_client.Configuration()
conf.host = "http://localhost:9411/v0"
- conf.api_key["Authorization"] = "AgEPZGV2LmZhdGNhdC53aWtpAg4yMDE4LTEyLTMxLWRldgACJmVkaXRvcl9pZCA9IGFhYWFhYWFhYWFhYWJrdmthYWFhYWFhYWFlAAIeY3JlYXRlZCA9IDIwMTgtMTItMzFUMjE6MTU6NDdaAAAGIMWFZeZ54pH4OzNl5+U5X3p1H1rMioSuIldihuiM5XAw"
+ conf.api_key["Authorization"] = os.getenv("FATCAT_API_AUTH_TOKEN")
conf.api_key_prefix["Authorization"] = "Bearer"
api_client = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
return api_client
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index 1fb4a70f..3ef97719 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -2,17 +2,18 @@
import json
import pytest
from fatcat_tools.importers import CrossrefImporter
+from fixtures import api
@pytest.fixture(scope="function")
-def crossref_importer():
+def crossref_importer(api):
with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield CrossrefImporter("http://localhost:9411/v0", issn_file, 'tests/files/example_map.sqlite3', check_existing=False)
+ yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=False)
@pytest.fixture(scope="function")
-def crossref_importer_existing():
+def crossref_importer_existing(api):
with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield CrossrefImporter("http://localhost:9411/v0", issn_file, 'tests/files/example_map.sqlite3', check_existing=True)
+ yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=True)
def test_crossref_importer_batch(crossref_importer):
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
@@ -21,6 +22,13 @@ def test_crossref_importer_batch(crossref_importer):
def test_crossref_importer(crossref_importer):
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
crossref_importer.process_source(f)
+ # fetch most recent editgroup
+ changes = crossref_importer.api.get_changelog(limit=1)
+ eg = changes[0].editgroup
+ assert eg.description
+ assert "crossref" in eg.description.lower()
+ assert eg.extra['git_rev']
+ assert "CrossrefImporter" in eg.extra['agent']
def test_crossref_mappings(crossref_importer):
assert crossref_importer.map_release_type('journal-article') == "article-journal"