diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-08 14:35:46 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-08 14:35:46 -0800 | 
| commit | d1862ea4a6dcb781713ffc54bdc4bd7d7cdbdb9e (patch) | |
| tree | 7e84e4848ecbe2d2c5e013c2f16eb19ada634b13 | |
| parent | fc74ae5843d78fd072fbdce483db4608577a4794 (diff) | |
| download | fatcat-d1862ea4a6dcb781713ffc54bdc4bd7d7cdbdb9e.tar.gz fatcat-d1862ea4a6dcb781713ffc54bdc4bd7d7cdbdb9e.zip | |
start updating importer auth with crossref importer
| -rw-r--r-- | python/fatcat_tools/api_auth.py | 6 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/common.py | 32 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 16 | ||||
| -rw-r--r-- | python/tests/fixtures.py | 5 | ||||
| -rw-r--r-- | python/tests/import_crossref.py | 16 | 
5 files changed, 56 insertions, 19 deletions
| diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py index b36d467c..c49051f6 100644 --- a/python/fatcat_tools/api_auth.py +++ b/python/fatcat_tools/api_auth.py @@ -1,5 +1,5 @@ -import sys +import os, sys  import fatcat_client  from fatcat_client.rest import ApiException @@ -23,7 +23,7 @@ def authenticated_api(host_uri, token=None):      conf = fatcat_client.Configuration()      conf.host = host_uri      if not token: -        token = sys.env['FATCAT_API_AUTH_TOKEN'] +        token = os.environ['FATCAT_API_AUTH_TOKEN']      if not token:          sys.stderr.write(              'This client requires a fatcat API token (eg, in env var FATCAT_API_AUTH_TOKEN)\n') @@ -34,7 +34,7 @@ def authenticated_api(host_uri, token=None):      api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))      # verify up front that auth is working -    api.check_auth() +    api.auth_check()      return api diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 40c7abc0..5c33ebc9 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -4,6 +4,7 @@ import sys  import csv  import json  import itertools +import subprocess  from collections import Counter  import pykafka @@ -37,19 +38,32 @@ class FatcatImporter:      Base class for fatcat importers      """ -    def __init__(self, host_url, issn_map_file=None): -        conf = fatcat_client.Configuration() -        conf.host = host_url -        self.api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) +    def __init__(self, api, **kwargs): + +        eg_extra = kwargs.get('editgroup_extra', dict()) +        eg_extra['git_rev'] = eg_extra.get('git_rev', +            subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8') +         +        self.api = api +        self._editgroup_description = kwargs.get('editgroup_description') +        self._editgroup_extra = kwargs.get('editgroup_extra') +        issn_map_file = kwargs.get('issn_map_file') +          self._issnl_id_map = dict()          self._orcid_id_map = dict()          self._doi_id_map = dict() -        self._issn_issnl_map = None -        self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")          if issn_map_file:              self.read_issn_map_file(issn_map_file) +        self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")          self.counts = Counter({'insert': 0, 'update': 0, 'processed_lines': 0}) +    def _editgroup(self): +        eg = fatcat_client.Editgroup( +            description=self._editgroup_description, +            extra=self._editgroup_extra, +        ) +        return self.api.create_editgroup(eg) +      def describe_run(self):          print("Processed {} lines, inserted {}, updated {}.".format(              self.counts['processed_lines'], self.counts['insert'], self.counts['update'])) @@ -64,13 +78,13 @@ class FatcatImporter:      def process_source(self, source, group_size=100):          """Creates and auto-accepts editgroup every group_size rows""" -        eg = self.api.create_editgroup(fatcat_client.Editgroup()) +        eg = self._editgroup()          i = 0          for i, row in enumerate(source):              self.create_row(row, editgroup_id=eg.editgroup_id)              if i > 0 and (i % group_size) == 0:                  self.api.accept_editgroup(eg.editgroup_id) -                eg = self.api.create_editgroup(fatcat_client.Editgroup()) +                eg = self._editgroup()              self.counts['processed_lines'] += 1          if i == 0 or (i % group_size) != 0:              self.api.accept_editgroup(eg.editgroup_id) @@ -81,7 +95,7 @@ class FatcatImporter:              if decode_kafka:                  rows = [msg.value.decode('utf-8') for msg in rows]              self.counts['processed_lines'] += len(rows) -            eg = self.api.create_editgroup(fatcat_client.Editgroup()) +            eg = self._editgroup()              self.create_batch(rows, editgroup_id=eg.editgroup_id)      def process_csv_source(self, source, group_size=100, delimiter=','): diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 05543590..4f7faf59 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -4,6 +4,7 @@ import json  import sqlite3  import datetime  import itertools +import subprocess  import fatcat_client  from .common import FatcatImporter @@ -40,8 +41,19 @@ class CrossrefImporter(FatcatImporter):      See https://github.com/CrossRef/rest-api-doc for JSON schema notes      """ -    def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True, check_existing=True): -        super().__init__(host_url, issn_map_file) +    def __init__(self, api, issn_map_file, **kwargs): + +        eg_desc = kwargs.get('editgroup_description', +            "Automated import of Crossref DOI metadata, harvested from REST API") +        eg_extra = kwargs.get('editgroup_extra', dict()) +        eg_extra['agent'] = eg_extra.get('agent', 'CrossrefImporter') +        super().__init__(api, +            issn_map_file=issn_map_file, +            editgroup_description=eg_desc, +            editgroup_extra=eg_extra) +        extid_map_file = kwargs.get('extid_map_file') +        create_containers = kwargs.get('create_containers') +        check_existing = kwargs.get('check_existing')          self.extid_map_db = None          if extid_map_file:              db_uri = "file:{}?mode=ro".format(extid_map_file) diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py index c415abef..6a880c48 100644 --- a/python/tests/fixtures.py +++ b/python/tests/fixtures.py @@ -4,12 +4,14 @@ import time  import json  import signal  import pytest +from dotenv import load_dotenv  import fatcat_web  import fatcat_client  @pytest.fixture  def full_app(): +    load_dotenv(dotenv_path="./env.example")      fatcat_web.app.testing = True      fatcat_web.app.debug = False      return fatcat_web.app @@ -20,9 +22,10 @@ def app(full_app):  @pytest.fixture  def api(): +    load_dotenv(dotenv_path="./env.example")      conf = fatcat_client.Configuration()      conf.host = "http://localhost:9411/v0" -    conf.api_key["Authorization"] = "AgEPZGV2LmZhdGNhdC53aWtpAg4yMDE4LTEyLTMxLWRldgACJmVkaXRvcl9pZCA9IGFhYWFhYWFhYWFhYWJrdmthYWFhYWFhYWFlAAIeY3JlYXRlZCA9IDIwMTgtMTItMzFUMjE6MTU6NDdaAAAGIMWFZeZ54pH4OzNl5+U5X3p1H1rMioSuIldihuiM5XAw" +    conf.api_key["Authorization"] = os.getenv("FATCAT_API_AUTH_TOKEN")      conf.api_key_prefix["Authorization"] = "Bearer"      api_client = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))      return api_client diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index 1fb4a70f..3ef97719 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -2,17 +2,18 @@  import json  import pytest  from fatcat_tools.importers import CrossrefImporter +from fixtures import api  @pytest.fixture(scope="function") -def crossref_importer(): +def crossref_importer(api):      with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: -        yield CrossrefImporter("http://localhost:9411/v0", issn_file, 'tests/files/example_map.sqlite3', check_existing=False) +        yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=False)  @pytest.fixture(scope="function") -def crossref_importer_existing(): +def crossref_importer_existing(api):      with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: -        yield CrossrefImporter("http://localhost:9411/v0", issn_file, 'tests/files/example_map.sqlite3', check_existing=True) +        yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=True)  def test_crossref_importer_batch(crossref_importer):      with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: @@ -21,6 +22,13 @@ def test_crossref_importer_batch(crossref_importer):  def test_crossref_importer(crossref_importer):      with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:          crossref_importer.process_source(f) +    # fetch most recent editgroup +    changes = crossref_importer.api.get_changelog(limit=1) +    eg = changes[0].editgroup +    assert eg.description +    assert "crossref" in eg.description.lower() +    assert eg.extra['git_rev'] +    assert "CrossrefImporter" in eg.extra['agent']  def test_crossref_mappings(crossref_importer):      assert crossref_importer.map_release_type('journal-article') == "article-journal" | 
