From d1862ea4a6dcb781713ffc54bdc4bd7d7cdbdb9e Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 8 Jan 2019 14:35:46 -0800 Subject: start updating importer auth with crossref importer --- python/fatcat_tools/api_auth.py | 6 +++--- python/fatcat_tools/importers/common.py | 32 ++++++++++++++++++++++--------- python/fatcat_tools/importers/crossref.py | 16 ++++++++++++++-- python/tests/fixtures.py | 5 ++++- python/tests/import_crossref.py | 16 ++++++++++++---- 5 files changed, 56 insertions(+), 19 deletions(-) (limited to 'python') diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py index b36d467c..c49051f6 100644 --- a/python/fatcat_tools/api_auth.py +++ b/python/fatcat_tools/api_auth.py @@ -1,5 +1,5 @@ -import sys +import os, sys import fatcat_client from fatcat_client.rest import ApiException @@ -23,7 +23,7 @@ def authenticated_api(host_uri, token=None): conf = fatcat_client.Configuration() conf.host = host_uri if not token: - token = sys.env['FATCAT_API_AUTH_TOKEN'] + token = os.environ['FATCAT_API_AUTH_TOKEN'] if not token: sys.stderr.write( 'This client requires a fatcat API token (eg, in env var FATCAT_API_AUTH_TOKEN)\n') @@ -34,7 +34,7 @@ def authenticated_api(host_uri, token=None): api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) # verify up front that auth is working - api.check_auth() + api.auth_check() return api diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 40c7abc0..5c33ebc9 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -4,6 +4,7 @@ import sys import csv import json import itertools +import subprocess from collections import Counter import pykafka @@ -37,19 +38,32 @@ class FatcatImporter: Base class for fatcat importers """ - def __init__(self, host_url, issn_map_file=None): - conf = fatcat_client.Configuration() - conf.host = host_url - self.api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) + def __init__(self, api, **kwargs): + + eg_extra = kwargs.get('editgroup_extra', dict()) + eg_extra['git_rev'] = eg_extra.get('git_rev', + subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8') + + self.api = api + self._editgroup_description = kwargs.get('editgroup_description') + self._editgroup_extra = kwargs.get('editgroup_extra') + issn_map_file = kwargs.get('issn_map_file') + self._issnl_id_map = dict() self._orcid_id_map = dict() self._doi_id_map = dict() - self._issn_issnl_map = None - self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$") if issn_map_file: self.read_issn_map_file(issn_map_file) + self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$") self.counts = Counter({'insert': 0, 'update': 0, 'processed_lines': 0}) + def _editgroup(self): + eg = fatcat_client.Editgroup( + description=self._editgroup_description, + extra=self._editgroup_extra, + ) + return self.api.create_editgroup(eg) + def describe_run(self): print("Processed {} lines, inserted {}, updated {}.".format( self.counts['processed_lines'], self.counts['insert'], self.counts['update'])) @@ -64,13 +78,13 @@ class FatcatImporter: def process_source(self, source, group_size=100): """Creates and auto-accepts editgroup every group_size rows""" - eg = self.api.create_editgroup(fatcat_client.Editgroup()) + eg = self._editgroup() i = 0 for i, row in enumerate(source): self.create_row(row, editgroup_id=eg.editgroup_id) if i > 0 and (i % group_size) == 0: self.api.accept_editgroup(eg.editgroup_id) - eg = self.api.create_editgroup(fatcat_client.Editgroup()) + eg = self._editgroup() self.counts['processed_lines'] += 1 if i == 0 or (i % group_size) != 0: self.api.accept_editgroup(eg.editgroup_id) @@ -81,7 +95,7 @@ class FatcatImporter: if decode_kafka: rows = [msg.value.decode('utf-8') for msg in rows] self.counts['processed_lines'] += len(rows) - eg = self.api.create_editgroup(fatcat_client.Editgroup()) + eg = self._editgroup() self.create_batch(rows, editgroup_id=eg.editgroup_id) def process_csv_source(self, source, group_size=100, delimiter=','): diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 05543590..4f7faf59 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -4,6 +4,7 @@ import json import sqlite3 import datetime import itertools +import subprocess import fatcat_client from .common import FatcatImporter @@ -40,8 +41,19 @@ class CrossrefImporter(FatcatImporter): See https://github.com/CrossRef/rest-api-doc for JSON schema notes """ - def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True, check_existing=True): - super().__init__(host_url, issn_map_file) + def __init__(self, api, issn_map_file, **kwargs): + + eg_desc = kwargs.get('editgroup_description', + "Automated import of Crossref DOI metadata, harvested from REST API") + eg_extra = kwargs.get('editgroup_extra', dict()) + eg_extra['agent'] = eg_extra.get('agent', 'CrossrefImporter') + super().__init__(api, + issn_map_file=issn_map_file, + editgroup_description=eg_desc, + editgroup_extra=eg_extra) + extid_map_file = kwargs.get('extid_map_file') + create_containers = kwargs.get('create_containers') + check_existing = kwargs.get('check_existing') self.extid_map_db = None if extid_map_file: db_uri = "file:{}?mode=ro".format(extid_map_file) diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py index c415abef..6a880c48 100644 --- a/python/tests/fixtures.py +++ b/python/tests/fixtures.py @@ -4,12 +4,14 @@ import time import json import signal import pytest +from dotenv import load_dotenv import fatcat_web import fatcat_client @pytest.fixture def full_app(): + load_dotenv(dotenv_path="./env.example") fatcat_web.app.testing = True fatcat_web.app.debug = False return fatcat_web.app @@ -20,9 +22,10 @@ def app(full_app): @pytest.fixture def api(): + load_dotenv(dotenv_path="./env.example") conf = fatcat_client.Configuration() conf.host = "http://localhost:9411/v0" - conf.api_key["Authorization"] = "AgEPZGV2LmZhdGNhdC53aWtpAg4yMDE4LTEyLTMxLWRldgACJmVkaXRvcl9pZCA9IGFhYWFhYWFhYWFhYWJrdmthYWFhYWFhYWFlAAIeY3JlYXRlZCA9IDIwMTgtMTItMzFUMjE6MTU6NDdaAAAGIMWFZeZ54pH4OzNl5+U5X3p1H1rMioSuIldihuiM5XAw" + conf.api_key["Authorization"] = os.getenv("FATCAT_API_AUTH_TOKEN") conf.api_key_prefix["Authorization"] = "Bearer" api_client = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) return api_client diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index 1fb4a70f..3ef97719 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -2,17 +2,18 @@ import json import pytest from fatcat_tools.importers import CrossrefImporter +from fixtures import api @pytest.fixture(scope="function") -def crossref_importer(): +def crossref_importer(api): with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield CrossrefImporter("http://localhost:9411/v0", issn_file, 'tests/files/example_map.sqlite3', check_existing=False) + yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=False) @pytest.fixture(scope="function") -def crossref_importer_existing(): +def crossref_importer_existing(api): with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield CrossrefImporter("http://localhost:9411/v0", issn_file, 'tests/files/example_map.sqlite3', check_existing=True) + yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=True) def test_crossref_importer_batch(crossref_importer): with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: @@ -21,6 +22,13 @@ def test_crossref_importer_batch(crossref_importer): def test_crossref_importer(crossref_importer): with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: crossref_importer.process_source(f) + # fetch most recent editgroup + changes = crossref_importer.api.get_changelog(limit=1) + eg = changes[0].editgroup + assert eg.description + assert "crossref" in eg.description.lower() + assert eg.extra['git_rev'] + assert "CrossrefImporter" in eg.extra['agent'] def test_crossref_mappings(crossref_importer): assert crossref_importer.map_release_type('journal-article') == "article-journal" -- cgit v1.2.3