diff options
-rwxr-xr-x | python/fatcat_import.py | 10 | ||||
-rw-r--r-- | python/fatcat_tools/importers/common.py (renamed from python/fatcat_tools/importer_common.py) | 0 | ||||
-rw-r--r-- | python/fatcat_tools/importers/crossref.py (renamed from python/fatcat_tools/crossref_importer.py) | 2 | ||||
-rw-r--r--[-rwxr-xr-x] | python/fatcat_tools/importers/grobid_metadata.py (renamed from python/fatcat_tools/grobid_metadata_importer.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/issn.py (renamed from python/fatcat_tools/issn_importer.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/matched.py (renamed from python/fatcat_tools/matched_importer.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/orcid.py (renamed from python/fatcat_tools/orcid_importer.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/raw_api_client.py | 66 | ||||
-rw-r--r-- | python/fatcat_tools/workers/changelog.py (renamed from python/fatcat_tools/changelog_workers.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/workers/elastic.py (renamed from python/fatcat_tools/elastic_workers.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/workers/worker_common.py (renamed from python/fatcat_tools/worker_common.py) | 0 | ||||
-rwxr-xr-x | python/fatcat_worker.py | 4 | ||||
-rw-r--r-- | python/tests/import_crossref.py (renamed from python/tests/crossref.py) | 2 | ||||
-rw-r--r-- | python/tests/import_grobid_metadata.py (renamed from python/tests/grobid_metadata_importer_test.py) | 2 | ||||
-rw-r--r-- | python/tests/import_issn.py (renamed from python/tests/issn.py) | 2 | ||||
-rw-r--r-- | python/tests/import_matched.py (renamed from python/tests/matched_importer_test.py) | 2 | ||||
-rw-r--r-- | python/tests/import_orcid.py (renamed from python/tests/orcid.py) | 2 | ||||
-rw-r--r-- | python/tests/importer.py | 2 | ||||
-rw-r--r-- | python/tests/transform_tests.py | 4 |
19 files changed, 22 insertions, 88 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py index 6c66d98a..0ec0cfa8 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -2,11 +2,11 @@ import sys import argparse -from fatcat_tools.crossref_importer import FatcatCrossrefImporter -from fatcat_tools.orcid_importer import FatcatOrcidImporter -from fatcat_tools.issn_importer import FatcatIssnImporter -from fatcat_tools.matched_importer import FatcatMatchedImporter -from fatcat_tools.grobid_metadata_importer import FatcatGrobidMetadataImporter +from fatcat_tools.importers.crossref import FatcatCrossrefImporter +from fatcat_tools.importers.orcid import FatcatOrcidImporter +from fatcat_tools.importers.issn import FatcatIssnImporter +from fatcat_tools.importers.matched import FatcatMatchedImporter +from fatcat_tools.importers.grobid_metadata import FatcatGrobidMetadataImporter def run_import_crossref(args): fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file, diff --git a/python/fatcat_tools/importer_common.py b/python/fatcat_tools/importers/common.py index 8dfee875..8dfee875 100644 --- a/python/fatcat_tools/importer_common.py +++ b/python/fatcat_tools/importers/common.py diff --git a/python/fatcat_tools/crossref_importer.py b/python/fatcat_tools/importers/crossref.py index 6a5ad824..dddb58d1 100644 --- a/python/fatcat_tools/crossref_importer.py +++ b/python/fatcat_tools/importers/crossref.py @@ -5,7 +5,7 @@ import sqlite3 import datetime import itertools import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter class FatcatCrossrefImporter(FatcatImporter): diff --git a/python/fatcat_tools/grobid_metadata_importer.py b/python/fatcat_tools/importers/grobid_metadata.py index effa0d94..56b2ee02 100755..100644 --- a/python/fatcat_tools/grobid_metadata_importer.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -5,7 +5,7 @@ import json import base64 import datetime import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter MAX_ABSTRACT_BYTES=4096 diff --git a/python/fatcat_tools/issn_importer.py b/python/fatcat_tools/importers/issn.py index e3ed7382..d7fb9082 100644 --- a/python/fatcat_tools/issn_importer.py +++ b/python/fatcat_tools/importers/issn.py @@ -3,7 +3,7 @@ import sys import json import itertools import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter # CSV format (generated from git.archive.org/webgroup/oa-journal-analysis): # ISSN-L,in_doaj,in_road,in_norwegian,in_crossref,title,publisher,url,lang,ISSN-print,ISSN-electronic,doi_count,has_doi,is_oa,is_kept,publisher_size,url_live,url_live_status,url_live_final_status,url_live_final_url,url_live_status_simple,url_live_final_status_simple,url_domain,gwb_pdf_count diff --git a/python/fatcat_tools/matched_importer.py b/python/fatcat_tools/importers/matched.py index 627ab6f1..6270fe88 100644 --- a/python/fatcat_tools/matched_importer.py +++ b/python/fatcat_tools/importers/matched.py @@ -4,7 +4,7 @@ import json import sqlite3 import itertools import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter #row = row.split('\t') #assert len(row) == 2 diff --git a/python/fatcat_tools/orcid_importer.py b/python/fatcat_tools/importers/orcid.py index f2366c66..350c4c57 100644 --- a/python/fatcat_tools/orcid_importer.py +++ b/python/fatcat_tools/importers/orcid.py @@ -3,7 +3,7 @@ import sys import json import itertools import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter def value_or_none(e): if type(e) == dict: diff --git a/python/fatcat_tools/raw_api_client.py b/python/fatcat_tools/raw_api_client.py deleted file mode 100644 index 75151ebb..00000000 --- a/python/fatcat_tools/raw_api_client.py +++ /dev/null @@ -1,66 +0,0 @@ - -import sys -import json -import requests - - -class RawFatcatApiClient: - - def __init__(self, host_url): - self.host_url = host_url - self.session = requests.Session() - self._issn_map = dict() - - def get(self, path, data=None): - headers = {"content-type": "application/json"} - return self.session.get(self.host_url + path, json=data, - headers=headers) - - def post(self, path, data=None): - headers = {"content-type": "application/json"} - return self.session.post(self.host_url + path, json=data, - headers=headers) - - def new_editgroup(self): - rv = self.post('/v0/editgroup', data=dict( - editor_id=1)) - print(rv) - print(rv.json()) - assert rv.status_code == 201 - editgroup_id = rv.json()['id'] - return editgroup_id - - def accept_editgroup(self, eg): - rv = self.post('/v0/editgroup/{}/accept'.format(eg)) - assert rv.status_code == 200 - return rv - - def import_issn_file(self, json_file, create_containers=False, batchsize=100): - eg = self.new_editgroup() - i = 0 - with open(json_file, 'r') as file: - for line in file: - if i % batchsize == 0: - sys.stdout.write('\n{}: '.format(i)) - if (i+1) % 20 == 0: - sys.stdout.write('.') - i = i + 1 - obj = json.loads(line) - if not ("author" in obj and "title" in obj): - continue - try: - self.import_crossref_dict(obj, editgroup=eg, - create_containers=create_containers) - except Exception as e: - print("ERROR: {}".format(e)) - if i % batchsize == 0: - self.accept_editgroup(eg) - eg = self.new_editgroup() - if i % batchsize != 0: - self.accept_editgroup(eg) - print("done!") - - def health(self): - rv = self.get("/health") - assert rv.status_code == 200 - return rv.json() diff --git a/python/fatcat_tools/changelog_workers.py b/python/fatcat_tools/workers/changelog.py index 223d4478..92bb8bdd 100644 --- a/python/fatcat_tools/changelog_workers.py +++ b/python/fatcat_tools/workers/changelog.py @@ -2,7 +2,7 @@ import json import time from itertools import islice -from fatcat_tools.worker_common import FatcatWorker +from fatcat_tools.workers.worker_common import FatcatWorker from pykafka.common import OffsetType diff --git a/python/fatcat_tools/elastic_workers.py b/python/fatcat_tools/workers/elastic.py index eac8d6b0..46632792 100644 --- a/python/fatcat_tools/elastic_workers.py +++ b/python/fatcat_tools/workers/elastic.py @@ -2,7 +2,7 @@ import json import time import requests -from fatcat_tools.worker_common import FatcatWorker +from fatcat_tools.workers.worker_common import FatcatWorker from fatcat_client.models import ReleaseEntity from fatcat_tools.transforms import * from pykafka.common import OffsetType diff --git a/python/fatcat_tools/worker_common.py b/python/fatcat_tools/workers/worker_common.py index 77ea2c15..77ea2c15 100644 --- a/python/fatcat_tools/worker_common.py +++ b/python/fatcat_tools/workers/worker_common.py diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py index 5d3731f6..0bc9963c 100755 --- a/python/fatcat_worker.py +++ b/python/fatcat_worker.py @@ -2,8 +2,8 @@ import sys import argparse -from fatcat_tools.changelog_workers import FatcatChangelogWorker, FatcatEntityUpdatesWorker -from fatcat_tools.elastic_workers import FatcatElasticReleaseWorker +from fatcat_tools.workers.changelog import FatcatChangelogWorker, FatcatEntityUpdatesWorker +from fatcat_tools.workers.elastic import FatcatElasticReleaseWorker def run_changelog_worker(args): topic = "fatcat-{}.changelog".format(args.env) diff --git a/python/tests/crossref.py b/python/tests/import_crossref.py index 04ac5e8e..ab33d0fc 100644 --- a/python/tests/crossref.py +++ b/python/tests/import_crossref.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat_tools.crossref_importer import FatcatCrossrefImporter +from fatcat_tools.importers.crossref import FatcatCrossrefImporter @pytest.fixture(scope="function") diff --git a/python/tests/grobid_metadata_importer_test.py b/python/tests/import_grobid_metadata.py index 502ca74a..8b268e21 100644 --- a/python/tests/grobid_metadata_importer_test.py +++ b/python/tests/import_grobid_metadata.py @@ -3,7 +3,7 @@ import os import json import base64 import pytest -from fatcat_tools.grobid_metadata_importer import FatcatGrobidMetadataImporter +from fatcat_tools.importers.grobid_metadata import FatcatGrobidMetadataImporter """ WARNING: these tests are currently very fragile because they have database diff --git a/python/tests/issn.py b/python/tests/import_issn.py index 76c8aecb..f45747ed 100644 --- a/python/tests/issn.py +++ b/python/tests/import_issn.py @@ -1,6 +1,6 @@ import pytest -from fatcat_tools.issn_importer import FatcatIssnImporter +from fatcat_tools.importers.issn import FatcatIssnImporter @pytest.fixture(scope="function") diff --git a/python/tests/matched_importer_test.py b/python/tests/import_matched.py index 4042eabb..8004e3bd 100644 --- a/python/tests/matched_importer_test.py +++ b/python/tests/import_matched.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat_tools.matched_importer import FatcatMatchedImporter +from fatcat_tools.importers.matched import FatcatMatchedImporter @pytest.fixture(scope="function") diff --git a/python/tests/orcid.py b/python/tests/import_orcid.py index f8228cc0..2dc98d76 100644 --- a/python/tests/orcid.py +++ b/python/tests/import_orcid.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat_tools.orcid_importer import FatcatOrcidImporter +from fatcat_tools.importers.orcid import FatcatOrcidImporter @pytest.fixture(scope="function") diff --git a/python/tests/importer.py b/python/tests/importer.py index 0de86635..d98638e4 100644 --- a/python/tests/importer.py +++ b/python/tests/importer.py @@ -1,7 +1,7 @@ import pytest -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter def test_issnl_mapping_lookup(): diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py index 669c2526..52a9965a 100644 --- a/python/tests/transform_tests.py +++ b/python/tests/transform_tests.py @@ -1,10 +1,10 @@ import json import pytest -from fatcat_tools.crossref_importer import FatcatCrossrefImporter +from fatcat_tools.importers.crossref import FatcatCrossrefImporter from fatcat_tools.transforms import * -from crossref import crossref_importer +from import_crossref import crossref_importer def test_elastic_convert(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: |