summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--  python/env.example                                  |  2
-rwxr-xr-x  python/fatcat_import.py                             | 18
-rw-r--r--  python/fatcat_tools/importers/__init__.py           |  2
-rw-r--r--  python/fatcat_tools/importers/journal_metadata.py (renamed from python/fatcat_tools/importers/issn.py) | 10
-rw-r--r--  python/tests/import_issn.py (deleted)               | 26
-rw-r--r--  python/tests/import_journal_metadata.py (new)       | 26
6 files changed, 42 insertions, 42 deletions
diff --git a/python/env.example b/python/env.example
index 9896dc86..a171ac09 100644
--- a/python/env.example
+++ b/python/env.example
@@ -15,6 +15,6 @@ SENTRY_DSN=""
# FATCAT_API_AUTH_TOKEN
FATCAT_AUTH_WORKER_CROSSREF=""
FATCAT_AUTH_WORKER_ORCID=""
-FATCAT_AUTH_WORKER_ISSN=""
+FATCAT_AUTH_WORKER_JOURNAL_METADATA=""
FATCAT_AUTH_WORKER_MATCHED=""
FATCAT_AUTH_WORKER_GROBID_METADATA=""
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 0e176b2c..ed12416c 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -6,7 +6,7 @@
import os, sys, argparse
from fatcat_tools import authenticated_api
from fatcat_tools.importers import CrossrefImporter, OrcidImporter, \
- IssnImporter, MatchedImporter, GrobidMetadataImporter, make_kafka_consumer
+ JournalMetadataImporter, MatchedImporter, GrobidMetadataImporter, make_kafka_consumer
def run_crossref(args):
@@ -27,8 +27,8 @@ def run_orcid(args):
foi.process_batch(args.json_file, size=args.batch_size)
foi.describe_run()
-def run_issn(args):
- fii = IssnImporter(args.api)
+def run_journal_metadata(args):
+ fii = JournalMetadataImporter(args.api)
fii.process_csv_batch(args.csv_file, size=args.batch_size)
fii.describe_run()
@@ -98,15 +98,15 @@ def main():
help="size of batch to send",
default=50, type=int)
- sub_issn = subparsers.add_parser('issn')
- sub_issn.set_defaults(
- func=run_issn,
- auth_var="FATCAT_AUTH_WORKER_ISSN",
+ sub_journal_metadata = subparsers.add_parser('journal-metadata')
+ sub_journal_metadata.set_defaults(
+ func=run_journal_metadata,
+ auth_var="FATCAT_AUTH_WORKER_JOURNAL_METADATA",
)
- sub_issn.add_argument('csv_file',
+ sub_journal_metadata.add_argument('csv_file',
help="Journal ISSN CSV metadata file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
- sub_issn.add_argument('--batch-size',
+ sub_journal_metadata.add_argument('--batch-size',
help="size of batch to send",
default=50, type=int)
diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py
index e6f081e5..47fc1fd3 100644
--- a/python/fatcat_tools/importers/__init__.py
+++ b/python/fatcat_tools/importers/__init__.py
@@ -2,6 +2,6 @@
from .common import FatcatImporter, make_kafka_consumer
from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP
from .grobid_metadata import GrobidMetadataImporter
-from .issn import IssnImporter
+from .journal_metadata import JournalMetadataImporter
from .matched import MatchedImporter
from .orcid import OrcidImporter
diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/journal_metadata.py
index f4d525a4..859662ae 100644
--- a/python/fatcat_tools/importers/issn.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -25,7 +25,7 @@ def truthy(s):
else:
return None
-class IssnImporter(FatcatImporter):
+class JournalMetadataImporter(FatcatImporter):
"""
Imports journal metadata ("containers") by ISSN, currently from a custom
(data munged) .csv file format
@@ -40,12 +40,12 @@ class IssnImporter(FatcatImporter):
eg_desc = kwargs.get('editgroup_description',
"Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.")
eg_extra = kwargs.get('editgroup_extra', dict())
- eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IssnImporter')
+ eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.JournalMetadataImporter')
super().__init__(api,
editgroup_description=eg_desc,
editgroup_extra=eg_extra)
- def parse_issn_row(self, row):
+ def parse_journal_metadata_row(self, row):
"""
row is a python dict (parsed from CSV).
returns a ContainerEntity (or None if invalid or couldn't parse)
@@ -75,14 +75,14 @@ class IssnImporter(FatcatImporter):
return ce
def create_row(self, row, editgroup_id=None):
- ce = self.parse_issn_row(row)
+ ce = self.parse_journal_metadata_row(row)
if ce is not None:
self.api.create_container(ce, editgroup_id=editgroup_id)
self.counts['insert'] += 1
def create_batch(self, batch):
"""Reads and processes in batches (not API-call-per-line)"""
- objects = [self.parse_issn_row(l)
+ objects = [self.parse_journal_metadata_row(l)
for l in batch if (l is not None)]
objects = [o for o in objects if (o is not None)]
self.api.create_container_batch(objects, autoaccept="true")
diff --git a/python/tests/import_issn.py b/python/tests/import_issn.py
deleted file mode 100644
index 6b5978d9..00000000
--- a/python/tests/import_issn.py
+++ /dev/null
@@ -1,26 +0,0 @@
-
-import pytest
-from fatcat_tools.importers import IssnImporter
-from fixtures import api
-
-
-@pytest.fixture(scope="function")
-def issn_importer(api):
- yield IssnImporter(api)
-
-# TODO: use API to check that entities actually created...
-def test_issn_importer_batch(issn_importer):
- with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
- issn_importer.process_csv_batch(f)
-
-def test_issn_importer(issn_importer):
- with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
- issn_importer.process_csv_source(f)
-
- # fetch most recent editgroup
- changes = issn_importer.api.get_changelog(limit=1)
- eg = changes[0].editgroup
- assert eg.description
- assert "container" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.IssnImporter" in eg.extra['agent']
diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py
new file mode 100644
index 00000000..81334bc6
--- /dev/null
+++ b/python/tests/import_journal_metadata.py
@@ -0,0 +1,26 @@
+
+import pytest
+from fatcat_tools.importers import JournalMetadataImporter
+from fixtures import api
+
+
+@pytest.fixture(scope="function")
+def journal_metadata_importer(api):
+ yield JournalMetadataImporter(api)
+
+# TODO: use API to check that entities actually created...
+def test_journal_metadata_importer_batch(journal_metadata_importer):
+ with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
+ journal_metadata_importer.process_csv_batch(f)
+
+def test_journal_metadata_importer(journal_metadata_importer):
+ with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
+ journal_metadata_importer.process_csv_source(f)
+
+ # fetch most recent editgroup
+ changes = journal_metadata_importer.api.get_changelog(limit=1)
+ eg = changes[0].editgroup
+ assert eg.description
+ assert "container" in eg.description.lower()
+ assert eg.extra['git_rev']
+ assert "fatcat_tools.JournalMetadataImporter" in eg.extra['agent']