summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/journal_metadata.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/journal_metadata.py')
-rw-r--r--python/fatcat_tools/importers/journal_metadata.py49
1 files changed, 33 insertions, 16 deletions
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index cd058889..ff38cd77 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -3,7 +3,7 @@ import sys
import json
import itertools
import fatcat_client
-from .common import FatcatImporter
+from .common import EntityImporter
def or_none(s):
@@ -25,7 +25,7 @@ def truthy(s):
else:
return None
-class JournalMetadataImporter(FatcatImporter):
+class JournalMetadataImporter(EntityImporter):
"""
Imports journal metadata ("containers") by ISSN, currently from a custom
(data munged) .csv file format
@@ -45,7 +45,12 @@ class JournalMetadataImporter(FatcatImporter):
editgroup_description=eg_desc,
editgroup_extra=eg_extra)
- def parse_journal_metadata_row(self, row):
+ def want(self, raw_record):
+ if raw_record.get('ISSN-L'):
+ return True
+ return False
+
+ def parse_record(self, row):
"""
row is a python dict (parsed from CSV).
returns a ContainerEntity (or None if invalid or couldn't parse)
@@ -72,16 +77,28 @@ class JournalMetadataImporter(FatcatImporter):
extra=extra)
return ce
- def create_row(self, row, editgroup_id=None):
- ce = self.parse_journal_metadata_row(row)
- if ce is not None:
- self.api.create_container(ce, editgroup_id=editgroup_id)
- self.counts['insert'] += 1
-
- def create_batch(self, batch):
- """Reads and processes in batches (not API-call-per-line)"""
- objects = [self.parse_journal_metadata_row(l)
- for l in batch if (l is not None)]
- objects = [o for o in objects if (o is not None)]
- self.api.create_container_batch(objects, autoaccept=True)
- self.counts['insert'] += len(objects)
+ def try_update(self, ce):
+
+ existing = None
+ try:
+ existing = self.api.lookup_container(issnl=ce.issnl)
+ except fatcat_client.rest.ApiException as err:
+ if err.status != 404:
+ raise err
+ # doesn't exist, need to update
+ return True
+
+ # eventually we'll want to support "updates", but for now just skip if
+ # entity already exists
+ if existing:
+ self.counts['exists'] += 1
+ return False
+
+ return True
+
+ def insert_batch(self, batch):
+ self.api.create_container_batch(batch,
+ autoaccept=True,
+ description=self.editgroup_description,
+ extra=json.dumps(self.editgroup_extra))
+