diff options
Diffstat (limited to 'python/fatcat_tools/importers/orcid.py')
-rw-r--r-- | python/fatcat_tools/importers/orcid.py | 51 |
1 files changed, 30 insertions, 21 deletions
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index 0c8b1d62..02c9bf00 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -3,7 +3,7 @@ import sys import json import itertools import fatcat_client -from .common import FatcatImporter +from .common import EntityImporter, clean def value_or_none(e): if type(e) == dict: @@ -20,7 +20,7 @@ def value_or_none(e): return None return e -class OrcidImporter(FatcatImporter): +class OrcidImporter(EntityImporter): def __init__(self, api, **kwargs): @@ -32,14 +32,16 @@ class OrcidImporter(FatcatImporter): editgroup_description=eg_desc, editgroup_extra=eg_extra) - def parse_orcid_dict(self, obj): + def want(self, raw_record): + return True + + def parse_record(self, obj): """ obj is a python dict (parsed from json). returns a CreatorEntity """ name = obj['person']['name'] - if name is None: - return None + assert name extra = None given = value_or_none(name.get('given-names')) sur = value_or_none(name.get('family-name')) @@ -61,23 +63,30 @@ class OrcidImporter(FatcatImporter): return None ce = fatcat_client.CreatorEntity( orcid=orcid, - given_name=given, - surname=sur, - display_name=display, + given_name=clean(given), + surname=clean(sur), + display_name=clean(display), extra=extra) return ce - def create_row(self, row, editgroup_id=None): - obj = json.loads(row) - ce = self.parse_orcid_dict(obj) - if ce is not None: - self.api.create_creator(ce, editgroup_id=editgroup_id) - self.counts['insert'] += 1 + def try_update(self, raw_record): + existing = None + try: + existing = self.api.lookup_creator(orcid=raw_record.orcid) + except fatcat_client.rest.ApiException as err: + if err.status != 404: + raise err + + # eventually we'll want to support "updates", but for now just skip if + # entity already exists + if existing: + self.counts['exists'] += 1 + return False + + return True - def create_batch(self, batch): - """Reads and processes in batches (not API-call-per-line)""" - objects = [self.parse_orcid_dict(json.loads(l)) - for l in batch if l != None] - objects = [o for o in objects if o != None] - self.api.create_creator_batch(objects, autoaccept="true") - self.counts['insert'] += len(objects) + def insert_batch(self, batch): + self.api.create_creator_batch(batch, + autoaccept=True, + description=self.editgroup_description, + extra=json.dumps(self.editgroup_extra)) |