diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-13 11:32:41 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-13 11:32:41 -0800 | 
| commit | 279b22e30d9b590838268f5f5acdaa1110ee593a (patch) | |
| tree | c9965a089be1b8ef607573ea9261c0c378c0ab47 /python/fatcat_tools/importers/orcid.py | |
| parent | 7ebda2e051b51e49544ab75673b19ec5f27d9d45 (diff) | |
| download | fatcat-279b22e30d9b590838268f5f5acdaa1110ee593a.tar.gz fatcat-279b22e30d9b590838268f5f5acdaa1110ee593a.zip | |
shuffle around fatcat_tools layout
Diffstat (limited to 'python/fatcat_tools/importers/orcid.py')
| -rw-r--r-- | python/fatcat_tools/importers/orcid.py | 73 | 
1 files changed, 73 insertions, 0 deletions
"""ORCID importer (python/fatcat_tools/importers/orcid.py).

Parses ORCID records (one JSON document per row) and creates fatcat
creator entities from them through the fatcat API client.
"""

import sys
import json
import itertools
import fatcat_client
from fatcat_tools.importers.common import FatcatImporter


def value_or_none(e):
    """Unwrap an ORCID-style ``{"value": ...}`` field into a plain value.

    If ``e`` is a dict, its ``'value'`` entry is used in its place. Empty
    strings and strings that cannot be encoded (see below) become ``None``.
    Anything that is not a string after unwrapping (``None``, numbers, ...)
    is returned unchanged.
    """
    if isinstance(e, dict):
        e = e.get('value')
    if not isinstance(e, str):
        # Only strings can be empty or un-encodable; calling .encode() on
        # other JSON values would raise AttributeError.
        return e
    if not e:
        return None
    # TODO: this is probably bogus; patched in desperation; remove?
    try:
        # Strings holding unpaired surrogates (which json.loads can produce
        # from escapes like "\ud800") cannot be encoded as UTF-8.
        e.encode()
    except UnicodeEncodeError:
        # Invalid JSON?
        print("BAD UNICODE")
        return None
    return e


class FatcatOrcidImporter(FatcatImporter):
    """Importer that creates fatcat creators from ORCID JSON records."""

    def parse_orcid_dict(self, obj):
        """
        obj is a python dict (parsed from json).
        returns a CreatorEntity, or None when the record is unusable
        (no person/name block, no name parts at all, or a bad ORCID).
        """
        # Records lacking a 'person' block (missing or null) are skipped
        # instead of raising KeyError/TypeError and aborting the import.
        person = obj.get('person')
        if not person:
            return None
        name = person.get('name')
        if not name:
            return None

        given = value_or_none(name.get('given-names'))
        sur = value_or_none(name.get('family-name'))
        display = value_or_none(name.get('credit-name'))
        if display is None:
            # TODO: sorry human beings
            # Fall back to "<given> <family>", or whichever part exists.
            parts = [part for part in (given, sur) if part]
            if not parts:
                # must have *some* name
                return None
            display = " ".join(parts)

        orcid = obj['orcid-identifier']['path']
        # is_orcid() is not defined in this file; presumably inherited from
        # FatcatImporter -- confirm there.
        if not self.is_orcid(orcid):
            sys.stderr.write("Bad ORCID: {}\n".format(orcid))
            return None

        return fatcat_client.CreatorEntity(
            orcid=orcid,
            given_name=given,
            surname=sur,
            display_name=display,
            extra=None)

    def create_row(self, row, editgroup=None):
        """Parse one JSON row and create a single creator via the API.

        Rows that do not yield a CreatorEntity are skipped and not counted.
        """
        ce = self.parse_orcid_dict(json.loads(row))
        if ce is not None:
            self.api.create_creator(ce, editgroup=editgroup)
            self.insert_count += 1

    def create_batch(self, batch, editgroup=None):
        """Reads and processes in batches (not API-call-per-line)"""
        parsed = (self.parse_orcid_dict(json.loads(line))
                  for line in batch if line is not None)
        # Drop records that could not be turned into a CreatorEntity.
        objects = [ce for ce in parsed if ce is not None]
        self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup)
        self.insert_count += len(objects)
