diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-07-10 18:29:31 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-07-10 18:29:31 +0200 |
commit | d2bcd77f73c6496a2ffdd865d2348f33f4fb17f1 (patch) | |
tree | dfed92c605b0a0fb5c6ad4cc2e6dbbf6e67bca75 /python/fatcat_tools/importers | |
parent | 2411bad315b48b99c19958ea3c393dc4d09d6486 (diff) | |
download | fatcat-d2bcd77f73c6496a2ffdd865d2348f33f4fb17f1.tar.gz fatcat-d2bcd77f73c6496a2ffdd865d2348f33f4fb17f1.zip |
datacite: there should be no index gaps
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 10 |
1 file changed, 8 insertions, 2 deletions
```diff
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 797ccf19..962d80c6 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -758,9 +758,10 @@ class DataciteImporter(EntityImporter):
         # Names, that should be ignored right away.
         name_blacklist = set(('Occdownload Gbif.Org',))
 
-        for i, c in enumerate(creators):
+        i = 0
+        for c in creators:
             if not set_index:
-                i = None
+                i = None
             nameType = c.get('nameType', '') or ''
             if nameType in ('', 'Personal'):
                 creator_id = None
@@ -838,8 +839,11 @@ class DataciteImporter(EntityImporter):
                     raw_affiliation=raw_affiliation,
                     extra=extra,
                 )
+                # Filter out duplicates early.
                 if not contributor_list_contains_contributor(contribs, rc):
                     contribs.append(rc)
+                    if i is not None:
+                        i += 1
             elif nameType == 'Organizational':
                 name = c.get('name', '') or ''
                 if name in UNKNOWN_MARKERS:
@@ -849,6 +853,8 @@ class DataciteImporter(EntityImporter):
                 extra = {'organization': name}
                 contribs.append(fatcat_openapi_client.ReleaseContrib(
                     index=i, extra=extra))
+                if i is not None:
+                    i += 1
             else:
                 print('[{}] unknown name type: {}'.format(doi, nameType), file=sys.stderr)
 
```