From d2bcd77f73c6496a2ffdd865d2348f33f4fb17f1 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Fri, 10 Jul 2020 18:29:31 +0200 Subject: datacite: there should be no index gaps --- python/fatcat_tools/importers/datacite.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 797ccf19..962d80c6 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -758,9 +758,10 @@ class DataciteImporter(EntityImporter): # Names, that should be ignored right away. name_blacklist = set(('Occdownload Gbif.Org',)) - for i, c in enumerate(creators): + i = 0 + for c in creators: if not set_index: - i = None + i = None nameType = c.get('nameType', '') or '' if nameType in ('', 'Personal'): creator_id = None @@ -838,8 +839,11 @@ class DataciteImporter(EntityImporter): raw_affiliation=raw_affiliation, extra=extra, ) + # Filter out duplicates early. if not contributor_list_contains_contributor(contribs, rc): contribs.append(rc) + if i is not None: + i += 1 elif nameType == 'Organizational': name = c.get('name', '') or '' if name in UNKNOWN_MARKERS: @@ -849,6 +853,8 @@ class DataciteImporter(EntityImporter): extra = {'organization': name} contribs.append(fatcat_openapi_client.ReleaseContrib( index=i, extra=extra)) + if i is not None: + i += 1 else: print('[{}] unknown name type: {}'.format(doi, nameType), file=sys.stderr) -- cgit v1.2.3