summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-07-10 18:29:31 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2020-07-10 18:29:31 +0200
commit: d2bcd77f73c6496a2ffdd865d2348f33f4fb17f1 (patch)
tree: dfed92c605b0a0fb5c6ad4cc2e6dbbf6e67bca75
parent: 2411bad315b48b99c19958ea3c393dc4d09d6486 (diff)
download: fatcat-d2bcd77f73c6496a2ffdd865d2348f33f4fb17f1.tar.gz
fatcat-d2bcd77f73c6496a2ffdd865d2348f33f4fb17f1.zip
datacite: there should be no index gaps
-rw-r--r-- python/fatcat_tools/importers/datacite.py | 10
1 files changed, 8 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 797ccf19..962d80c6 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -758,9 +758,10 @@ class DataciteImporter(EntityImporter):
# Names, that should be ignored right away.
name_blacklist = set(('Occdownload Gbif.Org',))
- for i, c in enumerate(creators):
+ i = 0
+ for c in creators:
if not set_index:
- i = None
+ i = None
nameType = c.get('nameType', '') or ''
if nameType in ('', 'Personal'):
creator_id = None
@@ -838,8 +839,11 @@ class DataciteImporter(EntityImporter):
raw_affiliation=raw_affiliation,
extra=extra,
)
+ # Filter out duplicates early.
if not contributor_list_contains_contributor(contribs, rc):
contribs.append(rc)
+ if i is not None:
+ i += 1
elif nameType == 'Organizational':
name = c.get('name', '') or ''
if name in UNKNOWN_MARKERS:
@@ -849,6 +853,8 @@ class DataciteImporter(EntityImporter):
extra = {'organization': name}
contribs.append(fatcat_openapi_client.ReleaseContrib(
index=i, extra=extra))
+ if i is not None:
+ i += 1
else:
print('[{}] unknown name type: {}'.format(doi, nameType), file=sys.stderr)