about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com>, 2020-07-07 02:08:26 +0200
committer: Martin Czygan <martin.czygan@gmail.com>, 2020-07-07 02:08:26 +0200
commit: fcc6f24a95a7b77bda4ec813daecc2b737a82412 (patch)
tree: 23795219ad991387d30a1c72c8b79e5993e254a9 /python/fatcat_tools
parent: ca8fa64c1590a43b1e92fd8898275625d083451a (diff)
download: fatcat-fcc6f24a95a7b77bda4ec813daecc2b737a82412.tar.gz
download: fatcat-fcc6f24a95a7b77bda4ec813daecc2b737a82412.zip
datacite: address duplicated contributor issue
Use string comparison.

* https://fatcat.wiki/release/spjysmrnsrgyzgq6ise5o44rlu/contribs
* https://api.datacite.org/dois/10.25940/roper-31098406
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/datacite.py | 16
1 file changed, 16 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 434a2941..66ec2023 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -298,6 +298,9 @@ class DataciteImporter(EntityImporter):
contribs = self.parse_datacite_creators(creators, doi=doi) + self.parse_datacite_creators(contributors, role=None, set_index=False, doi=doi)
+ # Address duplicated author names; use raw_name string comparison; refs #59.
+ contribs = unique_contributors(contribs)
+
# Title, may come with "attributes.titles[].titleType", like
# "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"
titles = attributes.get('titles', []) or []
@@ -823,6 +826,19 @@ class DataciteImporter(EntityImporter):
return contribs
+def unique_contributors(contribs):
+ """
+ Given a list of ReleaseContrib items, return a list of unique
+ ReleaseContribs, refs GH #59.
+ """
+ unique_names, unique_contribs = set(), []
+ for rc in contribs:
+ if rc.raw_name and rc.raw_name in unique_names:
+ continue
+ unique_names.add(rc.raw_name)
+ unique_contribs.append(rc)
+ return unique_contribs
+
def lookup_license_slug(raw):
"""
Resolve a variety of strings into a some pseudo-canonical form, e.g.