summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-07-10 00:50:42 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2020-07-10 00:50:42 +0200
commit df8dcde8d5eaf530e35f1467951271bff7475e64 (patch)
tree e9fc117d824b8997c83e416cd0e021bb1f3dce74 /python/fatcat_tools/importers
parent 40f77b78aa331ca67b510dfece77e6a6000f8c2f (diff)
download fatcat-df8dcde8d5eaf530e35f1467951271bff7475e64.tar.gz
fatcat-df8dcde8d5eaf530e35f1467951271bff7475e64.zip
wip: contrib, GH59
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/datacite.py 38
1 files changed, 22 insertions, 16 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 66ec2023..7797812f 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -292,14 +292,17 @@ class DataciteImporter(EntityImporter):
print('[{}] skipping non-ascii doi for now'.format(doi))
return None
-
creators = attributes.get('creators', []) or []
contributors = attributes.get('contributors', []) or [] # Much fewer than creators.
- contribs = self.parse_datacite_creators(creators, doi=doi) + self.parse_datacite_creators(contributors, role=None, set_index=False, doi=doi)
+ contribs = self.parse_datacite_creators(creators, doi=doi)
+ contribs_extra_contributors = self.parse_datacite_creators(contributors, set_index=False, doi=doi)
- # Address duplicated author names; use raw_name string comparison; refs #59.
- contribs = unique_contributors(contribs)
+ # Unfortunately, creators and contributors might overlap, refs GH59.
+ for cc in contribs_extra_contributors:
+ if contributor_list_contains_contributor(contribs, cc):
+ continue
+ contribs.append(cc)
# Title, may come with "attributes.titles[].titleType", like
# "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"
@@ -800,8 +803,7 @@ class DataciteImporter(EntityImporter):
if contributorType:
extra = {'type': contributorType}
- contribs.append(
- fatcat_openapi_client.ReleaseContrib(
+ rc = fatcat_openapi_client.ReleaseContrib(
creator_id=creator_id,
index=i,
raw_name=name,
@@ -810,7 +812,9 @@ class DataciteImporter(EntityImporter):
role=role,
raw_affiliation=raw_affiliation,
extra=extra,
- ))
+ )
+ if not contributor_list_contains_contributor(contribs, rc):
+ contribs.append(rc)
elif nameType == 'Organizational':
name = c.get('name', '') or ''
if name in UNKNOWN_MARKERS:
@@ -826,18 +830,20 @@ class DataciteImporter(EntityImporter):
return contribs
-def unique_contributors(contribs):
+def contributor_list_contains_contributor(contributor_list, contributor):
"""
- Given a list of ReleaseContrib items, return a list of unique
- ReleaseContribs, refs GH #59.
+ Given a list of contributors, determine whether contributor is in that list.
"""
- unique_names, unique_contribs = set(), []
- for rc in contribs:
- if rc.raw_name and rc.raw_name in unique_names:
+ for cc in contributor_list:
+ if cc.raw_name != contributor.raw_name:
+ continue
+ cc_role = cc.role or 'author'
+ contributor_role = contributor.role or 'author'
+ if cc_role != contributor_role:
continue
- unique_names.add(rc.raw_name)
- unique_contribs.append(rc)
- return unique_contribs
+ return True
+ return False
+
def lookup_license_slug(raw):
"""