diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-07-10 00:50:42 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-07-10 00:50:42 +0200 |
commit | df8dcde8d5eaf530e35f1467951271bff7475e64 (patch) | |
tree | e9fc117d824b8997c83e416cd0e021bb1f3dce74 /python/fatcat_tools/importers | |
parent | 40f77b78aa331ca67b510dfece77e6a6000f8c2f (diff) | |
download | fatcat-df8dcde8d5eaf530e35f1467951271bff7475e64.tar.gz fatcat-df8dcde8d5eaf530e35f1467951271bff7475e64.zip |
wip: contrib, GH59
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 38 |
1 file changed, 22 insertions, 16 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 66ec2023..7797812f 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -292,14 +292,17 @@ class DataciteImporter(EntityImporter): print('[{}] skipping non-ascii doi for now'.format(doi)) return None - creators = attributes.get('creators', []) or [] contributors = attributes.get('contributors', []) or [] # Much fewer than creators. - contribs = self.parse_datacite_creators(creators, doi=doi) + self.parse_datacite_creators(contributors, role=None, set_index=False, doi=doi) + contribs = self.parse_datacite_creators(creators, doi=doi) + contribs_extra_contributors = self.parse_datacite_creators(contributors, set_index=False, doi=doi) - # Address duplicated author names; use raw_name string comparison; refs #59. - contribs = unique_contributors(contribs) + # Unfortunately, creators and contributors might overlap, refs GH59. + for cc in contribs_extra_contributors: + if contributor_list_contains_contributor(contribs, cc): + continue + contribs.append(cc) # Title, may come with "attributes.titles[].titleType", like # "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle" @@ -800,8 +803,7 @@ class DataciteImporter(EntityImporter): if contributorType: extra = {'type': contributorType} - contribs.append( - fatcat_openapi_client.ReleaseContrib( + rc = fatcat_openapi_client.ReleaseContrib( creator_id=creator_id, index=i, raw_name=name, @@ -810,7 +812,9 @@ class DataciteImporter(EntityImporter): role=role, raw_affiliation=raw_affiliation, extra=extra, - )) + ) + if not contributor_list_contains_contributor(contribs, rc): + contribs.append(rc) elif nameType == 'Organizational': name = c.get('name', '') or '' if name in UNKNOWN_MARKERS: @@ -826,18 +830,20 @@ class DataciteImporter(EntityImporter): return contribs -def unique_contributors(contribs): +def contributor_list_contains_contributor(contributor_list, 
contributor): """ - Given a list of ReleaseContrib items, return a list of unique - ReleaseContribs, refs GH #59. + Given a list of contributors, determine, whether contrib is in that list. """ - unique_names, unique_contribs = set(), [] - for rc in contribs: - if rc.raw_name and rc.raw_name in unique_names: + for cc in contributor_list: + if cc.raw_name != contributor.raw_name: + continue + cc_role = cc.role or 'author' + contributor_role = contributor.role or 'author' + if cc_role != contributor_role: continue - unique_names.add(rc.raw_name) - unique_contribs.append(rc) - return unique_contribs + return True + return False + def lookup_license_slug(raw): """ |