author     Martin Czygan <martin.czygan@gmail.com>   2020-07-10 00:50:42 +0200
committer  Martin Czygan <martin.czygan@gmail.com>   2020-07-10 00:50:42 +0200
commit     df8dcde8d5eaf530e35f1467951271bff7475e64 (patch)
tree       e9fc117d824b8997c83e416cd0e021bb1f3dce74
parent     40f77b78aa331ca67b510dfece77e6a6000f8c2f (diff)
wip: contrib, GH59
-rw-r--r--  python/fatcat_tools/importers/datacite.py |  38
-rw-r--r--  python/tests/import_datacite.py           | 590
2 files changed, 383 insertions, 245 deletions
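For context: this change replaces the earlier raw_name-only de-duplication (`unique_contributors`) with a membership check that compares both `raw_name` and `role`, treating a missing role as `author`. A minimal, runnable sketch of that check, using a namedtuple as a stand-in for `fatcat_openapi_client.ReleaseContrib` so it runs without the fatcat client installed; the helper name matches the one added in the diff below:

```python
# Standalone sketch of the duplicate check this commit introduces (refs GH59).
# A namedtuple stands in for fatcat_openapi_client.ReleaseContrib so the snippet
# runs without the fatcat client; the field names mirror the real model.
import collections

Contrib = collections.namedtuple("Contrib", "raw_name role")

def contributor_list_contains_contributor(contributor_list, contributor):
    """True if an entry with the same raw_name and (defaulted) role is already present."""
    for cc in contributor_list:
        if cc.raw_name != contributor.raw_name:
            continue
        # a missing role counts as "author", mirroring the importer's default
        if (cc.role or "author") != (contributor.role or "author"):
            continue
        return True
    return False

contribs = [Contrib("Paul Katz", "author")]
assert contributor_list_contains_contributor(contribs, Contrib("Paul Katz", None))          # role defaults to author
assert not contributor_list_contains_contributor(contribs, Contrib("Paul Katz", "editor"))  # different role
assert not contributor_list_contains_contributor(contribs, Contrib("Jane Doe", "author"))   # different name
```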
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 66ec2023..7797812f 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -292,14 +292,17 @@ class DataciteImporter(EntityImporter):
             print('[{}] skipping non-ascii doi for now'.format(doi))
             return None
 
-        creators = attributes.get('creators', []) or []
         contributors = attributes.get('contributors', []) or [] # Much fewer than creators.
 
 
-        contribs = self.parse_datacite_creators(creators, doi=doi) + self.parse_datacite_creators(contributors, role=None, set_index=False, doi=doi)
+        contribs = self.parse_datacite_creators(creators, doi=doi)
+        contribs_extra_contributors = self.parse_datacite_creators(contributors, set_index=False, doi=doi)
 
-        # Address duplicated author names; use raw_name string comparison; refs #59.
-        contribs = unique_contributors(contribs)
+        # Unfortunately, creators and contributors might overlap, refs GH59.
+        for cc in contribs_extra_contributors:
+            if contributor_list_contains_contributor(contribs, cc):
+                continue
+            contribs.append(cc)
 
         # Title, may come with "attributes.titles[].titleType", like
         # "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"
@@ -800,8 +803,7 @@ class DataciteImporter(EntityImporter):
                 if contributorType:
                     extra = {'type': contributorType}
 
-                contribs.append(
-                    fatcat_openapi_client.ReleaseContrib(
+                rc = fatcat_openapi_client.ReleaseContrib(
                         creator_id=creator_id,
                         index=i,
                         raw_name=name,
@@ -810,7 +812,9 @@ class DataciteImporter(EntityImporter):
                         role=role,
                         raw_affiliation=raw_affiliation,
                         extra=extra,
-                    ))
+                    )
+                if not contributor_list_contains_contributor(contribs, rc):
+                    contribs.append(rc)
             elif nameType == 'Organizational':
                 name = c.get('name', '') or ''
                 if name in UNKNOWN_MARKERS:
@@ -826,18 +830,20 @@ class DataciteImporter(EntityImporter):
         return contribs
 
 
-def unique_contributors(contribs):
+def contributor_list_contains_contributor(contributor_list, contributor):
     """
-    Given a list of ReleaseContrib items, return a list of unique
-    ReleaseContribs, refs GH #59.
+    Given a list of contributors, determine, whether contrib is in that list.
     """
-    unique_names, unique_contribs = set(), []
-    for rc in contribs:
-        if rc.raw_name and rc.raw_name in unique_names:
+    for cc in contributor_list:
+        if cc.raw_name != contributor.raw_name:
+            continue
+        cc_role = cc.role or 'author'
+        contributor_role = contributor.role or 'author'
+        if cc_role != contributor_role:
             continue
-        unique_names.add(rc.raw_name)
-        unique_contribs.append(rc)
-    return unique_contribs
+        return True
+    return False
+
 
 def lookup_license_slug(raw):
     """
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 1472b8ea..b01a11e6 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -7,33 +7,54 @@
 import datetime
 import pytest
 import gzip
 from fatcat_tools.importers import DataciteImporter, JsonLinePusher
-from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi, index_form_to_display_name, lookup_license_slug
+from fatcat_tools.importers.datacite import (
+    find_original_language_title,
+    parse_datacite_titles,
+    parse_datacite_dates,
+    clean_doi,
+    index_form_to_display_name,
+    lookup_license_slug,
+    contributor_list_contains_contributor,
+)
 from fatcat_tools.transforms import entity_to_dict
+import fatcat_openapi_client
 from fixtures import api
 import json
 
 
 @pytest.fixture(scope="function")
 def datacite_importer(api):
-    with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
-        yield DataciteImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3',
-            bezerk_mode=True)
+    with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+        yield DataciteImporter(
+            api,
+            issn_file,
+            extid_map_file="tests/files/example_map.sqlite3",
+            bezerk_mode=True,
+        )
+
 
 @pytest.fixture(scope="function")
 def datacite_importer_existing(api):
-    with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
-        yield DataciteImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3',
-            bezerk_mode=False)
+    with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+        yield DataciteImporter(
+            api,
+            issn_file,
+            extid_map_file="tests/files/example_map.sqlite3",
+            bezerk_mode=False,
+        )
+
 
 @pytest.mark.skip(reason="larger datacite import slows tests down")
 def test_datacite_importer_huge(datacite_importer):
     last_index = datacite_importer.api.get_changelog(limit=1)[0].index
-    with gzip.open('tests/files/datacite_1k_records.jsonl.gz', 'rt') as f:
+    with gzip.open("tests/files/datacite_1k_records.jsonl.gz", "rt") as f:
         datacite_importer.bezerk_mode = True
         counts = JsonLinePusher(datacite_importer, f).run()
-    assert counts['insert'] == 998
-    change = datacite_importer.api.get_changelog_entry(index=last_index+1)
-    release = datacite_importer.api.get_release(change.editgroup.edits.releases[0].ident)
+    assert counts["insert"] == 998
+    change = datacite_importer.api.get_changelog_entry(index=last_index + 1)
+    release = datacite_importer.api.get_release(
+        change.editgroup.edits.releases[0].ident
+    )
     assert len(release.contribs) == 3
 
@@ -41,122 +62,161 @@ def test_find_original_language_title():
     """
     Original language might be included, in various ways.
""" - Case = collections.namedtuple('Case', 'about input result') + Case = collections.namedtuple("Case", "about input result") cases = [ - Case('defaults to None', {}, None), - Case('ignore unknown keys', {'broken': 'kv'}, None), - Case('just a title', {'title': 'Noise Reduction'}, None), - Case('same title should be ignored', { - 'title': 'Noise Reduction', - 'original_language_title': 'Noise Reduction' - }, None), - Case('empty subdict is ignored', { - 'title': 'Noise Reduction', - 'original_language_title': {}, - }, None), - Case('unknown subdict keys are ignored', { - 'title': 'Noise Reduction', - 'original_language_title': {'broken': 'kv'}, - }, None), - Case('original string', { - 'title': 'Noise Reduction', - 'original_language_title': 'Подавление шума', - }, 'Подавление шума'), - Case('language tag is ignored, since its broken', { - 'title': 'Noise Reduction', - 'original_language_title': { - 'language': 'ja', - '__content__': 'Noise Reduction' + Case("defaults to None", {}, None), + Case("ignore unknown keys", {"broken": "kv"}, None), + Case("just a title", {"title": "Noise Reduction"}, None), + Case( + "same title should be ignored", + {"title": "Noise Reduction", "original_language_title": "Noise Reduction"}, + None, + ), + Case( + "empty subdict is ignored", + {"title": "Noise Reduction", "original_language_title": {},}, + None, + ), + Case( + "unknown subdict keys are ignored", + {"title": "Noise Reduction", "original_language_title": {"broken": "kv"},}, + None, + ), + Case( + "original string", + {"title": "Noise Reduction", "original_language_title": "Подавление шума",}, + "Подавление шума", + ), + Case( + "language tag is ignored, since its broken", + { + "title": "Noise Reduction", + "original_language_title": { + "language": "ja", + "__content__": "Noise Reduction", + }, }, - }, None), - Case('do not care about language', { - 'title': 'Noise Reduction', - 'original_language_title': { - 'language': 'ja', - '__content__': 'Rauschunterdrückung', + None, + ), + Case( + "do not care about language", + { + "title": "Noise Reduction", + "original_language_title": { + "language": "ja", + "__content__": "Rauschunterdrückung", + }, }, - }, 'Rauschunterdrückung'), - Case('ignore excessive questionmarks', { - 'title': 'Noise Reduction', - 'original_language_title': { - 'language': 'ja', - '__content__': '???? However', + "Rauschunterdrückung", + ), + Case( + "ignore excessive questionmarks", + { + "title": "Noise Reduction", + "original_language_title": { + "language": "ja", + "__content__": "???? However", + }, }, - }, None), + None, + ), ] for case in cases: result = find_original_language_title(case.input) assert result == case.result + def test_parse_datacite_titles(): """ Given a list of titles, find title, original_language_title and subtitle. Result is a 3-tuple of title, original_language_title, subtitle. 
""" - Case = collections.namedtuple('Case', 'about input result') + Case = collections.namedtuple("Case", "about input result") cases = [ - Case('handle None', None, (None, None, None)), - Case('empty list', [], (None, None, None)), - Case('empty item', [{}], (None, None, None)), - Case('broken keys', [{'broken': 'kv'}], (None, None, None)), - Case('title only', [{'title': 'Total carbon dioxide'}], - ('Total carbon dioxide', None, None), - ), - Case('title and subtitle', [ - {'title': 'Total carbon dioxide'}, - {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'}, - ], - ('Total carbon dioxide', None, 'Station TT043_7-9'), - ), - Case('title, subtitle order does not matter', [ - {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'}, - {'title': 'Total carbon dioxide'}, - ], - ('Total carbon dioxide', None, 'Station TT043_7-9'), - ), - Case('multiple titles, first wins', [ - {'title': 'Total carbon dioxide'}, - {'title': 'Meeting Heterogeneity'}, - ], - ('Total carbon dioxide', None, None), - ), - Case('multiple titles, plus sub', [ - {'title': 'Total carbon dioxide'}, - {'title': 'Meeting Heterogeneity'}, - {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'}, - ], - ('Total carbon dioxide', None, 'Station TT043_7-9'), - ), - Case('multiple titles, multiple subs', [ - {'title': 'Total carbon dioxide'}, - {'title': 'Meeting Heterogeneity'}, - {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'}, - {'title': 'Some other subtitle', 'titleType': 'Subtitle'}, - ], - ('Total carbon dioxide', None, 'Station TT043_7-9'), - ), - Case('title, original, sub', [ - {'title': 'Total carbon dioxide', 'original_language_title': 'Всего углекислого газа'}, - {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'}, - ], - ('Total carbon dioxide', 'Всего углекислого газа', 'Station TT043_7-9'), - ), - Case('title, original same as title, sub', [ - {'title': 'Total carbon dioxide', 'original_language_title': { - '__content__': 'Total carbon dioxide', - }}, - {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'}, - ], - ('Total carbon dioxide', None, 'Station TT043_7-9'), - ), - Case('title, original dict, sub', [ - {'title': 'Total carbon dioxide', 'original_language_title': { - '__content__': 'Всего углекислого газа', - }}, - {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'}, - ], - ('Total carbon dioxide', 'Всего углекислого газа', 'Station TT043_7-9'), + Case("handle None", None, (None, None, None)), + Case("empty list", [], (None, None, None)), + Case("empty item", [{}], (None, None, None)), + Case("broken keys", [{"broken": "kv"}], (None, None, None)), + Case( + "title only", + [{"title": "Total carbon dioxide"}], + ("Total carbon dioxide", None, None), + ), + Case( + "title and subtitle", + [ + {"title": "Total carbon dioxide"}, + {"title": "Station TT043_7-9", "titleType": "Subtitle"}, + ], + ("Total carbon dioxide", None, "Station TT043_7-9"), + ), + Case( + "title, subtitle order does not matter", + [ + {"title": "Station TT043_7-9", "titleType": "Subtitle"}, + {"title": "Total carbon dioxide"}, + ], + ("Total carbon dioxide", None, "Station TT043_7-9"), + ), + Case( + "multiple titles, first wins", + [{"title": "Total carbon dioxide"}, {"title": "Meeting Heterogeneity"},], + ("Total carbon dioxide", None, None), + ), + Case( + "multiple titles, plus sub", + [ + {"title": "Total carbon dioxide"}, + {"title": "Meeting Heterogeneity"}, + {"title": "Station TT043_7-9", "titleType": "Subtitle"}, + ], + ("Total carbon dioxide", None, "Station TT043_7-9"), + ), + Case( + "multiple titles, 
multiple subs", + [ + {"title": "Total carbon dioxide"}, + {"title": "Meeting Heterogeneity"}, + {"title": "Station TT043_7-9", "titleType": "Subtitle"}, + {"title": "Some other subtitle", "titleType": "Subtitle"}, + ], + ("Total carbon dioxide", None, "Station TT043_7-9"), + ), + Case( + "title, original, sub", + [ + { + "title": "Total carbon dioxide", + "original_language_title": "Всего углекислого газа", + }, + {"title": "Station TT043_7-9", "titleType": "Subtitle"}, + ], + ("Total carbon dioxide", "Всего углекислого газа", "Station TT043_7-9"), + ), + Case( + "title, original same as title, sub", + [ + { + "title": "Total carbon dioxide", + "original_language_title": {"__content__": "Total carbon dioxide",}, + }, + {"title": "Station TT043_7-9", "titleType": "Subtitle"}, + ], + ("Total carbon dioxide", None, "Station TT043_7-9"), + ), + Case( + "title, original dict, sub", + [ + { + "title": "Total carbon dioxide", + "original_language_title": { + "__content__": "Всего углекислого газа", + }, + }, + {"title": "Station TT043_7-9", "titleType": "Subtitle"}, + ], + ("Total carbon dioxide", "Всего углекислого газа", "Station TT043_7-9"), ), ] @@ -164,91 +224,128 @@ def test_parse_datacite_titles(): result = parse_datacite_titles(case.input) assert result == case.result, case.about + def test_parse_datacite_dates(): """ Test datacite date parsing. """ - Case = collections.namedtuple('Case', 'about input result') + Case = collections.namedtuple("Case", "about input result") cases = [ - Case('None is None', None, (None, None, None)), - Case('empty list is None', [], (None, None, None)), - Case('empty item is None', [{}], (None, None, None)), - Case('year only yields year only', [{'date': '2019'}], (None, None, 2019)), - Case('int year', [{'date': 2019}], (None, None, 2019)), - Case('first wins', [{'date': '2019'}, {'date': '2020'}], (None, None, 2019)), - Case('skip bogus year', [{'date': 'abc'}, {'date': '2020'}], (None, None, 2020)), - Case('first with type', [ - {'date': '2019', 'dateType': 'Accepted'}, {'date': '2020'} - ], (None, None, 2019)), - Case('full date', [ - {'date': '2019-12-01', 'dateType': 'Valid'}, - ], (datetime.date(2019, 12, 1), 12, 2019)), - Case('date type prio', [ - {'date': '2000-12-01', 'dateType': 'Valid'}, - {'date': '2010-01-01', 'dateType': 'Updated'}, - ], (datetime.date(2000, 12, 1), 12, 2000)), - Case('date type prio, Available > Updated', [ - {'date': '2010-01-01', 'dateType': 'Updated'}, - {'date': '2000-12-01', 'dateType': 'Available'}, - ], (datetime.date(2000, 12, 1), 12, 2000)), - Case('allow different date formats, Available > Updated', [ - {'date': '2010-01-01T10:00:00', 'dateType': 'Updated'}, - {'date': '2000-12-01T10:00:00', 'dateType': 'Available'}, - ], (datetime.date(2000, 12, 1), 12, 2000)), - Case('allow different date formats, Available > Updated', [ - {'date': '2010-01-01T10:00:00Z', 'dateType': 'Updated'}, - {'date': '2000-12-01T10:00:00Z', 'dateType': 'Available'}, - ], (datetime.date(2000, 12, 1), 12, 2000)), - Case('allow fuzzy date formats, Available > Updated', [ - {'date': '2010', 'dateType': 'Updated'}, - {'date': '2000 Dec 01', 'dateType': 'Available'}, - ], (datetime.date(2000, 12, 1), 12, 2000)), - Case('fuzzy year only', [ - {'date': 'Year 2010', 'dateType': 'Issued'}, - ], (None, None, 2010)), - Case('fuzzy year and month', [ - {'date': 'Year 2010 Feb', 'dateType': 'Issued'}, - ], (None, 2, 2010)), - Case('fuzzy year, month, day', [ - {'date': 'Year 2010 Feb 24', 'dateType': 'Issued'}, - ], (datetime.date(2010, 2, 24), 2, 
2010)), - Case('ignore broken date', [ - {'date': 'Febrrr 45', 'dateType': 'Updated'}, - ], (None, None, None)), + Case("None is None", None, (None, None, None)), + Case("empty list is None", [], (None, None, None)), + Case("empty item is None", [{}], (None, None, None)), + Case("year only yields year only", [{"date": "2019"}], (None, None, 2019)), + Case("int year", [{"date": 2019}], (None, None, 2019)), + Case("first wins", [{"date": "2019"}, {"date": "2020"}], (None, None, 2019)), + Case( + "skip bogus year", [{"date": "abc"}, {"date": "2020"}], (None, None, 2020) + ), + Case( + "first with type", + [{"date": "2019", "dateType": "Accepted"}, {"date": "2020"}], + (None, None, 2019), + ), + Case( + "full date", + [{"date": "2019-12-01", "dateType": "Valid"},], + (datetime.date(2019, 12, 1), 12, 2019), + ), + Case( + "date type prio", + [ + {"date": "2000-12-01", "dateType": "Valid"}, + {"date": "2010-01-01", "dateType": "Updated"}, + ], + (datetime.date(2000, 12, 1), 12, 2000), + ), + Case( + "date type prio, Available > Updated", + [ + {"date": "2010-01-01", "dateType": "Updated"}, + {"date": "2000-12-01", "dateType": "Available"}, + ], + (datetime.date(2000, 12, 1), 12, 2000), + ), + Case( + "allow different date formats, Available > Updated", + [ + {"date": "2010-01-01T10:00:00", "dateType": "Updated"}, + {"date": "2000-12-01T10:00:00", "dateType": "Available"}, + ], + (datetime.date(2000, 12, 1), 12, 2000), + ), + Case( + "allow different date formats, Available > Updated", + [ + {"date": "2010-01-01T10:00:00Z", "dateType": "Updated"}, + {"date": "2000-12-01T10:00:00Z", "dateType": "Available"}, + ], + (datetime.date(2000, 12, 1), 12, 2000), + ), + Case( + "allow fuzzy date formats, Available > Updated", + [ + {"date": "2010", "dateType": "Updated"}, + {"date": "2000 Dec 01", "dateType": "Available"}, + ], + (datetime.date(2000, 12, 1), 12, 2000), + ), + Case( + "fuzzy year only", + [{"date": "Year 2010", "dateType": "Issued"},], + (None, None, 2010), + ), + Case( + "fuzzy year and month", + [{"date": "Year 2010 Feb", "dateType": "Issued"},], + (None, 2, 2010), + ), + Case( + "fuzzy year, month, day", + [{"date": "Year 2010 Feb 24", "dateType": "Issued"},], + (datetime.date(2010, 2, 24), 2, 2010), + ), + Case( + "ignore broken date", + [{"date": "Febrrr 45", "dateType": "Updated"},], + (None, None, None), + ), ] for case in cases: result = parse_datacite_dates(case.input) assert result == case.result, case.about + def test_datacite_importer(datacite_importer): last_index = datacite_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/datacite_sample.jsonl', 'r') as f: + with open("tests/files/datacite_sample.jsonl", "r") as f: datacite_importer.bezerk_mode = True counts = JsonLinePusher(datacite_importer, f).run() - assert counts['insert'] == 1 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 1 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = datacite_importer.api.get_changelog_entry(index=last_index+1) + change = datacite_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "datacite" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.DataciteImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.DataciteImporter" in eg.extra["agent"] last_index = datacite_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/datacite_sample.jsonl', 'r') as f: + with 
open("tests/files/datacite_sample.jsonl", "r") as f: datacite_importer.bezerk_mode = False datacite_importer.reset() counts = JsonLinePusher(datacite_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 1 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 1 + assert counts["skip"] == 0 assert last_index == datacite_importer.api.get_changelog(limit=1)[0].index + def test_datacite_dict_parse(datacite_importer): - with open('tests/files/datacite_sample.jsonl', 'r') as f: + with open("tests/files/datacite_sample.jsonl", "r") as f: raw = json.load(f) r = datacite_importer.parse_record(raw) # ensure the API server is ok with format @@ -256,7 +353,9 @@ def test_datacite_dict_parse(datacite_importer): print(r.extra) assert r.title == "Triticum turgidum L. subsp. durum (Desf.) Husn. 97090" - assert r.publisher == "International Centre for Agricultural Research in Dry Areas" + assert ( + r.publisher == "International Centre for Agricultural Research in Dry Areas" + ) assert r.release_type == "article" assert r.release_stage == "published" assert r.license_slug == None @@ -267,13 +366,15 @@ def test_datacite_dict_parse(datacite_importer): assert r.subtitle == None assert r.release_date == None assert r.release_year == 1986 - assert 'subtitle' not in r.extra - assert 'subtitle' not in r.extra['datacite'] - assert 'funder' not in r.extra - assert 'funder' not in r.extra['datacite'] + assert "subtitle" not in r.extra + assert "subtitle" not in r.extra["datacite"] + assert "funder" not in r.extra + assert "funder" not in r.extra["datacite"] # matched by ISSN, so shouldn't be in there - #assert extra['container_name'] == "International Journal of Quantum Chemistry" - assert r.extra['datacite']['subjects'] == [{'subject': 'Plant Genetic Resource for Food and Agriculture'}] + # assert extra['container_name'] == "International Journal of Quantum Chemistry" + assert r.extra["datacite"]["subjects"] == [ + {"subject": "Plant Genetic Resource for Food and Agriculture"} + ] assert len(r.abstracts) == 1 assert len(r.abstracts[0].content) == 421 assert len(r.contribs) == 2 @@ -282,34 +383,41 @@ def test_datacite_dict_parse(datacite_importer): assert r.contribs[0].surname == None assert len(r.refs) == 0 + def test_datacite_conversions(datacite_importer): """ Datacite JSON to release entity JSON representation. The count is hardcoded for now. 
""" datacite_importer.debug = True - for i in range(34): - src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i) - dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i) - with open(src, 'r') as f: + for i in range(35): + src = "tests/files/datacite/datacite_doc_{0:02d}.json".format(i) + dst = "tests/files/datacite/datacite_result_{0:02d}.json".format(i) + with open(src, "r") as f: re = datacite_importer.parse_record(json.load(f)) result = entity_to_dict(re) - with open(dst, 'r') as f: - expected = json.loads(f.read()) + with open(dst, "r") as f: + expected = json.loads(f.read()) + + assert result == expected, "output mismatch in {}".format(dst) - assert result == expected, 'output mismatch in {}'.format(dst) def test_index_form_to_display_name(): - Case = collections.namedtuple('Case', 'input output') + Case = collections.namedtuple("Case", "input output") cases = [ - Case('', ''), - Case('ABC', 'ABC'), - Case('International Space Station', 'International Space Station'), - Case('Jin, Shan', 'Shan Jin'), - Case('Volkshochschule Der Bundesstadt Bonn', 'Volkshochschule Der Bundesstadt Bonn'), - Case('Solomon, P. M.', 'P. M. Solomon'), - Case('Sujeevan Ratnasingham', 'Sujeevan Ratnasingham'), - Case('Paul Stöckli (1906-1991), Künstler', 'Paul Stöckli (1906-1991), Künstler'), + Case("", ""), + Case("ABC", "ABC"), + Case("International Space Station", "International Space Station"), + Case("Jin, Shan", "Shan Jin"), + Case( + "Volkshochschule Der Bundesstadt Bonn", + "Volkshochschule Der Bundesstadt Bonn", + ), + Case("Solomon, P. M.", "P. M. Solomon"), + Case("Sujeevan Ratnasingham", "Sujeevan Ratnasingham"), + Case( + "Paul Stöckli (1906-1991), Künstler", "Paul Stöckli (1906-1991), Künstler" + ), ] for c in cases: @@ -317,45 +425,69 @@ def test_index_form_to_display_name(): def test_lookup_license_slug(): - Case = collections.namedtuple('Case', 'input output') + Case = collections.namedtuple("Case", "input output") cases = [ - Case('https://opensource.org/licenses/MIT', 'MIT'), - Case('creativecommons.org/licenses/by-nc-nd/3.0/', 'CC-BY-NC-ND'), - Case('http://creativecommons.org/licences/by-nc-sa/4.0', 'CC-BY-NC-SA'), - Case('http://creativecommons.org/licenses/by-nc-nd/2.5/co', 'CC-BY-NC-ND'), - Case('http://creativecommons.org/licenses/by-nd/4.0/legalcode', 'CC-BY-ND'), - Case('http://creativecommons.org/licenses/by/2.0/uk/legalcode', 'CC-BY'), - Case('http://creativecommons.org/publicdomain/zero/1.0/legalcode', 'CC-0'), - Case('http://doi.wiley.com/10.1002/tdm_license_1.1', 'WILEY-TDM-1.1'), - Case('http://homepage.data-planet.com/terms-use', 'SAGE-DATA-PLANET'), - Case('http://www.springer.com/tdm', 'SPRINGER-TDM'), - Case('https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess.xhtml', 'ADS-UK'), - Case('https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess', 'ADS-UK'), - Case('https://creativecommons.org/public-domain/cc0', 'CC-0'), - Case('https://creativecommons.org/publicdomain/zero/1.0', 'CC-0'), - Case('https://creativecommons.org/share-your-work/public-domain/cc0', 'CC-0'), - Case('https://www.elsevier.com/tdm/userlicense/1.0', 'ELSEVIER-USER-1.0'), - Case('https://www.gnu.org/licenses/gpl-3.0.html', 'GPL-3.0'), - Case('http://rightsstatements.org/page/InC/1.0?language=en', 'RS-INC'), - Case('http://onlinelibrary.wiley.com/termsAndConditions', 'WILEY'), - Case('https://publikationen.bibliothek.kit.edu/kitopen-lizenz', 'KIT-OPEN'), - Case('http://journals.sagepub.com/page/policies/text-and-data-mining-license', 'SAGE-TDM'), - 
Case('https://creativecommons.org/publicdomain/mark/1.0/deed.de', 'CC-PUBLICDOMAIN'), - Case('http://creativecommons.org/publicdomain/mark/1.0', 'CC-PUBLICDOMAIN'), - Case('https://creativecommons.org/publicdomain/mark/1.0', 'CC-PUBLICDOMAIN'), - Case('https://creativecommons.org/publicdomain/mark/1.0/', 'CC-PUBLICDOMAIN'), - Case('https://creativecommons.org/publicdomain/mark/1.0/deed.de', 'CC-PUBLICDOMAIN'), - Case('https://creativecommons.org/share-your-work/public-domain/cc0/', 'CC-0'), - Case('http://spdx.org/licenses/CC0-1.0.json', 'CC-0'), - Case('http://spdx.org/licenses/CC-BY-1.0.json', 'CC-BY'), - Case('http://spdx.org/licenses/CC-BY-4.0.json', 'CC-BY'), - Case('http://spdx.org/licenses/CC-BY-NC-4.0.json', 'CC-BY-NC'), - Case('http://spdx.org/licenses/CC-BY-SA-3.0.json', 'CC-BY-SA'), - Case('http://spdx.org/licenses/CC-BY-SA-4.0.json', 'CC-BY-SA'), - Case('http://spdx.org/licenses/MIT.json', 'MIT'), - Case('http://spdx.org/licenses/OGL-Canada-2.0.json', 'OGL-CANADA'), + Case("https://opensource.org/licenses/MIT", "MIT"), + Case("creativecommons.org/licenses/by-nc-nd/3.0/", "CC-BY-NC-ND"), + Case("http://creativecommons.org/licences/by-nc-sa/4.0", "CC-BY-NC-SA"), + Case("http://creativecommons.org/licenses/by-nc-nd/2.5/co", "CC-BY-NC-ND"), + Case("http://creativecommons.org/licenses/by-nd/4.0/legalcode", "CC-BY-ND"), + Case("http://creativecommons.org/licenses/by/2.0/uk/legalcode", "CC-BY"), + Case("http://creativecommons.org/publicdomain/zero/1.0/legalcode", "CC-0"), + Case("http://doi.wiley.com/10.1002/tdm_license_1.1", "WILEY-TDM-1.1"), + Case("http://homepage.data-planet.com/terms-use", "SAGE-DATA-PLANET"), + Case("http://www.springer.com/tdm", "SPRINGER-TDM"), + Case( + "https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess.xhtml", + "ADS-UK", + ), + Case( + "https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess", "ADS-UK" + ), + Case("https://creativecommons.org/public-domain/cc0", "CC-0"), + Case("https://creativecommons.org/publicdomain/zero/1.0", "CC-0"), + Case("https://creativecommons.org/share-your-work/public-domain/cc0", "CC-0"), + Case("https://www.elsevier.com/tdm/userlicense/1.0", "ELSEVIER-USER-1.0"), + Case("https://www.gnu.org/licenses/gpl-3.0.html", "GPL-3.0"), + Case("http://rightsstatements.org/page/InC/1.0?language=en", "RS-INC"), + Case("http://onlinelibrary.wiley.com/termsAndConditions", "WILEY"), + Case("https://publikationen.bibliothek.kit.edu/kitopen-lizenz", "KIT-OPEN"), + Case( + "http://journals.sagepub.com/page/policies/text-and-data-mining-license", + "SAGE-TDM", + ), + Case( + "https://creativecommons.org/publicdomain/mark/1.0/deed.de", + "CC-PUBLICDOMAIN", + ), + Case("http://creativecommons.org/publicdomain/mark/1.0", "CC-PUBLICDOMAIN"), + Case("https://creativecommons.org/publicdomain/mark/1.0", "CC-PUBLICDOMAIN"), + Case("https://creativecommons.org/publicdomain/mark/1.0/", "CC-PUBLICDOMAIN"), + Case( + "https://creativecommons.org/publicdomain/mark/1.0/deed.de", + "CC-PUBLICDOMAIN", + ), + Case("https://creativecommons.org/share-your-work/public-domain/cc0/", "CC-0"), + Case("http://spdx.org/licenses/CC0-1.0.json", "CC-0"), + Case("http://spdx.org/licenses/CC-BY-1.0.json", "CC-BY"), + Case("http://spdx.org/licenses/CC-BY-4.0.json", "CC-BY"), + Case("http://spdx.org/licenses/CC-BY-NC-4.0.json", "CC-BY-NC"), + Case("http://spdx.org/licenses/CC-BY-SA-3.0.json", "CC-BY-SA"), + Case("http://spdx.org/licenses/CC-BY-SA-4.0.json", "CC-BY-SA"), + Case("http://spdx.org/licenses/MIT.json", "MIT"), + 
Case("http://spdx.org/licenses/OGL-Canada-2.0.json", "OGL-CANADA"), ] for c in cases: got = lookup_license_slug(c.input) - assert c.output == got, '{}: got {}, want {}'.format(c.input, got, c.output) + assert c.output == got, "{}: got {}, want {}".format(c.input, got, c.output) + + +def test_contributor_list_contains_contributor(): + Case = collections.namedtuple("Case", "contrib_list contrib want") + cases = [ + Case([], fatcat_openapi_client.ReleaseContrib(raw_name="Paul Katz"), False), + ] + for c in cases: + got = contributor_list_contains_contributor(c.contrib_list, c.contrib) + assert got == c.want |
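The new test only covers the empty-list case so far. Purely as an illustration (these cases are not part of this commit, and assume the fatcat packages are importable), the same case table could be extended to exercise the name match and the role-defaults-to-author behaviour:

```python
# Hypothetical additional cases (not in this commit) for
# test_contributor_list_contains_contributor, illustrating the helper's semantics.
import collections
import fatcat_openapi_client
from fatcat_tools.importers.datacite import contributor_list_contains_contributor

Case = collections.namedtuple("Case", "contrib_list contrib want")
paul = fatcat_openapi_client.ReleaseContrib(raw_name="Paul Katz", role="author")
extra_cases = [
    # role=None is treated as "author", so this counts as a duplicate
    Case([paul], fatcat_openapi_client.ReleaseContrib(raw_name="Paul Katz"), True),
    # same name but a different role is kept as a distinct contributor
    Case([paul], fatcat_openapi_client.ReleaseContrib(raw_name="Paul Katz", role="editor"), False),
    # a different name never matches
    Case([paul], fatcat_openapi_client.ReleaseContrib(raw_name="Jane Doe", role="author"), False),
]
for c in extra_cases:
    assert contributor_list_contains_contributor(c.contrib_list, c.contrib) == c.want
```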