summary | refs | log | tree | commit | diff | stats
path: root/python/tests/import_datacite.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-11-02 18:13:14 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-11-02 18:13:14 -0700
commit: cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7 (patch)
tree: 5e4034027b51f3ee4d2a488bb2cbb7a75c3bd0d8 /python/tests/import_datacite.py
parent: 78f08280edea4ff65ca613ad30005c45cc48dea6 (diff)
download: fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.tar.gz
fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.zip
fmt (black): tests/
Diffstat (limited to 'python/tests/import_datacite.py')
-rw-r--r-- python/tests/import_datacite.py 74
1 files changed, 47 insertions, 27 deletions
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index e1c79bc4..220dc0f6 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -54,9 +54,7 @@ def test_datacite_importer_huge(datacite_importer):
counts = JsonLinePusher(datacite_importer, f).run()
assert counts["insert"] == 998
change = datacite_importer.api.get_changelog_entry(index=last_index + 1)
- release = datacite_importer.api.get_release(
- change.editgroup.edits.releases[0].ident
- )
+ release = datacite_importer.api.get_release(change.editgroup.edits.releases[0].ident)
assert len(release.contribs) == 3
@@ -76,17 +74,26 @@ def test_find_original_language_title():
),
Case(
"empty subdict is ignored",
- {"title": "Noise Reduction", "original_language_title": {},},
+ {
+ "title": "Noise Reduction",
+ "original_language_title": {},
+ },
None,
),
Case(
"unknown subdict keys are ignored",
- {"title": "Noise Reduction", "original_language_title": {"broken": "kv"},},
+ {
+ "title": "Noise Reduction",
+ "original_language_title": {"broken": "kv"},
+ },
None,
),
Case(
"original string",
- {"title": "Noise Reduction", "original_language_title": "Подавление шума",},
+ {
+ "title": "Noise Reduction",
+ "original_language_title": "Подавление шума",
+ },
"Подавление шума",
),
Case(
@@ -163,7 +170,10 @@ def test_parse_datacite_titles():
),
Case(
"multiple titles, first wins",
- [{"title": "Total carbon dioxide"}, {"title": "Meeting Heterogeneity"},],
+ [
+ {"title": "Total carbon dioxide"},
+ {"title": "Meeting Heterogeneity"},
+ ],
("Total carbon dioxide", None, None),
),
Case(
@@ -201,7 +211,9 @@ def test_parse_datacite_titles():
[
{
"title": "Total carbon dioxide",
- "original_language_title": {"__content__": "Total carbon dioxide",},
+ "original_language_title": {
+ "__content__": "Total carbon dioxide",
+ },
},
{"title": "Station TT043_7-9", "titleType": "Subtitle"},
],
@@ -239,9 +251,7 @@ def test_parse_datacite_dates():
Case("year only yields year only", [{"date": "2019"}], (None, None, 2019)),
Case("int year", [{"date": 2019}], (None, None, 2019)),
Case("first wins", [{"date": "2019"}, {"date": "2020"}], (None, None, 2019)),
- Case(
- "skip bogus year", [{"date": "abc"}, {"date": "2020"}], (None, None, 2020)
- ),
+ Case("skip bogus year", [{"date": "abc"}, {"date": "2020"}], (None, None, 2020)),
Case(
"first with type",
[{"date": "2019", "dateType": "Accepted"}, {"date": "2020"}],
@@ -249,7 +259,9 @@ def test_parse_datacite_dates():
),
Case(
"full date",
- [{"date": "2019-12-01", "dateType": "Valid"},],
+ [
+ {"date": "2019-12-01", "dateType": "Valid"},
+ ],
(datetime.date(2019, 12, 1), 12, 2019),
),
Case(
@@ -294,22 +306,30 @@ def test_parse_datacite_dates():
),
Case(
"fuzzy year only",
- [{"date": "Year 2010", "dateType": "Issued"},],
+ [
+ {"date": "Year 2010", "dateType": "Issued"},
+ ],
(None, None, 2010),
),
Case(
"fuzzy year and month",
- [{"date": "Year 2010 Feb", "dateType": "Issued"},],
+ [
+ {"date": "Year 2010 Feb", "dateType": "Issued"},
+ ],
(None, 2, 2010),
),
Case(
"fuzzy year, month, day",
- [{"date": "Year 2010 Feb 24", "dateType": "Issued"},],
+ [
+ {"date": "Year 2010 Feb 24", "dateType": "Issued"},
+ ],
(datetime.date(2010, 2, 24), 2, 2010),
),
Case(
"ignore broken date",
- [{"date": "Febrrr 45", "dateType": "Updated"},],
+ [
+ {"date": "Febrrr 45", "dateType": "Updated"},
+ ],
(None, None, None),
),
]
@@ -317,13 +337,19 @@ def test_parse_datacite_dates():
result = parse_datacite_dates(case.input)
assert result == case.result, case.about
+
def test_datacite_spammy_title(datacite_importer):
- r = datacite_importer.parse_record({"title": """HD! My Hero academia
+ r = datacite_importer.parse_record(
+ {
+ "title": """HD! My Hero academia
Heroes: Rising [2020]Full Movie Watch
Online And Free Download""",
- "attributes": {"doi": "10.1234/1234"}})
+ "attributes": {"doi": "10.1234/1234"},
+ }
+ )
assert r is False
+
def test_datacite_importer(datacite_importer):
last_index = datacite_importer.api.get_changelog(limit=1)[0].index
with open("tests/files/datacite_sample.jsonl", "r") as f:
@@ -361,9 +387,7 @@ def test_datacite_dict_parse(datacite_importer):
print(r.extra)
assert r.title == "Triticum turgidum L. subsp. durum (Desf.) Husn. 97090"
- assert (
- r.publisher == "International Centre for Agricultural Research in Dry Areas"
- )
+ assert r.publisher == "International Centre for Agricultural Research in Dry Areas"
assert r.release_type == "article"
assert r.release_stage == "published"
assert r.license_slug is None
@@ -424,9 +448,7 @@ def test_index_form_to_display_name():
),
Case("Solomon, P. M.", "P. M. Solomon"),
Case("Sujeevan Ratnasingham", "Sujeevan Ratnasingham"),
- Case(
- "Paul Stöckli (1906-1991), Künstler", "Paul Stöckli (1906-1991), Künstler"
- ),
+ Case("Paul Stöckli (1906-1991), Künstler", "Paul Stöckli (1906-1991), Künstler"),
]
for c in cases:
@@ -450,9 +472,7 @@ def test_lookup_license_slug():
"https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess.xhtml",
"ADS-UK",
),
- Case(
- "https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess", "ADS-UK"
- ),
+ Case("https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess", "ADS-UK"),
Case("https://creativecommons.org/public-domain/cc0", "CC-0"),
Case("https://creativecommons.org/publicdomain/zero/1.0", "CC-0"),
Case("https://creativecommons.org/share-your-work/public-domain/cc0", "CC-0"),