about | summary | refs | log | tree | commit | diff | stats
path: root/python/tests
diff options
context:
space:
mode:
author: bnewbold <bnewbold@archive.org> 2021-11-11 01:12:18 +0000
committer: bnewbold <bnewbold@archive.org> 2021-11-11 01:12:18 +0000
commit: 6ad9d24e4d7d901d6fc394e6e91575f6acba7ff4 (patch)
tree: 1b80344125152b46ae727dc8bbff73cc12abfd3e /python/tests
parent: 7e3f91f1a49ea85707cae31125021ba761f5373d (diff)
parent: 6eaf4f57c1f92b6f4f46adc38e5b39fd30b65d81 (diff)
download: fatcat-6ad9d24e4d7d901d6fc394e6e91575f6acba7ff4.tar.gz
download: fatcat-6ad9d24e4d7d901d6fc394e6e91575f6acba7ff4.zip
Merge branch 'bnewbold-import-refactors' into 'master'
import refactors and deprecations

Some of these are from old stale branches (the datacite subject metadata patch), but most are from yesterday and today.

Sort of a hodge-podge, but the general theme is getting around to deferred cleanups and refactors specific to importer code before making some behavioral changes.

The Datacite-specific stuff could use review here.

Remove unused/deprecated/dead code:

- cdl_dash_dat and wayback_static importers, which were for specific early example entities and have been superseded by other importers
- "extid map" sqlite3 feature from several importers, which was only used for initial bulk imports (and maybe should not have been used)

Refactors:

- moved a number of large data structures out of importer code and into a dedicated static file (`biblio_lookup_tables.py`). Didn't move all of them, just the ones that were either generic or very large (making the code hard to read)
- shuffled around relative imports and some function names ("clean_str" vs. "clean")

Some actual behavioral changes:

- remove some Datacite-specific license slugs
- stop trying to fix double-slashes in DOIs; that was causing more harm than good (some DOIs do actually have double-slashes!)
- remove some excess metadata from datacite 'extra' fields
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/files/datacite/datacite_result_00.json | 3
-rw-r--r-- python/tests/import_crossref.py | 8
-rw-r--r-- python/tests/import_datacite.py | 14
-rw-r--r-- python/tests/import_jalc.py | 8
-rw-r--r-- python/tests/import_jstor.py | 8
-rw-r--r-- python/tests/import_pubmed.py | 2
6 files changed, 13 insertions, 30 deletions
diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
index 130a46ff..06147cfc 100644
--- a/python/tests/files/datacite/datacite_result_00.json
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -87,6 +87,5 @@
"release_type": "article-journal",
"release_year": 2019,
"title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N′-(4-nitrophenyl)thiourea",
- "volume": "38",
- "license_slug": "SPRINGER-TDM"
+ "volume": "38"
}
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index eb931eb1..5f38e73e 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -10,17 +10,13 @@ from fatcat_tools.importers import CrossrefImporter, JsonLinePusher
@pytest.fixture(scope="function")
def crossref_importer(api):
with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
- yield CrossrefImporter(
- api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True
- )
+ yield CrossrefImporter(api, issn_file, bezerk_mode=True)
@pytest.fixture(scope="function")
def crossref_importer_existing(api):
with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
- yield CrossrefImporter(
- api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False
- )
+ yield CrossrefImporter(api, issn_file, bezerk_mode=False)
@pytest.mark.skip(
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 220dc0f6..28884cda 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -15,9 +15,9 @@ from fatcat_tools.importers import DataciteImporter, JsonLinePusher
from fatcat_tools.importers.datacite import (
clean_doi,
contributor_list_contains_contributor,
+ datacite_lookup_license_slug,
find_original_language_title,
index_form_to_display_name,
- lookup_license_slug,
parse_datacite_dates,
parse_datacite_titles,
)
@@ -30,7 +30,6 @@ def datacite_importer(api):
yield DataciteImporter(
api,
issn_file,
- extid_map_file="tests/files/example_map.sqlite3",
bezerk_mode=True,
)
@@ -41,7 +40,6 @@ def datacite_importer_existing(api):
yield DataciteImporter(
api,
issn_file,
- extid_map_file="tests/files/example_map.sqlite3",
bezerk_mode=False,
)
@@ -465,9 +463,9 @@ def test_lookup_license_slug():
Case("http://creativecommons.org/licenses/by-nd/4.0/legalcode", "CC-BY-ND"),
Case("http://creativecommons.org/licenses/by/2.0/uk/legalcode", "CC-BY"),
Case("http://creativecommons.org/publicdomain/zero/1.0/legalcode", "CC-0"),
- Case("http://doi.wiley.com/10.1002/tdm_license_1.1", "WILEY-TDM-1.1"),
+ Case("http://doi.wiley.com/10.1002/tdm_license_1.1", None),
Case("http://homepage.data-planet.com/terms-use", "SAGE-DATA-PLANET"),
- Case("http://www.springer.com/tdm", "SPRINGER-TDM"),
+ Case("http://www.springer.com/tdm", None),
Case(
"https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess.xhtml",
"ADS-UK",
@@ -479,11 +477,11 @@ def test_lookup_license_slug():
Case("https://www.elsevier.com/tdm/userlicense/1.0", "ELSEVIER-USER-1.0"),
Case("https://www.gnu.org/licenses/gpl-3.0.html", "GPL-3.0"),
Case("http://rightsstatements.org/page/InC/1.0?language=en", "RS-INC"),
- Case("http://onlinelibrary.wiley.com/termsAndConditions", "WILEY"),
+ Case("http://onlinelibrary.wiley.com/termsAndConditions", None),
Case("https://publikationen.bibliothek.kit.edu/kitopen-lizenz", "KIT-OPEN"),
Case(
"http://journals.sagepub.com/page/policies/text-and-data-mining-license",
- "SAGE-TDM",
+ None,
),
Case(
"https://creativecommons.org/publicdomain/mark/1.0/deed.de",
@@ -508,7 +506,7 @@ def test_lookup_license_slug():
]
for c in cases:
- got = lookup_license_slug(c.input)
+ got = datacite_lookup_license_slug(c.input)
assert c.output == got, "{}: got {}, want {}".format(c.input, got, c.output)
diff --git a/python/tests/import_jalc.py b/python/tests/import_jalc.py
index 4ebc87b4..8281b9a1 100644
--- a/python/tests/import_jalc.py
+++ b/python/tests/import_jalc.py
@@ -8,17 +8,13 @@ from fatcat_tools.importers import Bs4XmlFilePusher, Bs4XmlLinesPusher, JalcImpo
@pytest.fixture(scope="function")
def jalc_importer(api):
with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
- yield JalcImporter(
- api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True
- )
+ yield JalcImporter(api, issn_file, bezerk_mode=True)
@pytest.fixture(scope="function")
def jalc_importer_existing(api):
with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
- yield JalcImporter(
- api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False
- )
+ yield JalcImporter(api, issn_file, bezerk_mode=False)
def test_jalc_importer(jalc_importer):
diff --git a/python/tests/import_jstor.py b/python/tests/import_jstor.py
index 8ad550b3..7e13c8b0 100644
--- a/python/tests/import_jstor.py
+++ b/python/tests/import_jstor.py
@@ -8,17 +8,13 @@ from fatcat_tools.importers import Bs4XmlFilePusher, JstorImporter
@pytest.fixture(scope="function")
def jstor_importer(api):
with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
- yield JstorImporter(
- api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True
- )
+ yield JstorImporter(api, issn_file, bezerk_mode=True)
@pytest.fixture(scope="function")
def jstor_importer_existing(api):
with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
- yield JstorImporter(
- api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False
- )
+ yield JstorImporter(api, issn_file, bezerk_mode=False)
def test_jstor_importer(jstor_importer):
diff --git a/python/tests/import_pubmed.py b/python/tests/import_pubmed.py
index a5301f29..e783db48 100644
--- a/python/tests/import_pubmed.py
+++ b/python/tests/import_pubmed.py
@@ -11,7 +11,6 @@ def pubmed_importer(api):
yield PubmedImporter(
api,
issn_file,
- extid_map_file="tests/files/example_map.sqlite3",
bezerk_mode=True,
lookup_refs=True,
)
@@ -23,7 +22,6 @@ def pubmed_importer_existing(api):
yield PubmedImporter(
api,
issn_file,
- extid_map_file="tests/files/example_map.sqlite3",
bezerk_mode=False,
lookup_refs=True,
)