summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-01-29 16:58:04 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-01-29 16:58:06 -0800
commit: 1305a503b116e8f8081cbc48dc14c92b4b64b17d (patch)
tree: 0f69fba9f48470037b221174c42085b2e1c89272
parent: 26d30331174087a57591c9860c1a84d7010b3f20 (diff)
download: fatcat-1305a503b116e8f8081cbc48dc14c92b4b64b17d.tar.gz
download: fatcat-1305a503b116e8f8081cbc48dc14c92b4b64b17d.zip
crossref import tweaks/fixes
- refs: article-title not title; save unstructured; authors not author
- save 'language' field (already an ISO code)
-rw-r--r-- python/fatcat_tools/importers/crossref.py 16
-rw-r--r-- python/tests/files/crossref-works.single.json 1
-rw-r--r-- python/tests/import_crossref.py 3
3 files changed, 12 insertions, 8 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index c4e55962..f9a4fe46 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -239,11 +239,13 @@ class CrossrefImporter(EntityImporter):
ref_extra['journal-title'] = rm['journal-title']
if rm.get('DOI'):
ref_extra['doi'] = rm.get('DOI').lower()
- # TODO: what fields here? CSL citation stuff
- for k in ('author', 'editor', 'edition', 'authority', 'version',
- 'genre', 'url', 'event', 'issue', 'volume', 'date',
- 'accessed_date', 'issued', 'page', 'medium',
- 'collection_title', 'chapter_number'):
+ author = clean(rm.get('author'))
+ if author:
+ ref_extra['authors'] = [author]
+ for k in ('editor', 'edition', 'authority', 'version', 'genre',
+ 'url', 'event', 'issue', 'volume', 'date', 'accessed_date',
+ 'issued', 'page', 'medium', 'collection_title', 'chapter_number',
+ 'unstructured', 'series-title', 'volume-title'):
if clean(rm.get(k)):
ref_extra[k] = clean(rm[k])
if not ref_extra:
@@ -255,7 +257,7 @@ class CrossrefImporter(EntityImporter):
key=key,
year=year,
container_name=clean(container_name),
- title=clean(rm.get('title')),
+ title=clean(rm.get('article-title')),
locator=clean(rm.get('first-page')),
# TODO: just dump JSON somewhere here?
extra=ref_extra))
@@ -378,7 +380,7 @@ class CrossrefImporter(EntityImporter):
volume=clean(obj.get('volume')),
issue=clean(obj.get('issue')),
pages=clean(obj.get('page')),
- language=None, # crossref doesn't supply language info
+ language=clean(obj.get('language')),
license_slug=license_slug,
extra=extra,
abstracts=abstracts,
diff --git a/python/tests/files/crossref-works.single.json b/python/tests/files/crossref-works.single.json
index f00c2142..2b775647 100644
--- a/python/tests/files/crossref-works.single.json
+++ b/python/tests/files/crossref-works.single.json
@@ -15,6 +15,7 @@
"$numberLong": "1508768117199"
}
},
+ "language": "fr",
"reference-count": 25,
"publisher": "Wiley-Blackwell",
"issue": "4",
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index cb2143b3..d439137d 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -82,6 +82,7 @@ def test_crossref_dict_parse(crossref_importer):
assert r.license_slug == "CC-BY-NC-ND"
assert r.original_title == "Renormalized perturbation theory auf deutsch"
assert r.isbn13 == "978-3-16-148410-0"
+ assert r.language == "fr"
assert 'subtitle' not in r.extra
assert 'subtitle' not in r.extra['crossref']
assert 'funder' not in r.extra
@@ -107,7 +108,7 @@ def test_crossref_dict_parse(crossref_importer):
assert r.refs[0].year == 1972
assert r.refs[0].locator == "1734"
assert r.refs[0].container_name == "J. Chem. Phys."
- assert r.refs[0].extra == {"volume": "57", "author": "Swenson", "doi": "10.1063/1.1678462", "medium": "DVD"}
+ assert r.refs[0].extra == {"volume": "57", "authors": ["Swenson"], "doi": "10.1063/1.1678462", "medium": "DVD"}
assert r.refs[2].key == 'BIB3'
assert r.refs[2].extra.get('author') is None
assert r.refs[2].container_name == "Hypervirial Theorem's, Lecture Notes in Chemistry <3"