aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-01-30 13:36:01 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-01-30 13:36:01 +0100
commit: 7dec2d1560ebf5ca6d0d337eb246fe345f6ec0bb (patch)
tree: 7acfda698ff56ce2e9690a4026fbc212fd411895 /python/tests
parent: 55a4f211532c93d8164b0d4719dc0413005941ea (diff)
download: fatcat-7dec2d1560ebf5ca6d0d337eb246fe345f6ec0bb.tar.gz
fatcat-7dec2d1560ebf5ca6d0d337eb246fe345f6ec0bb.zip
datacite: improve date handling and minor tweak
Records from https://www.micropublication.org/ did not have a date in fatcat (FC), although the raw data contained date strings: these records do not use the finer-grained "attributes.dates" field, but only "attributes.published" and/or "attributes.publicationYear". Support for those fields has been added, including a test case. While adding this test (#30), a processing gap for names became clear (an author may have "given_name" and "surname", but no "name"). This bug has been fixed, too.
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/files/datacite/datacite_doc_30.json 72
-rw-r--r-- python/tests/files/datacite/datacite_result_30.json 38
-rw-r--r-- python/tests/import_datacite.py 3
3 files changed, 111 insertions, 2 deletions
diff --git a/python/tests/files/datacite/datacite_doc_30.json b/python/tests/files/datacite/datacite_doc_30.json
new file mode 100644
index 00000000..5f851bbb
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_30.json
@@ -0,0 +1,72 @@
+{
+ "id": "10.17912/micropub.biology.000143",
+ "type": "dois",
+ "attributes": {
+ "doi": "10.17912/micropub.biology.000143",
+ "identifiers": null,
+ "creators": [
+ {
+ "raw_name": "Celja J Uebel",
+ "givenName": "Celja J",
+ "familyName": "Uebel",
+ "affiliation": [],
+ "role": "author"
+ },
+ {
+ "raw_name": "Carolyn M Phillips",
+ "givenName": "Carolyn M",
+ "familyName": "Phillips",
+ "affiliation": [],
+ "role": "author"
+ }
+ ],
+ "titles": [
+ {
+ "title": "Phase-separated protein dynamics are affected by fluorescent tag choice"
+ }
+ ],
+ "publisher": "microPublication Biology",
+ "container": {},
+ "publicationYear": 2019,
+ "subjects": [],
+ "contributors": [],
+ "dates": null,
+ "language": null,
+ "types": {
+ "resourceTypeGeneral": "DataPaper"
+ },
+ "relatedIdentifiers": [],
+ "sizes": [],
+ "formats": [],
+ "version": null,
+ "rightsList": [],
+ "descriptions": [
+ {
+ "description": "Biological liquid-liquid phase separation",
+ "descriptionType": "Abstract"
+ }
+ ],
+ "geoLocations": [],
+ "fundingReferences": [],
+ "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143",
+ "contentUrl": null,
+ "metadataVersion": 0,
+ "schemaVersion": null,
+ "source": null,
+ "isActive": true,
+ "state": "findable",
+ "reason": null,
+ "created": "2019-08-19T14:43:08.000Z",
+ "registered": "2019-08-19T14:43:09.000Z",
+ "published": "2019",
+ "updated": "2019-11-09T12:32:02.000Z"
+ },
+ "relationships": {
+ "client": {
+ "data": {
+ "id": "caltech.micropub",
+ "type": "clients"
+ }
+ }
+ }
+}
diff --git a/python/tests/files/datacite/datacite_result_30.json b/python/tests/files/datacite/datacite_result_30.json
new file mode 100644
index 00000000..f7d1bb2c
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_30.json
@@ -0,0 +1,38 @@
+{
+ "abstracts": [
+ {
+ "content": "Biological liquid-liquid phase separation",
+ "lang": "fr",
+ "mimetype": "text/plain"
+ }
+ ],
+ "contribs": [
+ {
+ "index": 0,
+ "given_name": "Celja J",
+ "surname": "Uebel",
+ "raw_name": "Celja J Uebel",
+ "role": "author"
+ },
+ {
+ "index": 1,
+ "given_name": "Carolyn M",
+ "raw_name": "Carolyn M Phillips",
+ "surname": "Phillips",
+ "role": "author"
+ }
+ ],
+ "ext_ids": {
+ "doi": "10.17912/micropub.biology.000143"
+ },
+ "extra": {
+ "datacite": {
+ "resourceTypeGeneral": "DataPaper"
+ }
+ },
+ "refs": [],
+ "release_stage": "published",
+ "release_year": 2019,
+ "publisher": "microPublication Biology",
+ "title": "Phase-separated protein dynamics are affected by fluorescent tag choice"
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 669a6984..15650375 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -287,10 +287,9 @@ def test_datacite_conversions(datacite_importer):
for now.
"""
datacite_importer.debug = True
- for i in range(30):
+ for i in range(31):
src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)
dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)
- print('testing mapping from {} => {}'.format(src, dst))
with open(src, 'r') as f:
re = datacite_importer.parse_record(json.load(f))
result = entity_to_dict(re)