From 4a82a0763bf927248f22e47ab5187af4beff83ee Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Mon, 9 Dec 2019 01:03:43 +0100 Subject: datacite: importer skeleton * contributors, title, date, publisher, container, license Field and value analysis via https://github.com/miku/indigo. --- python/tests/import_datacite.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 python/tests/import_datacite.py (limited to 'python/tests') diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py new file mode 100644 index 00000000..0bbaba2e --- /dev/null +++ b/python/tests/import_datacite.py @@ -0,0 +1,25 @@ +""" +Test datacite importer. + +Datacite is an aggregator, hence inputs are quite varied. + +Here is a small sample of ID types taken from a sample: + + 497344 "DOI" + 65013 "URL" + 22210 "CCDC" + 17853 "GBIF" + 17635 "Other" + 11474 "uri" + 9170 "Publisher ID" + 7775 "URN" + 6196 "DUCHAS" + 5624 "Handle" + 5056 "publisherId" + +A nice, not yet existing tool (maybe named indigo) would do the following: + + $ shuf -n 100000 datacite.ndjson | indigo -t md > data.md + +TODO(martin): Write tests. +""" -- cgit v1.2.3