diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-13 17:08:28 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-13 17:08:28 -0700 |
commit | c67fb5d850ec6bd6659ada8ce8162a8859dafe15 (patch) | |
tree | 0fad6d0d486854ecda4604127b458568cca15af5 /python | |
parent | c9e067e4b7c23b4d871ac091c7a9ec5a6650e909 (diff) | |
download | fatcat-c67fb5d850ec6bd6659ada8ce8162a8859dafe15.tar.gz fatcat-c67fb5d850ec6bd6659ada8ce8162a8859dafe15.zip |
importer code updates
Diffstat (limited to 'python')
-rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 16 | ||||
-rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 2 | ||||
-rwxr-xr-x | python/fatcat_tools/importers/wayback_static.py | 1 | ||||
-rw-r--r-- | python/tests/files/example_grobid_metadata_lines.tsv | 2 | ||||
-rw-r--r-- | python/tests/import_crossref.py | 21 | ||||
-rw-r--r-- | python/tests/import_grobid_metadata.py | 2 |
7 files changed, 42 insertions, 4 deletions
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index e1252b6d..c1ea075d 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -70,7 +70,6 @@ def cdl_dash_release(meta, extra=None): if extid['value'].startswith('ark:'): ark_id = extid['value'] assert ark_id - extra['ark_id'] = ark_id license_slug = lookup_license_slug(meta['rights']['uri']) @@ -98,6 +97,7 @@ def cdl_dash_release(meta, extra=None): r = ReleaseEntity( ext_ids=ReleaseEntityExtIds( doi=doi, + ark=ark_id, ), title=clean(meta['title'], force_xml=True), publisher=clean(meta['publisher']), diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 999ce13f..c875010c 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -218,6 +218,8 @@ class CrossrefImporter(EntityImporter): creator_id=creator_id, index=index, raw_name=raw_name, + given_name=clean(am.get('given')), + surname=clean(am.get('family')), raw_affiliation=clean(raw_affiliation), role=ctype, extra=extra)) @@ -318,13 +320,15 @@ class CrossrefImporter(EntityImporter): if not container_id: if obj.get('container-title'): extra['container_name'] = clean(obj['container-title'][0]) - for key in ('group-title', 'subtitle'): + for key in ('group-title'): val = obj.get(key) if val: if type(val) == list: val = val[0] if type(val) == str: - extra[key] = clean(val) + val = clean(val) + if val: + extra[key] = clean(val) else: extra[key] = val # crossref-nested extra keys @@ -397,6 +401,13 @@ class CrossrefImporter(EntityImporter): # title can't be just a single character return None + subtitle = None + if obj.get('subtitle'): + subtitle = clean(obj.get('subtitle')[0], force_xml=True) + if not subtitle or len(subtitle) <= 1: + # subtitle can't be just a single character + return None + if extra_crossref: extra['crossref'] = extra_crossref if not extra: @@ -406,6 +417,7 @@ class CrossrefImporter(EntityImporter): work_id=None, container_id=container_id, title=title, + subtitle=subtitle, original_title=original_title, release_type=release_type, release_stage=release_stage, diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index ba91d183..9e99bc0a 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -94,6 +94,8 @@ class GrobidMetadataImporter(EntityImporter): contribs.append(fatcat_client.ReleaseContrib( index=i, raw_name=clean(a['name']), + given_name=clean(a.get('given_name')), + surname=clean(a.get('surname')), role="author", extra=None)) diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py index 114920f7..c4f4f21e 100755 --- a/python/fatcat_tools/importers/wayback_static.py +++ b/python/fatcat_tools/importers/wayback_static.py @@ -120,6 +120,7 @@ def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None): resp.raise_for_status() assert webcapture_cdx.sha1 == hashlib.sha1(resp.content).digest().hex() webcapture_cdx.sha256 = hashlib.sha256(resp.content).digest().hex() + webcapture_cdx.size_bytes = len(resp.content) return webcapture_cdx else: return None diff --git a/python/tests/files/example_grobid_metadata_lines.tsv b/python/tests/files/example_grobid_metadata_lines.tsv index 75ec75ea..6568d999 100644 --- a/python/tests/files/example_grobid_metadata_lines.tsv +++ b/python/tests/files/example_grobid_metadata_lines.tsv @@ -1,4 +1,4 @@ -sha1:HDLSKETSI2EVG2HE3H4VBY3XWTZBW3LV {"c_size": 238713, "dt": "20180413120550", "offset": 235803370, "surt": "id,ac,hamzanwadi,e-journal)/index.php/edc/article/viewfile/24/21", "url": "http://e-journal.hamzanwadi.ac.id/index.php/edc/article/viewFile/24/21", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180413114556059-01022-01031-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180413120011074-01024-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 260608 {"title": "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA", "date": "2011-06", "doi": null, "abstract": "The purposes of this study are to know: (1) the effect of learning media toward students achievement; (2) the effect high and low motivation toward students achievement; (3) the effect of concrete and abstract thinking styles toward students achievement; (4) the interaction between the learning media and motivation toward students achievement; (5) the interaction between the learning media and thinking styles toward students achievement; (6) the interaction between motivation and thinking styles towards students achievement; (7) the interaction among learning media, motivation and thinking styles toward students achievement. The research used experimental method, was conducted from January to May 2010. Population was all students at grade VII SMP 2 Girisubo academic year 2009/2010 that consisted of four classes. Sample was taken using cluster random sampling technique and consisted of two classes. The fist class was treated using virtual demonstration and second class was treated real demonstration. The data was collected using test for students achievement, and questioner for students motivation and thinking style, and observation sheet for students affective achievements. The research hypotheses was tested using ANOVA with 2x2x2 factorial design and calculated with software minitab 15. From data analysis can be concluded that: (1) there is no effects of learning media toward students cognitive (p value = 0.618) and affective (p value = 0.822) achievement, (2) there is an effect students high motivation towards cognitive (p value = 0.000) and affective (p value = 0.008) achievement, (3) there is no effect of concrete and abstract thinking styles toward students cognitive (p value = 0.233) and affective (p value = 0.233) achievement, (4) There is no interactions between the learning media and students motivation toward (p value = 0.365) but there is interaction between the learning media and the motivation toward affective (p value = 0.037) achievement, (5) there is no interaction between the learning media and students thinking styles toward cognitive (p value = 0.112) and affective (p value = 0.256) achievement, (6) there is interaction between motivation and students thinking style toward cognitive (p value = 0.042) but there is no interaction between motivation and students thinking styles on affective (p value = 0.780) achievement, (7) There are no interactions among", "authors": [{"name": "Wahyu Ary"}, {"name": "Guru Kurnianto"}, {"name": "Smpn"}, {"name": "Gunung Girisubo"}, {"name": "Jogjakarta Kidul"}], "citations": [{"index": 0, "publisher": null, "volume": null, "title": "IPA Terpadu untuk Kelas VII SMP/MTs. Jakarta: Pusat Perbukuan Nasional Departemen Pendidikan Nasional", "id": "b0", "date": "2008", "issue": null, "authors": [{"name": "Ani Winarsih"}], "url": null, "journal": null}, {"index": 1, "publisher": null, "volume": null, "title": null, "id": "b1", "date": "2008", "issue": null, "authors": [{"name": "Akhmad Sudrajat"}], "url": null}, {"index": 2, "publisher": null, "volume": null, "title": null, "id": "b2", "date": false, "issue": null, "authors": [{"name": "Teori-Teori Belajar"}], "url": null}, {"index": 3, "publisher": null, "volume": null, "title": "Macam-Macam Pendekatan dan Metode Pembelajaran. Bandung Dikdasmen P3G IPA", "id": "b3", "date": "2004", "issue": null, "authors": [{"name": "Arief Sidharta"}], "url": null, "journal": null}, {"index": 4, "publisher": null, "volume": null, "title": "Balajar dan Pembelajaran", "id": "b4", "date": "2005", "issue": null, "authors": [{"name": "Asri Budiningsih"}, {"name": "C"}], "url": null, "journal": null}, {"index": 5, "publisher": null, "volume": null, "title": "Statistik untuk Penelitian", "id": "b5", "date": "2004", "issue": null, "authors": [{"name": "Budiyono"}], "url": null, "journal": null}, {"index": 6, "publisher": null, "volume": null, "title": "Pendekatan Keterampilan Proses. Jakrta: PT Gramedia", "id": "b6", "date": "1998", "issue": null, "authors": [{"name": "Conny Semiawan"}, {"name": "A Tangyong"}, {"name": "S Belen"}, {"name": "Yulaelawati Matahelemual"}, {"name": "Wahjudi Suselordjo"}], "url": null, "journal": null}, {"index": 7, "publisher": null, "volume": null, "title": "Quantum Learning. Bandung: Mizan Media Utama", "id": "b7", "date": "2008", "issue": null, "authors": [{"name": "Bobbi Deporter"}, {"name": "Mike Dan Hernacki"}], "url": null, "journal": null}, {"index": 8, "publisher": null, "volume": null, "title": "Belajar dan Pembelajaran. Jakarta: Rineka Cipta", "id": "b8", "date": "2006", "issue": null, "authors": [{"name": "Mudjiono Dimyati Dan"}], "url": null, "journal": null}, {"index": 9, "publisher": null, "volume": null, "title": "Psikologi Pengajaran. Jakarta: Gramedia", "id": "b9", "date": "2002", "issue": null, "authors": [{"name": "Ary Wahyu Kurnianto Djamarah"}], "url": null, "journal": null}, {"index": 10, "publisher": null, "volume": null, "title": "Ketrampilas Preoses Sains. Jakarta: Depertemen Pendidikan Nasional", "id": "b10", "date": "2003", "issue": null, "authors": [{"name": "Elok Sudibyo"}], "url": null, "journal": null}, {"index": 11, "publisher": null, "volume": null, "title": "Principles of Instructions Design", "id": "b11", "date": "1992", "issue": null, "authors": [{"name": "Gagne"}, {"name": "M Robert"}, {"name": "Brigss"}, {"name": "J Leslie"}, {"name": "Walter Wagner"}], "url": null, "journal": null}, {"index": 12, "publisher": null, "volume": "1", "title": "Seeing is Believing: Clasroom Demonstration As Scintific Inkquiry", "id": "b12", "date": "2002-12", "issue": "3", "authors": [{"name": "Jerod Groos"}], "url": null, "journal": "Illinois: Jounal Physic Teacher Online"}, {"index": 13, "publisher": null, "volume": null, "title": "Pembelajaran Fisika Dengan Metode Demonstrasi Menggunakan Media Audio Visual dan Labortorium Ditinjau Dari Kemampuan Visiospasial", "id": "b13", "date": "2009", "issue": null, "authors": [{"name": "Hartanto"}], "url": null, "journal": null}, {"index": 14, "publisher": null, "volume": null, "title": "Startegi Belajar Mengajar. Bandung: CV Maulana", "id": "b14", "date": "2001", "issue": null, "authors": [{"name": "Mulyani Sumantri"}, {"name": "& Johan Permana"}], "url": null, "journal": null}, {"index": 15, "publisher": null, "volume": null, "title": "Pembelajaran Kontekstual dan Penerapannya dalam KBK", "id": "b15", "date": "2004", "issue": null, "authors": [{"name": "Dkk Nurhadi"}], "url": null, "journal": null}, {"index": 16, "publisher": null, "volume": null, "title": "Filsafat Konstruktivisme Dalam Pendidikan", "id": "b16", "date": "1997", "issue": null, "authors": [{"name": "Paul Suparno"}], "url": null, "journal": null}, {"index": 17, "publisher": null, "volume": null, "title": "Pembelajaran Fisika Menggunakan Metode Demonstrasi Dengan Observasi Langsung dan Visualisasi Ditinjau Dari Kemampuan Spasial Siswa", "id": "b17", "date": "2009", "issue": null, "authors": [{"name": "Ponimin"}], "url": null, "journal": null}, {"index": 18, "publisher": null, "volume": null, "title": "Interaksi Belajar Mengajar IPA", "id": "b18", "date": "1988", "issue": null, "authors": [{"name": "Ratna Wilis Dahar"}], "url": null, "journal": null}, {"index": 19, "publisher": null, "volume": null, "title": "Strategi Belajar Mengajar. Jakarta: PT. Rineka Cipta", "id": "b19", "date": "2008", "issue": null, "authors": [{"name": "N Roestiyah"}], "url": null, "journal": null}, {"index": 20, "publisher": null, "volume": null, "title": "Interaksi dan Motivasi Belajar Mengajar. Jakarta: PT Raja Grafindo Persada", "id": "b20", "date": "2005", "issue": null, "authors": [{"name": "A Sardiman"}], "url": null, "journal": null}, {"index": 21, "publisher": null, "volume": null, "title": "Evaluasi pendidikan ilmu Pengetahuan Alam. Jakarta: Departemen Pendidikan dan Kebudayaan", "id": "b21", "date": "1988", "issue": null, "authors": [{"name": "Subiyanto"}], "url": null, "journal": null}, {"index": 22, "publisher": null, "volume": null, "title": "Prosedur Penelitian Suatu Pendekatan Praktik", "id": "b22", "date": "2006", "issue": null, "authors": [{"name": "Suharsimi Arikunto"}], "url": null, "journal": null}, {"index": 23, "publisher": null, "volume": null, "title": "Metoda Statistika", "id": "b23", "date": "2006", "issue": null, "authors": [{"name": "Sudjana"}], "url": null, "journal": null}, {"index": 24, "publisher": null, "volume": null, "title": "Belajar dan Faktor-Faktor Yang Mempengaruhinya. Jakarta: Rineka Cipta", "id": "b24", "date": "1995", "issue": null, "authors": [{"name": "Slameto"}], "url": null, "journal": null}, {"index": 25, "publisher": null, "volume": null, "title": "IPA Terpadu untuk SMP Kelas VII", "id": "b25", "date": "2007", "issue": null, "authors": [{"name": "Tim Guru"}], "url": null, "journal": null}, {"index": 26, "publisher": null, "volume": null, "title": "Psikologi Pengajaran. Jakarta: Gramedia", "id": "b26", "date": "1987", "issue": null, "authors": [{"name": "W Winkel"}], "url": null, "journal": null}], "journal": {"issn": null, "publisher": null, "volume": "6", "eissn": null, "issue": "1", "name": null}, "acknowledgement": null} +sha1:HDLSKETSI2EVG2HE3H4VBY3XWTZBW3LV {"c_size": 238713, "dt": "20180413120550", "offset": 235803370, "surt": "id,ac,hamzanwadi,e-journal)/index.php/edc/article/viewfile/24/21", "url": "http://e-journal.hamzanwadi.ac.id/index.php/edc/article/viewFile/24/21", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180413114556059-01022-01031-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180413120011074-01024-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 260608 {"title": "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA", "date": "2011-06", "doi": null, "abstract": "The purposes of this study are to know: (1) the effect of learning media toward students achievement; (2) the effect high and low motivation toward students achievement; (3) the effect of concrete and abstract thinking styles toward students achievement; (4) the interaction between the learning media and motivation toward students achievement; (5) the interaction between the learning media and thinking styles toward students achievement; (6) the interaction between motivation and thinking styles towards students achievement; (7) the interaction among learning media, motivation and thinking styles toward students achievement. The research used experimental method, was conducted from January to May 2010. Population was all students at grade VII SMP 2 Girisubo academic year 2009/2010 that consisted of four classes. Sample was taken using cluster random sampling technique and consisted of two classes. The fist class was treated using virtual demonstration and second class was treated real demonstration. The data was collected using test for students achievement, and questioner for students motivation and thinking style, and observation sheet for students affective achievements. The research hypotheses was tested using ANOVA with 2x2x2 factorial design and calculated with software minitab 15. From data analysis can be concluded that: (1) there is no effects of learning media toward students cognitive (p value = 0.618) and affective (p value = 0.822) achievement, (2) there is an effect students high motivation towards cognitive (p value = 0.000) and affective (p value = 0.008) achievement, (3) there is no effect of concrete and abstract thinking styles toward students cognitive (p value = 0.233) and affective (p value = 0.233) achievement, (4) There is no interactions between the learning media and students motivation toward (p value = 0.365) but there is interaction between the learning media and the motivation toward affective (p value = 0.037) achievement, (5) there is no interaction between the learning media and students thinking styles toward cognitive (p value = 0.112) and affective (p value = 0.256) achievement, (6) there is interaction between motivation and students thinking style toward cognitive (p value = 0.042) but there is no interaction between motivation and students thinking styles on affective (p value = 0.780) achievement, (7) There are no interactions among", "authors": [{"name": "Wahyu Ary", "given_name": "Wahyu", "surname": "Ary"}, {"name": "Guru Kurnianto"}, {"name": "Smpn"}, {"name": "Gunung Girisubo"}, {"name": "Jogjakarta Kidul"}], "citations": [{"index": 0, "publisher": null, "volume": null, "title": "IPA Terpadu untuk Kelas VII SMP/MTs. Jakarta: Pusat Perbukuan Nasional Departemen Pendidikan Nasional", "id": "b0", "date": "2008", "issue": null, "authors": [{"name": "Ani Winarsih"}], "url": null, "journal": null}, {"index": 1, "publisher": null, "volume": null, "title": null, "id": "b1", "date": "2008", "issue": null, "authors": [{"name": "Akhmad Sudrajat"}], "url": null}, {"index": 2, "publisher": null, "volume": null, "title": null, "id": "b2", "date": false, "issue": null, "authors": [{"name": "Teori-Teori Belajar"}], "url": null}, {"index": 3, "publisher": null, "volume": null, "title": "Macam-Macam Pendekatan dan Metode Pembelajaran. Bandung Dikdasmen P3G IPA", "id": "b3", "date": "2004", "issue": null, "authors": [{"name": "Arief Sidharta"}], "url": null, "journal": null}, {"index": 4, "publisher": null, "volume": null, "title": "Balajar dan Pembelajaran", "id": "b4", "date": "2005", "issue": null, "authors": [{"name": "Asri Budiningsih"}, {"name": "C"}], "url": null, "journal": null}, {"index": 5, "publisher": null, "volume": null, "title": "Statistik untuk Penelitian", "id": "b5", "date": "2004", "issue": null, "authors": [{"name": "Budiyono"}], "url": null, "journal": null}, {"index": 6, "publisher": null, "volume": null, "title": "Pendekatan Keterampilan Proses. Jakrta: PT Gramedia", "id": "b6", "date": "1998", "issue": null, "authors": [{"name": "Conny Semiawan"}, {"name": "A Tangyong"}, {"name": "S Belen"}, {"name": "Yulaelawati Matahelemual"}, {"name": "Wahjudi Suselordjo"}], "url": null, "journal": null}, {"index": 7, "publisher": null, "volume": null, "title": "Quantum Learning. Bandung: Mizan Media Utama", "id": "b7", "date": "2008", "issue": null, "authors": [{"name": "Bobbi Deporter"}, {"name": "Mike Dan Hernacki"}], "url": null, "journal": null}, {"index": 8, "publisher": null, "volume": null, "title": "Belajar dan Pembelajaran. Jakarta: Rineka Cipta", "id": "b8", "date": "2006", "issue": null, "authors": [{"name": "Mudjiono Dimyati Dan"}], "url": null, "journal": null}, {"index": 9, "publisher": null, "volume": null, "title": "Psikologi Pengajaran. Jakarta: Gramedia", "id": "b9", "date": "2002", "issue": null, "authors": [{"name": "Ary Wahyu Kurnianto Djamarah"}], "url": null, "journal": null}, {"index": 10, "publisher": null, "volume": null, "title": "Ketrampilas Preoses Sains. Jakarta: Depertemen Pendidikan Nasional", "id": "b10", "date": "2003", "issue": null, "authors": [{"name": "Elok Sudibyo"}], "url": null, "journal": null}, {"index": 11, "publisher": null, "volume": null, "title": "Principles of Instructions Design", "id": "b11", "date": "1992", "issue": null, "authors": [{"name": "Gagne"}, {"name": "M Robert"}, {"name": "Brigss"}, {"name": "J Leslie"}, {"name": "Walter Wagner"}], "url": null, "journal": null}, {"index": 12, "publisher": null, "volume": "1", "title": "Seeing is Believing: Clasroom Demonstration As Scintific Inkquiry", "id": "b12", "date": "2002-12", "issue": "3", "authors": [{"name": "Jerod Groos"}], "url": null, "journal": "Illinois: Jounal Physic Teacher Online"}, {"index": 13, "publisher": null, "volume": null, "title": "Pembelajaran Fisika Dengan Metode Demonstrasi Menggunakan Media Audio Visual dan Labortorium Ditinjau Dari Kemampuan Visiospasial", "id": "b13", "date": "2009", "issue": null, "authors": [{"name": "Hartanto"}], "url": null, "journal": null}, {"index": 14, "publisher": null, "volume": null, "title": "Startegi Belajar Mengajar. Bandung: CV Maulana", "id": "b14", "date": "2001", "issue": null, "authors": [{"name": "Mulyani Sumantri"}, {"name": "& Johan Permana"}], "url": null, "journal": null}, {"index": 15, "publisher": null, "volume": null, "title": "Pembelajaran Kontekstual dan Penerapannya dalam KBK", "id": "b15", "date": "2004", "issue": null, "authors": [{"name": "Dkk Nurhadi"}], "url": null, "journal": null}, {"index": 16, "publisher": null, "volume": null, "title": "Filsafat Konstruktivisme Dalam Pendidikan", "id": "b16", "date": "1997", "issue": null, "authors": [{"name": "Paul Suparno"}], "url": null, "journal": null}, {"index": 17, "publisher": null, "volume": null, "title": "Pembelajaran Fisika Menggunakan Metode Demonstrasi Dengan Observasi Langsung dan Visualisasi Ditinjau Dari Kemampuan Spasial Siswa", "id": "b17", "date": "2009", "issue": null, "authors": [{"name": "Ponimin"}], "url": null, "journal": null}, {"index": 18, "publisher": null, "volume": null, "title": "Interaksi Belajar Mengajar IPA", "id": "b18", "date": "1988", "issue": null, "authors": [{"name": "Ratna Wilis Dahar"}], "url": null, "journal": null}, {"index": 19, "publisher": null, "volume": null, "title": "Strategi Belajar Mengajar. Jakarta: PT. Rineka Cipta", "id": "b19", "date": "2008", "issue": null, "authors": [{"name": "N Roestiyah"}], "url": null, "journal": null}, {"index": 20, "publisher": null, "volume": null, "title": "Interaksi dan Motivasi Belajar Mengajar. Jakarta: PT Raja Grafindo Persada", "id": "b20", "date": "2005", "issue": null, "authors": [{"name": "A Sardiman"}], "url": null, "journal": null}, {"index": 21, "publisher": null, "volume": null, "title": "Evaluasi pendidikan ilmu Pengetahuan Alam. Jakarta: Departemen Pendidikan dan Kebudayaan", "id": "b21", "date": "1988", "issue": null, "authors": [{"name": "Subiyanto"}], "url": null, "journal": null}, {"index": 22, "publisher": null, "volume": null, "title": "Prosedur Penelitian Suatu Pendekatan Praktik", "id": "b22", "date": "2006", "issue": null, "authors": [{"name": "Suharsimi Arikunto"}], "url": null, "journal": null}, {"index": 23, "publisher": null, "volume": null, "title": "Metoda Statistika", "id": "b23", "date": "2006", "issue": null, "authors": [{"name": "Sudjana"}], "url": null, "journal": null}, {"index": 24, "publisher": null, "volume": null, "title": "Belajar dan Faktor-Faktor Yang Mempengaruhinya. Jakarta: Rineka Cipta", "id": "b24", "date": "1995", "issue": null, "authors": [{"name": "Slameto"}], "url": null, "journal": null}, {"index": 25, "publisher": null, "volume": null, "title": "IPA Terpadu untuk SMP Kelas VII", "id": "b25", "date": "2007", "issue": null, "authors": [{"name": "Tim Guru"}], "url": null, "journal": null}, {"index": 26, "publisher": null, "volume": null, "title": "Psikologi Pengajaran. Jakarta: Gramedia", "id": "b26", "date": "1987", "issue": null, "authors": [{"name": "W Winkel"}], "url": null, "journal": null}], "journal": {"issn": null, "publisher": null, "volume": "6", "eissn": null, "issue": "1", "name": null}, "acknowledgement": null} sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V {"c_size": 101956, "dt": "20180518000206", "offset": 403989190, "surt": "edu,depaul,library,via)/cgi/viewcontent.cgi?article=2947&context=vincentiana", "url": "http://via.library.depaul.edu/cgi/viewcontent.cgi?article=2947&context=vincentiana", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180517225512190-07393-07402-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180517234518099-07397-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 142710 {"title": "Reflexiones Sobre el Discernimiento y el Acompa\u00f1amiento", "date": "2001", "doi": null, "abstract": null, "authors": [{"name": "Charles Bonnet"}], "citations": [], "journal": {"issn": null, "publisher": null, "volume": "45", "eissn": null, "issue": "4", "name": null}, "acknowledgement": null} sha1:L6F7D5B4RQWO4P2DPTTSC46IL4JRLKHP {"c_size": 422526, "dt": "20180423223723", "offset": 643898352, "surt": "com,ijiras)/2016/vol_3-issue_9/paper_32.pdf", "url": "http://www.ijiras.com/2016/Vol_3-Issue_9/paper_32.pdf", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180423214236018-03264-03273-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180423222446077-03270-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 438820 {"title": "The Role Of Agricultural Production On Nigeria's Economy", "date": "2016-08", "doi": null, "abstract": null, "authors": [{"name": "Adams Kemi"}], "citations": [{"index": 0, "publisher": null, "volume": null, "title": "The importance of Agriculture towards the Development of Nigeria Economy", "id": "b0", "date": "2010", "issue": null, "authors": [{"name": "Agric Slider"}], "url": null, "journal": null}, {"index": 1, "publisher": null, "volume": null, "title": "The Changing Structure of the Nigerian Economy and Implications for Development", "id": "b1", "date": "2000-08", "issue": null, "authors": [], "url": null, "journal": null}, {"index": 2, "publisher": null, "volume": null, "title": "Growth and poverty reduction: the role of agriculture. A DFID policy paper. Department for International Development", "id": "b2", "date": "2005", "issue": null, "authors": [], "url": null, "journal": null}, {"index": 3, "publisher": null, "volume": "7", "title": "The Impact of Agriculture and Agro-Based Industries on Economic Development in Nigeria: An Econometric Assessment Retrieved from", "id": "b3", "date": "2009-06", "issue": "1", "authors": [{"name": "Edoumiekumo"}], "url": null, "journal": "Journal of Research in National Development"}, {"index": 4, "publisher": null, "volume": null, "title": "The Role of Agriculture in the Economic Development of Nigeria", "id": "b4", "date": "2015", "issue": null, "authors": [{"name": "I-Farm"}], "url": null, "journal": null}, {"index": 5, "publisher": null, "volume": null, "title": "Leaders In African Work Force Development", "id": "b5", "date": "2010", "issue": null, "authors": [{"name": "Oicd"}], "url": null, "journal": null}, {"index": 6, "publisher": null, "volume": null, "title": "Olumola 2007 strategies for managing the challenges of agriculture in Nigeria", "id": "b6", "date": false, "issue": null, "authors": [], "url": null, "journal": null}, {"index": 7, "publisher": null, "volume": null, "title": "Opportunity International (2012) Invest in Nigeria farmers", "id": "b7", "date": false, "issue": null, "authors": [], "url": null, "journal": null}, {"index": 8, "publisher": null, "volume": null, "title": "World Population Prospects: The 2010 Revision. United Nations Department of Economic and Social Affairs, Population Division", "id": "b8", "date": "2011", "issue": null, "authors": [], "url": null, "journal": null}], "journal": {"issn": null, "publisher": null, "volume": null, "eissn": null, "issue": null, "name": null}, "acknowledgement": null} sha1:LNDE2NJE5ZCKSPGT72JV5J4YEIPWFVJF {"c_size": 7308859, "dt": "20180422054242", "offset": 443789750, "surt": "fr,chu-limoges,hemato)/hematolim/portals/0/enseignement/items_ecn/6-lymphome%20%20-%20item%20316a%20ue9%20-%20pr.%20bordessoule%20-%202017.pdf?ver=2017-03-29-111031-597", "url": "https://hemato.chu-limoges.fr/hematolim/Portals/0/Enseignement/Items_ECN/6-Lymphome%20%20-%20Item%20316a%20UE9%20-%20Pr.%20Bordessoule%20-%202017.pdf?ver=2017-03-29-111031-597", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180422050335049-02780-02789-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180422053334855-02787-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 7629821 {"title": "UE N\u00b0 9 Canc\u00e9ro-onco h\u00e9matologie Objectif ECN: N\u00b0 316a LYMPHOME MALIN NON HODGKINIEN", "date": false, "doi": null, "abstract": null, "authors": [{"name": "D Bordessoule"}], "citations": [], "journal": {"issn": null, "publisher": null, "volume": null, "eissn": null, "issue": null, "name": null}, "acknowledgement": null} diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index e090da22..866b4a83 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -83,6 +83,7 @@ def test_crossref_dict_parse(crossref_importer): assert r.ext_ids.doi == "10.1002/(sici)1097-461x(1998)66:4<261::aid-qua1>3.0.co;2-t" assert r.ext_ids.isbn13 == "978-3-16-148410-0" assert r.language == "fr" + assert r.subtitle == None assert 'subtitle' not in r.extra assert 'subtitle' not in r.extra['crossref'] assert 'funder' not in r.extra @@ -93,6 +94,8 @@ def test_crossref_dict_parse(crossref_importer): assert r.extra['crossref']['archive'] == ['Portico', 'LOCKSS'] assert len(r.contribs) == 6 assert r.contribs[0].raw_name == "Marcelo D. Radicioni" + assert r.contribs[0].given_name == "Marcelo D." + assert r.contribs[0].surname == "Radicioni" assert r.contribs[0].index == 0 assert r.contribs[0].extra['seq'] == "first" assert r.contribs[1].raw_affiliation == "Some University" @@ -114,6 +117,24 @@ def test_crossref_dict_parse(crossref_importer): assert r.refs[2].container_name == "Hypervirial Theorem's, Lecture Notes in Chemistry <3" assert r.refs[3].container_name == "Large Order Perturbation Theory and Summation Methods in Quantum Mechanics, Lecture Notes in Chemistry" +def test_crossref_subtitle(crossref_importer): + """ + Tests new subtitle field, explicitly + """ + with open('tests/files/crossref-works.single.json', 'r') as f: + # not a single line + raw = json.loads(f.read()) + raw['subtitle'] = ["some bogus subtitle", "blah"] + r = crossref_importer.parse_record(raw) + # ensure the API server is ok with format + JsonLinePusher(crossref_importer, [json.dumps(raw)]).run() + + print(r.extra) + assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators" + assert r.subtitle == "some bogus subtitle" + assert 'subtitle' not in r.extra + assert 'subtitle' not in r.extra['crossref'] + def test_stateful_checking(crossref_importer_existing): with open('tests/files/crossref-works.single.json', 'r') as f: # not a single line, a whole document diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py index 4fed4aaa..51ab3faa 100644 --- a/python/tests/import_grobid_metadata.py +++ b/python/tests/import_grobid_metadata.py @@ -25,6 +25,8 @@ def test_grobid_metadata_parse(grobid_metadata_importer): assert len(re.contribs) == 5 print(re.contribs) assert re.contribs[0].raw_name == "Wahyu Ary" + assert re.contribs[0].given_name == "Wahyu" + assert re.contribs[0].surname == "Ary" assert re.publisher == None if re.extra: assert re.extra.get('container_name') == None |