aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-09-27 17:06:15 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-09-27 17:06:17 -0700
commit: c7573bf142be405f8cb9003400c6860aeb700457 (patch)
tree: 552ba89d2dc80e6557519e4bf4ddbcceef058c3b
parent: 72d14a1ea8113d715e3f7933332829876a438618 (diff)
download: fatcat-c7573bf142be405f8cb9003400c6860aeb700457.tar.gz
fatcat-c7573bf142be405f8cb9003400c6860aeb700457.zip
improvements to grobid_metadata importer
But still fails tests due to database collision/side-effect on sha1 lookup.
-rwxr-xr-x python/fatcat/grobid_metadata_importer.py 233
-rw-r--r-- python/tests/files/example_grobid_metadata_lines.tsv 10
-rw-r--r-- python/tests/grobid_metadata_importer.py 47
3 files changed, 211 insertions, 79 deletions
diff --git a/python/fatcat/grobid_metadata_importer.py b/python/fatcat/grobid_metadata_importer.py
index 4d8d6fa3..95cc285e 100755
--- a/python/fatcat/grobid_metadata_importer.py
+++ b/python/fatcat/grobid_metadata_importer.py
@@ -2,92 +2,167 @@
import sys
import json
+import base64
import datetime
+import fatcat_client
+from fatcat.importer_common import FatcatImporter
MAX_ABSTRACT_BYTES=4096
-def parse_grobid_json(obj):
-
- if not obj.get('title'):
- return None
-
- release = dict()
- extra = dict()
-
- if obj.get('abstract') and len(obj.get('abstract')) < MAX_ABSTRACT_BYTES:
- abobj = dict(
- mimetype="text/plain",
- language=None,
- content=obj.get('abstract').strip())
- abstracts = [abobj]
- else:
- abstracts = None
-
- contribs = []
- for a in obj.get('authors', []):
- c = dict(raw_name=a, role="author")
- contribs.append(c)
-
- refs = []
- for raw in obj.get('citations', []):
+
+class FatcatGrobidMetadataImporter(FatcatImporter):
+
+ def __init__(self, host_url, default_link_rel="web"):
+ # Forward host_url to the FatcatImporter base initializer, then remember
+ # the "rel" value that make_url() falls back to for URLs matching none of
+ # its special-cased domains.
+ super().__init__(host_url)
+ self.default_link_rel = default_link_rel
+
+ def parse_grobid_json(self, obj):
+     """Build a release entity from one parsed GROBID metadata object.
+
+     `obj` is the dict decoded from GROBID's JSON output. Keys read here:
+     'title', 'abstract', 'authors', 'citations', 'date', 'doi' and
+     'journal'. Any of them may be missing or null.
+
+     Returns None when there is no title; otherwise a
+     fatcat_client.ReleaseEntity carrying the stripped title, author
+     contribs, citation refs, journal publisher/volume/issue, an optional
+     plain-text abstract, and a {'grobid': {...}} extra blob.
+     """
+
+     if not obj.get('title'):
+         return None
+
     extra = dict()
- ref = dict()
- ref['key'] = raw.get('id')
- if raw.get('title'):
- ref['title'] = raw['title'].strip()
- if raw.get('date'):
- try:
- year = int(raw['date'].strip()[:4])
- ref['year'] = year
- except:
- pass
- for key in ('volume', 'url', 'issue', 'publisher'):
- if raw.get(key):
- extra[key] = raw[key].strip()
- if raw.get('authors'):
- extra['authors'] = [a['name'] for a in raw['authors']]
+
+     # 'journal' can be absent or null; normalise to a dict once so every
+     # later lookup is safe.
+     journal = obj.get('journal') or dict()
+
+     # Abstracts at or above the limit are dropped entirely (the limit is
+     # compared against len(), i.e. characters of the unstripped text).
+     abstract = obj.get('abstract')
+     if abstract and len(abstract) < MAX_ABSTRACT_BYTES:
+         abobj = dict(
+             mimetype="text/plain",
+             language=None,
+             content=abstract.strip())
+         abstracts = [abobj]
+     else:
+         abstracts = None
+
+     contribs = []
+     # "or []" also covers an explicit null, which .get(key, []) does not
+     for i, a in enumerate(obj.get('authors') or []):
+         contribs.append(fatcat_client.ReleaseContrib(
+             index=i,
+             raw_name=a['name'],
+             role="author",
+             extra=None))
+
+     refs = []
+     for raw in obj.get('citations') or []:
+         cite_extra = dict()
+         ref = dict()
+         ref['key'] = raw.get('id')
+         if raw.get('title'):
+             ref['title'] = raw['title'].strip()
+         if raw.get('date'):
+             try:
+                 ref['year'] = int(raw['date'].strip()[:4])
+             except (AttributeError, ValueError):
+                 # best effort: date is not a string, or does not start
+                 # with a 4-digit year; leave the ref without a year
+                 pass
+         for key in ('volume', 'url', 'issue', 'publisher'):
+             if raw.get(key):
+                 cite_extra[key] = raw[key].strip()
+         if raw.get('authors'):
+             cite_extra['authors'] = [a['name'] for a in raw['authors']]
+         # only attach an extra blob when something was actually collected
+         if cite_extra:
+             cite_extra = dict(grobid=cite_extra)
+         else:
+             cite_extra = None
+         ref['extra'] = cite_extra
+         refs.append(ref)
+
+     # NOTE(review): release_type and release_date are computed here but
+     # are not passed to ReleaseEntity below -- confirm the field names and
+     # formats the client expects before wiring them in.
+     release_type = "journal-article"
+     release_date = None
+     if obj.get('date'):
+         # TODO: only returns year, ever? how to handle?
+         try:
+             release_date = datetime.datetime(
+                 year=int(obj['date'][:4]), month=1, day=1)
+         except (TypeError, ValueError):
+             # malformed date: leave it unset rather than abort the row
+             release_date = None
+
+     if obj.get('doi'):
+         extra['doi'] = obj['doi']
+     if journal.get('name'):
+         extra['container_name'] = journal['name']
+
+     extra['is_longtail_oa'] = True
+
+     # TODO: ISSN/eISSN handling? or just journal name lookup?
+
     if extra:
         extra = dict(grobid=extra)
     else:
         extra = None
- ref['extra'] = extra
- refs.append(ref)
-
- release_type = "journal-article"
- release_date = None
- if raw.get('date'):
- # TODO: only returns year, ever? how to handle?
- release_date = datetime.datetime(year=raw['date'], month=1, day=1)
-
- if raw.get('doi'):
- extra['doi'] = raw['doi']
- if raw['journal'].get('name'):
- extra['container_name'] = raw['journal']['name']
+
+     # local is named "release" rather than "re" to avoid shadowing the
+     # standard-library module of that name
+     release = fatcat_client.ReleaseEntity(
+         title=obj['title'].strip(),
+         contribs=contribs,
+         refs=refs,
+         publisher=journal.get('publisher'),
+         volume=journal.get('volume'),
+         issue=journal.get('issue'),
+         abstracts=abstracts,
+         extra=extra)
+     return release
- extra['is_longtail_oa'] = True
-
- # TODO: ISSN/eISSN handling? or just journal name lookup?
-
- if extra:
- extra = dict(grobid=extra)
- else:
- extra = None
-
- return dict(
- title=obj['title'].strip(),
- contribs=contribs,
- publisher=obj['journal'].get('publisher'),
- volume=obj['journal'].get('volume'),
- issue=obj['journal'].get('issue'),
- abstracts=abstracts,
- extra=extra)
-
-def run():
- for line in sys.stdin:
- obj = json.loads(line)
- out = parse_grobid_json(obj)
- if out:
- print(out)
-
-if __name__=="__main__":
- run()
+ # TODO: make this a common function somewhere
+ def make_url(self, raw):
+     """Wrap a raw URL string in a FileEntityUrls with a guessed "rel".
+
+     The first marker substring found in `raw` decides the rel; a URL
+     matching no marker gets self.default_link_rel.
+     """
+     # TODO: this is where we could map specific domains to rel types,
+     # and also filter out bad domains, invalid URLs, etc
+     # TODO: special-case the arxiv.org bulk mirror?
+     marker_rels = (
+         ("//archive.org/", "repository"),
+         ("//arxiv.org/", "repository"),
+         ("//web.archive.org/", "webarchive"),
+         ("//archive.is/", "webarchive"),
+     )
+     link_rel = self.default_link_rel
+     for marker, marker_rel in marker_rels:
+         if marker in raw:
+             link_rel = marker_rel
+             break
+     return fatcat_client.FileEntityUrls(url=raw, rel=link_rel)
+
+ def parse_file_metadata(self, sha1_key, cdx, mimetype, file_size):
+     """Build a file entity for one crawled file, unless it already exists.
+
+     sha1_key is a base32 SHA-1 digest, optionally prefixed with "sha1:".
+     cdx is a dict from which 'url' and 'dt' are read. file_size is
+     converted with int().
+
+     Returns None when the API lookup finds an existing file with the same
+     SHA-1; otherwise a fatcat_client.FileEntity (not submitted to the API
+     by this method) with an empty release list and two URLs: a
+     web.archive.org capture link and the original URL.
+
+     Raises fatcat_client.rest.ApiException for any lookup failure other
+     than a 404.
+     """
+
+     # re-encode the base32 digest as lowercase hex
+     sha1 = base64.b16encode(base64.b32decode(sha1_key.replace('sha1:', ''))).decode('ascii').lower()
+
+     # lookup existing SHA1, or create new entity
+     try:
+         existing_file = self.api.lookup_file(sha1=sha1)
+     except fatcat_client.rest.ApiException as err:
+         if err.status != 404:
+             # bare raise re-raises the active exception unchanged
+             raise
+         existing_file = None
+
+     if existing_file:
+         # if file is already in here, presumably not actually long-tail
+         return None
+     fe = fatcat_client.FileEntity(
+         sha1=sha1,
+         size=int(file_size),
+         mimetype=mimetype,
+         releases=[],
+         urls=[],
+     )
+
+     # parse URLs and CDX
+     original = cdx['url']
+     wayback = "https://web.archive.org/web/{}/{}".format(
+         cdx['dt'],
+         original)
+     fe.urls.append(
+         fatcat_client.FileEntityUrls(url=wayback, rel="webarchive"))
+     original_url = self.make_url(original)
+     # identity test is the idiomatic None check
+     if original_url is not None:
+         fe.urls.append(original_url)
+
+     return fe
+
+ def create_row(self, row, editgroup=None):
+     """Import one tab-separated line as a release plus its file.
+
+     Columns read: 0 = sha1 key, 1 = CDX JSON, 2 = mimetype, 3 = file
+     size, 4 = GROBID metadata JSON. Nothing is created unless both the
+     file entity and the release entity could be built; on success the
+     insert counter is bumped by one.
+     """
+     if not row:
+         return
+     parts = row.split('\t')
+     sha1_key = parts[0]
+     cdx = json.loads(parts[1])
+     mimetype = parts[2]
+     file_size = int(parts[3])
+     grobid_meta = json.loads(parts[4])
+
+     new_file = self.parse_file_metadata(sha1_key, cdx, mimetype, file_size)
+     new_release = self.parse_grobid_json(grobid_meta)
+     if not (new_file and new_release):
+         return
+
+     created_release = self.api.create_release(new_release, editgroup=editgroup)
+     # release ident can't already be in release list because we just
+     # created it
+     new_file.releases.append(created_release.ident)
+     self.api.create_file(new_file, editgroup=editgroup)
+     self.insert_count += 1
+
+ # NB: batch mode not implemented
diff --git a/python/tests/files/example_grobid_metadata_lines.tsv b/python/tests/files/example_grobid_metadata_lines.tsv
new file mode 100644
index 00000000..75ec75ea
--- /dev/null
+++ b/python/tests/files/example_grobid_metadata_lines.tsv
@@ -0,0 +1,10 @@
+sha1:HDLSKETSI2EVG2HE3H4VBY3XWTZBW3LV {"c_size": 238713, "dt": "20180413120550", "offset": 235803370, "surt": "id,ac,hamzanwadi,e-journal)/index.php/edc/article/viewfile/24/21", "url": "http://e-journal.hamzanwadi.ac.id/index.php/edc/article/viewFile/24/21", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180413114556059-01022-01031-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180413120011074-01024-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 260608 {"title": "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA", "date": "2011-06", "doi": null, "abstract": "The purposes of this study are to know: (1) the effect of learning media toward students achievement; (2) the effect high and low motivation toward students achievement; (3) the effect of concrete and abstract thinking styles toward students achievement; (4) the interaction between the learning media and motivation toward students achievement; (5) the interaction between the learning media and thinking styles toward students achievement; (6) the interaction between motivation and thinking styles towards students achievement; (7) the interaction among learning media, motivation and thinking styles toward students achievement. The research used experimental method, was conducted from January to May 2010. Population was all students at grade VII SMP 2 Girisubo academic year 2009/2010 that consisted of four classes. Sample was taken using cluster random sampling technique and consisted of two classes. The fist class was treated using virtual demonstration and second class was treated real demonstration. The data was collected using test for students achievement, and questioner for students motivation and thinking style, and observation sheet for students affective achievements. 
The research hypotheses was tested using ANOVA with 2x2x2 factorial design and calculated with software minitab 15. From data analysis can be concluded that: (1) there is no effects of learning media toward students cognitive (p value = 0.618) and affective (p value = 0.822) achievement, (2) there is an effect students high motivation towards cognitive (p value = 0.000) and affective (p value = 0.008) achievement, (3) there is no effect of concrete and abstract thinking styles toward students cognitive (p value = 0.233) and affective (p value = 0.233) achievement, (4) There is no interactions between the learning media and students motivation toward (p value = 0.365) but there is interaction between the learning media and the motivation toward affective (p value = 0.037) achievement, (5) there is no interaction between the learning media and students thinking styles toward cognitive (p value = 0.112) and affective (p value = 0.256) achievement, (6) there is interaction between motivation and students thinking style toward cognitive (p value = 0.042) but there is no interaction between motivation and students thinking styles on affective (p value = 0.780) achievement, (7) There are no interactions among", "authors": [{"name": "Wahyu Ary"}, {"name": "Guru Kurnianto"}, {"name": "Smpn"}, {"name": "Gunung Girisubo"}, {"name": "Jogjakarta Kidul"}], "citations": [{"index": 0, "publisher": null, "volume": null, "title": "IPA Terpadu untuk Kelas VII SMP/MTs. 
Jakarta: Pusat Perbukuan Nasional Departemen Pendidikan Nasional", "id": "b0", "date": "2008", "issue": null, "authors": [{"name": "Ani Winarsih"}], "url": null, "journal": null}, {"index": 1, "publisher": null, "volume": null, "title": null, "id": "b1", "date": "2008", "issue": null, "authors": [{"name": "Akhmad Sudrajat"}], "url": null}, {"index": 2, "publisher": null, "volume": null, "title": null, "id": "b2", "date": false, "issue": null, "authors": [{"name": "Teori-Teori Belajar"}], "url": null}, {"index": 3, "publisher": null, "volume": null, "title": "Macam-Macam Pendekatan dan Metode Pembelajaran. Bandung Dikdasmen P3G IPA", "id": "b3", "date": "2004", "issue": null, "authors": [{"name": "Arief Sidharta"}], "url": null, "journal": null}, {"index": 4, "publisher": null, "volume": null, "title": "Balajar dan Pembelajaran", "id": "b4", "date": "2005", "issue": null, "authors": [{"name": "Asri Budiningsih"}, {"name": "C"}], "url": null, "journal": null}, {"index": 5, "publisher": null, "volume": null, "title": "Statistik untuk Penelitian", "id": "b5", "date": "2004", "issue": null, "authors": [{"name": "Budiyono"}], "url": null, "journal": null}, {"index": 6, "publisher": null, "volume": null, "title": "Pendekatan Keterampilan Proses. Jakrta: PT Gramedia", "id": "b6", "date": "1998", "issue": null, "authors": [{"name": "Conny Semiawan"}, {"name": "A Tangyong"}, {"name": "S Belen"}, {"name": "Yulaelawati Matahelemual"}, {"name": "Wahjudi Suselordjo"}], "url": null, "journal": null}, {"index": 7, "publisher": null, "volume": null, "title": "Quantum Learning. Bandung: Mizan Media Utama", "id": "b7", "date": "2008", "issue": null, "authors": [{"name": "Bobbi Deporter"}, {"name": "Mike Dan Hernacki"}], "url": null, "journal": null}, {"index": 8, "publisher": null, "volume": null, "title": "Belajar dan Pembelajaran. 
Jakarta: Rineka Cipta", "id": "b8", "date": "2006", "issue": null, "authors": [{"name": "Mudjiono Dimyati Dan"}], "url": null, "journal": null}, {"index": 9, "publisher": null, "volume": null, "title": "Psikologi Pengajaran. Jakarta: Gramedia", "id": "b9", "date": "2002", "issue": null, "authors": [{"name": "Ary Wahyu Kurnianto Djamarah"}], "url": null, "journal": null}, {"index": 10, "publisher": null, "volume": null, "title": "Ketrampilas Preoses Sains. Jakarta: Depertemen Pendidikan Nasional", "id": "b10", "date": "2003", "issue": null, "authors": [{"name": "Elok Sudibyo"}], "url": null, "journal": null}, {"index": 11, "publisher": null, "volume": null, "title": "Principles of Instructions Design", "id": "b11", "date": "1992", "issue": null, "authors": [{"name": "Gagne"}, {"name": "M Robert"}, {"name": "Brigss"}, {"name": "J Leslie"}, {"name": "Walter Wagner"}], "url": null, "journal": null}, {"index": 12, "publisher": null, "volume": "1", "title": "Seeing is Believing: Clasroom Demonstration As Scintific Inkquiry", "id": "b12", "date": "2002-12", "issue": "3", "authors": [{"name": "Jerod Groos"}], "url": null, "journal": "Illinois: Jounal Physic Teacher Online"}, {"index": 13, "publisher": null, "volume": null, "title": "Pembelajaran Fisika Dengan Metode Demonstrasi Menggunakan Media Audio Visual dan Labortorium Ditinjau Dari Kemampuan Visiospasial", "id": "b13", "date": "2009", "issue": null, "authors": [{"name": "Hartanto"}], "url": null, "journal": null}, {"index": 14, "publisher": null, "volume": null, "title": "Startegi Belajar Mengajar. 
Bandung: CV Maulana", "id": "b14", "date": "2001", "issue": null, "authors": [{"name": "Mulyani Sumantri"}, {"name": "&amp; Johan Permana"}], "url": null, "journal": null}, {"index": 15, "publisher": null, "volume": null, "title": "Pembelajaran Kontekstual dan Penerapannya dalam KBK", "id": "b15", "date": "2004", "issue": null, "authors": [{"name": "Dkk Nurhadi"}], "url": null, "journal": null}, {"index": 16, "publisher": null, "volume": null, "title": "Filsafat Konstruktivisme Dalam Pendidikan", "id": "b16", "date": "1997", "issue": null, "authors": [{"name": "Paul Suparno"}], "url": null, "journal": null}, {"index": 17, "publisher": null, "volume": null, "title": "Pembelajaran Fisika Menggunakan Metode Demonstrasi Dengan Observasi Langsung dan Visualisasi Ditinjau Dari Kemampuan Spasial Siswa", "id": "b17", "date": "2009", "issue": null, "authors": [{"name": "Ponimin"}], "url": null, "journal": null}, {"index": 18, "publisher": null, "volume": null, "title": "Interaksi Belajar Mengajar IPA", "id": "b18", "date": "1988", "issue": null, "authors": [{"name": "Ratna Wilis Dahar"}], "url": null, "journal": null}, {"index": 19, "publisher": null, "volume": null, "title": "Strategi Belajar Mengajar. Jakarta: PT. Rineka Cipta", "id": "b19", "date": "2008", "issue": null, "authors": [{"name": "N Roestiyah"}], "url": null, "journal": null}, {"index": 20, "publisher": null, "volume": null, "title": "Interaksi dan Motivasi Belajar Mengajar. Jakarta: PT Raja Grafindo Persada", "id": "b20", "date": "2005", "issue": null, "authors": [{"name": "A Sardiman"}], "url": null, "journal": null}, {"index": 21, "publisher": null, "volume": null, "title": "Evaluasi pendidikan ilmu Pengetahuan Alam. 
Jakarta: Departemen Pendidikan dan Kebudayaan", "id": "b21", "date": "1988", "issue": null, "authors": [{"name": "Subiyanto"}], "url": null, "journal": null}, {"index": 22, "publisher": null, "volume": null, "title": "Prosedur Penelitian Suatu Pendekatan Praktik", "id": "b22", "date": "2006", "issue": null, "authors": [{"name": "Suharsimi Arikunto"}], "url": null, "journal": null}, {"index": 23, "publisher": null, "volume": null, "title": "Metoda Statistika", "id": "b23", "date": "2006", "issue": null, "authors": [{"name": "Sudjana"}], "url": null, "journal": null}, {"index": 24, "publisher": null, "volume": null, "title": "Belajar dan Faktor-Faktor Yang Mempengaruhinya. Jakarta: Rineka Cipta", "id": "b24", "date": "1995", "issue": null, "authors": [{"name": "Slameto"}], "url": null, "journal": null}, {"index": 25, "publisher": null, "volume": null, "title": "IPA Terpadu untuk SMP Kelas VII", "id": "b25", "date": "2007", "issue": null, "authors": [{"name": "Tim Guru"}], "url": null, "journal": null}, {"index": 26, "publisher": null, "volume": null, "title": "Psikologi Pengajaran. Jakarta: Gramedia", "id": "b26", "date": "1987", "issue": null, "authors": [{"name": "W Winkel"}], "url": null, "journal": null}], "journal": {"issn": null, "publisher": null, "volume": "6", "eissn": null, "issue": "1", "name": null}, "acknowledgement": null}
+sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V {"c_size": 101956, "dt": "20180518000206", "offset": 403989190, "surt": "edu,depaul,library,via)/cgi/viewcontent.cgi?article=2947&context=vincentiana", "url": "http://via.library.depaul.edu/cgi/viewcontent.cgi?article=2947&context=vincentiana", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180517225512190-07393-07402-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180517234518099-07397-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 142710 {"title": "Reflexiones Sobre el Discernimiento y el Acompa\u00f1amiento", "date": "2001", "doi": null, "abstract": null, "authors": [{"name": "Charles Bonnet"}], "citations": [], "journal": {"issn": null, "publisher": null, "volume": "45", "eissn": null, "issue": "4", "name": null}, "acknowledgement": null}
+sha1:L6F7D5B4RQWO4P2DPTTSC46IL4JRLKHP {"c_size": 422526, "dt": "20180423223723", "offset": 643898352, "surt": "com,ijiras)/2016/vol_3-issue_9/paper_32.pdf", "url": "http://www.ijiras.com/2016/Vol_3-Issue_9/paper_32.pdf", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180423214236018-03264-03273-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180423222446077-03270-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 438820 {"title": "The Role Of Agricultural Production On Nigeria's Economy", "date": "2016-08", "doi": null, "abstract": null, "authors": [{"name": "Adams Kemi"}], "citations": [{"index": 0, "publisher": null, "volume": null, "title": "The importance of Agriculture towards the Development of Nigeria Economy", "id": "b0", "date": "2010", "issue": null, "authors": [{"name": "Agric Slider"}], "url": null, "journal": null}, {"index": 1, "publisher": null, "volume": null, "title": "The Changing Structure of the Nigerian Economy and Implications for Development", "id": "b1", "date": "2000-08", "issue": null, "authors": [], "url": null, "journal": null}, {"index": 2, "publisher": null, "volume": null, "title": "Growth and poverty reduction: the role of agriculture. A DFID policy paper. 
Department for International Development", "id": "b2", "date": "2005", "issue": null, "authors": [], "url": null, "journal": null}, {"index": 3, "publisher": null, "volume": "7", "title": "The Impact of Agriculture and Agro-Based Industries on Economic Development in Nigeria: An Econometric Assessment Retrieved from", "id": "b3", "date": "2009-06", "issue": "1", "authors": [{"name": "Edoumiekumo"}], "url": null, "journal": "Journal of Research in National Development"}, {"index": 4, "publisher": null, "volume": null, "title": "The Role of Agriculture in the Economic Development of Nigeria", "id": "b4", "date": "2015", "issue": null, "authors": [{"name": "I-Farm"}], "url": null, "journal": null}, {"index": 5, "publisher": null, "volume": null, "title": "Leaders In African Work Force Development", "id": "b5", "date": "2010", "issue": null, "authors": [{"name": "Oicd"}], "url": null, "journal": null}, {"index": 6, "publisher": null, "volume": null, "title": "Olumola 2007 strategies for managing the challenges of agriculture in Nigeria", "id": "b6", "date": false, "issue": null, "authors": [], "url": null, "journal": null}, {"index": 7, "publisher": null, "volume": null, "title": "Opportunity International (2012) Invest in Nigeria farmers", "id": "b7", "date": false, "issue": null, "authors": [], "url": null, "journal": null}, {"index": 8, "publisher": null, "volume": null, "title": "World Population Prospects: The 2010 Revision. United Nations Department of Economic and Social Affairs, Population Division", "id": "b8", "date": "2011", "issue": null, "authors": [], "url": null, "journal": null}], "journal": {"issn": null, "publisher": null, "volume": null, "eissn": null, "issue": null, "name": null}, "acknowledgement": null}
+sha1:LNDE2NJE5ZCKSPGT72JV5J4YEIPWFVJF {"c_size": 7308859, "dt": "20180422054242", "offset": 443789750, "surt": "fr,chu-limoges,hemato)/hematolim/portals/0/enseignement/items_ecn/6-lymphome%20%20-%20item%20316a%20ue9%20-%20pr.%20bordessoule%20-%202017.pdf?ver=2017-03-29-111031-597", "url": "https://hemato.chu-limoges.fr/hematolim/Portals/0/Enseignement/Items_ECN/6-Lymphome%20%20-%20Item%20316a%20UE9%20-%20Pr.%20Bordessoule%20-%202017.pdf?ver=2017-03-29-111031-597", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180422050335049-02780-02789-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180422053334855-02787-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 7629821 {"title": "UE N\u00b0 9 Canc\u00e9ro-onco h\u00e9matologie Objectif ECN: N\u00b0 316a LYMPHOME MALIN NON HODGKINIEN", "date": false, "doi": null, "abstract": null, "authors": [{"name": "D Bordessoule"}], "citations": [], "journal": {"issn": null, "publisher": null, "volume": null, "eissn": null, "issue": null, "name": null}, "acknowledgement": null}
+sha1:UW44ACMDY6ANJZHXQD3XZQZPTL5GYJSR {"c_size": 429937, "dt": "20180423232743", "offset": 978890945, "surt": "org,fljs)/sites/www.fljs.org/files/publications/mudde_0.pdf", "url": "http://www.fljs.org/sites/www.fljs.org/files/publications/Mudde_0.pdf", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180423224427413-03274-03283-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180423230554073-03279-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 492703 {"title": "Are Populists Friends or Foes of Constitutionalism? The Social and Political Foundations of Constitutions Policy Brief The Foundation for Law, Justice and Society", "date": false, "doi": null, "abstract": null, "authors": [{"name": "Cas Mudde"}], "citations": [], "journal": {"issn": null, "publisher": null, "volume": null, "eissn": null, "issue": null, "name": null}, "acknowledgement": null}
+sha1:HSFTGWSP4NVBYHT5YCVWFVBFKOQQJQLC {"c_size": 247983, "dt": "20180422050817", "offset": 258971826, "surt": "com,allsciencejournal)/archives/2018/vol3issue2/3-2-19-903.pdf", "url": "http://www.allsciencejournal.com/archives/2018/vol3issue2/3-2-19-903.pdf", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180422050335049-02780-02789-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180422050335049-02780-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 269587 {"title": "Transposon Tn5-induced mutagenesis of kanamycin resistance Rhizobium japonicum", "date": "2018-03", "doi": null, "abstract": "Background: Transposase is an enzyme that binds to the ends of a transposon and catalyzes the movement of the transposon to another part of the genome by a cut and paste mechanism or a replicative transposition mechanism. Rhizobium is a genus of Gram-negative soil bacteria, many of which fix nitrogen. Nitrogen fixation is an important part of the nitrogen cycle. Objectives: The objective of this research was to determine whether at random transposon mutagenesis could be applied in genetic studies of Rhizobium japonicum. One major reason for this is that a range of defined mutations is not available. Methods: In present study R. japonicum at frequencies sufficient to allow the isolation of large numbers of insertion mutants. The selection of Tn5 mutants was facilitated by the expression, in all the R. japonicum strains we have tested, of the Tn5 encoded kanamycin resistance. A number of auxotrophic and symbiotically defective, single, random transposon (Tn5) mutants were obtained in three slow-growing strains of different DNA homology and serogroups and the single fast growing strain of R. japonicum. Conclusions: In conclusion, the Tn5 can be used as a generalized mutagen to isolate a variety of mutants with defects in symbiotic nitrogen fixation. 
The analysis of such mutants should prove to be useful in elucidating the biochemical, genetic, and regulatory events involved in the R. japonicum which effectively nodulates certain Indian soybean cultivars.", "authors": [{"name": "Bhutada Sa"}], "citations": [{"index": 0, "publisher": null, "volume": null, "title": "The Sleeping Beauty transposon system-A non-viral vector for gene therapy", "id": "b0", "date": "2011", "issue": null, "authors": [{"name": "Aron Aronovich"}, {"name": "E Mcivor"}, {"name": "R Hackett"}, {"name": "P"}], "url": null, "journal": "Hum. Mol. Genet"}, {"index": 1, "publisher": null, "volume": "38", "title": "Transposases are the most abundant, most ubiquitous genes in nature", "id": "b1", "date": "2010", "issue": "13", "authors": [{"name": "R Aziz"}, {"name": "M Breitbart"}, {"name": "R Edwards"}], "url": null, "journal": "Nucleic Acid Research"}, {"index": 2, "publisher": null, "volume": "77", "title": "Control of gene expression by a mobile recombinational switch", "id": "b2", "date": "1980", "issue": null, "authors": [{"name": "D Berg"}], "url": null, "journal": "Proc. Natl. Acad. Sci. U.S.A"}, {"index": 3, "publisher": null, "volume": "45", "title": "Insertion, excision, and inversion of Tn5. Cold Spring Harbor Symp", "id": "b3", "date": "1980", "issue": null, "authors": [{"name": "D Berg"}, {"name": "C Egner"}, {"name": "B Hirschel"}, {"name": "J Howard"}, {"name": "Johnstrud Jorgensen"}, {"name": "Lra Tisty"}, {"name": "T"}], "url": null, "journal": "Quant. Biol"}, {"index": 4, "publisher": null, "volume": "52", "title": "Nitrate reductase in soybean root nodules. 
Biocltiniica et biopltysica acta", "id": "b4", "date": "1961", "issue": null, "authors": [{"name": "F Bergersen"}], "url": null, "journal": null}, {"index": 5, "publisher": null, "volume": "276", "title": "Transfer of the drug-resistance transposon Tn5 to Rhizobium", "id": "b5", "date": "1978", "issue": null, "authors": [{"name": "J Beringer"}, {"name": "J Benyon"}, {"name": "A Buchanan-Wollaston"}, {"name": "Awb Johnston"}], "url": null, "journal": "Nature"}, {"index": 6, "publisher": null, "volume": "57", "title": "Relation between glutamine synthetase and nitrogenase activities in the symbiotic association between Rhizobium japonicum and Glycine max", "id": "b6", "date": "1976", "issue": null, "authors": [{"name": "P Bishop"}, {"name": "J Guevara"}, {"name": "F Engelke"}, {"name": "H Evans"}], "url": null, "journal": "Plant Physiol"}, {"index": 7, "publisher": null, "volume": "122", "title": "Properties of TnS-induced carbohydrate mutants in Rhizobuin meliloti", "id": "b7", "date": "1981", "issue": null, "authors": [{"name": "M Duncan"}], "url": null, "journal": "J. Gen. 
Microbiol"}, {"index": 8, "publisher": null, "volume": "187", "title": "Coding properties of cloned nitrogenase structural genes from Rhizobium japonicum", "id": "b8", "date": "1982", "issue": null, "authors": [{"name": "M Fuhrmann"}, {"name": "H Hennecke"}], "url": null, "journal": "Mol Gen Genet"}, {"index": 9, "publisher": null, "volume": "187", "title": "Coding properties of cloned nitrogenase structural genes from Rhizobium japonicum", "id": "b9", "date": "1982", "issue": null, "authors": [{"name": "M Fuhrmann"}, {"name": "H Hennecke"}], "url": null, "journal": "Mol Gen Genet"}, {"index": 10, "publisher": null, "volume": "193", "title": "Localized mutagenesis in Rhizobium japonicum", "id": "b10", "date": "1984", "issue": null, "authors": [{"name": "M Hahn"}, {"name": "H Hennecke"}], "url": null, "journal": "Mol Gen Genet"}], "journal": {"issn": null, "publisher": null, "volume": "3", "eissn": null, "issue": "2", "name": "International Journal of Advanced Science and Research International Journal of Advanced Science and Research"}, "acknowledgement": null}
+sha1:GRAR6SQ7LDO5PSRRK73IENWJGUXWERQ2 {"c_size": 322220, "dt": "20180428112949", "offset": 1206445, "surt": "eus,ehu)/ojs/index.php/asju/article/viewfile/3888/3502", "url": "http://www.ehu.eus/ojs/index.php/ASJU/article/viewFile/3888/3502", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180428102802023-04230-04239-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180428112954435-04238-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 333834 {"title": "EXTRACTING INFORMATION FROM PARTICIPIAL STRUCTURES", "date": false, "doi": null, "abstract": "Our applied linguistic research aims at increasing the efficiency of a rule-based information extraction (IE) system by enhancing it with further grammatical knowledge. The input of the IE system is made up of sentences of business news. The event of the piece of news is identified through the main verb of the sentence, while parti cipants and circumstances of the event through arguments and adjuncts of the main verb. Our objective was to unfold the hidden information, contained by NPs within which non-finite verbs (e.g. participles) appear. Thus, we invented a rule-system to transform participial structures into sentences with a finite verb, so that they could serve as input of the IE system. To tackle this task we had to be able to distinguish between real participles and adjectives. 
According to us there are some distributional criteria which can be used as the basis for creating the right classification.", "authors": [{"name": "Enik\u00f6 H\u00e9ja"}, {"name": "Kata G\u00e1bor"}], "citations": [{"index": 0, "publisher": null, "volume": "3", "title": "Words, Lexical Categories, Suffixes", "id": "b0", "date": "2000", "issue": null, "authors": [{"name": "I Kenesei"}], "url": null, "journal": "Structural Grammar of Hungarian"}, {"index": 1, "publisher": null, "volume": null, "title": "Semantics')", "id": "b1", "date": "2000", "issue": null, "authors": [{"name": "F Kiefer"}], "url": null, "journal": null}, {"index": 2, "publisher": null, "volume": "1", "title": "Akad\u00e9miai Kiad\u00f3 ('Predicates and Complements", "id": "b2", "date": "1992", "issue": null, "authors": [{"name": "A Koml\u00f3sy"}], "url": null, "journal": "Structural Grammar of Hungarian"}, {"index": 3, "publisher": null, "volume": "3", "title": "The Suffixes of Adjectival and Adverbial Participles", "id": "b3", "date": "2000", "issue": null, "authors": [{"name": "T Laczk\u00f3"}], "url": null, "journal": "Structural Grammar of Hungarian"}, {"index": 4, "publisher": null, "volume": null, "title": "\u00abInformation Extraction from Short Business News Items\u00bb", "id": "b4", "date": "2003", "issue": null, "authors": [{"name": "G Pr\u00f3sz\u00e9ky"}], "url": null, "journal": "Proceedings of the First Hungarian Computational Linguistics Conference"}, {"index": 5, "publisher": null, "volume": null, "title": "Dictionnaires \u00e9lectroniques et analyse automatique de textes: Le systeme Intex", "id": "b5", "date": "1993", "issue": null, "authors": [{"name": "M Silberztein"}], "url": null, "journal": null}, {"index": 6, "publisher": null, "volume": null, "title": "\u00abA ragoz\u00e1si sz\u00f3t\u00e1rt\u00f3l a NooJ morfol\u00f3giai modulj\u00e1ig\u00bb", "id": "b6", "date": "2004", "issue": null, "authors": [{"name": "P Vajda"}], "url": null, "journal": "Proceedings of the Second 
Hungarian Computational Linguistics Conference"}], "journal": {"issn": null, "publisher": null, "volume": null, "eissn": null, "issue": null, "name": null}, "acknowledgement": null}
+sha1:NW5OOMXV5TCHAAUOPU65IM2MBWIPKZOC {"c_size": 562543, "dt": "20180414173100", "offset": 780583906, "surt": "id,ac,undip,ejournal)/index.php/ijred/article/viewfile/17701/pdf", "url": "https://ejournal.undip.ac.id/index.php/ijred/article/viewFile/17701/pdf", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180414165847369-01259-01268-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180414170641162-01262-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 613883 {"title": "Impact of Blanket Configuration on the Design of a Fusion-Driven Transmutation Reactor", "date": "2018", "doi": "10.14710/ijred.7.1.65-70", "abstract": null, "authors": [{"name": "Int"}], "citations": [{"index": 0, "publisher": null, "volume": "84", "title": "The status of the ITER design", "id": "b0", "date": "2009", "issue": null, "authors": [{"name": "N Holtkamp"}], "url": null, "journal": "Fusion Engineering and Design"}, {"index": 1, "publisher": null, "volume": "89", "title": "Conceptual study of fusion-driven system for nuclear waste transmutation", "id": "b1", "date": "2014", "issue": null, "authors": [{"name": "B Hong"}], "url": null, "journal": "Fusion Engineering and Design"}, {"index": 2, "publisher": null, "volume": null, "title": "Conceptual design study of a superconducting spherical tokamak reactor with a self-consistent system analysis code", "id": "b2", "date": "2011", "issue": null, "authors": [{"name": "B Hong"}, {"name": "Y Hwang"}, {"name": "J Kang"}, {"name": "D Lee"}, {"name": "H Joo"}, {"name": "M Ono"}], "url": null, "journal": "Nucl. 
Fusion"}, {"index": 3, "publisher": null, "volume": "40", "title": "Tokamak Reactor System Analysis Code for the Conceptual Development of DEMO Reactor", "id": "b3", "date": "2008", "issue": null, "authors": [{"name": "B Hong"}, {"name": "D Lee"}, {"name": "S In"}], "url": null, "journal": "Nuclear Engineering and Technology"}, {"index": 4, "publisher": null, "volume": "40", "title": "Characteristics of nuclear waste transmutation based on a tokamak neutron source", "id": "b4", "date": "2015", "issue": null, "authors": [{"name": "B Hong"}, {"name": "P Oh"}], "url": null, "journal": "Int. J. Hydrogen Energy"}, {"index": 5, "publisher": null, "volume": null, "title": "Optimum plasma states for next step tokamaks", "id": "b5", "date": "2002", "issue": null, "authors": [{"name": "Y Lin-Liu"}, {"name": "R Stambaugh"}], "url": null, "journal": "General Atomics Report"}, {"index": 6, "publisher": null, "volume": null, "title": "Unified ideal stability limits for advanced tokamak and spherical torus plasmas, Princeton Plasma Physics Laboratory Report", "id": "b6", "date": "2003", "issue": null, "authors": [{"name": "J Menard"}, {"name": "M Bell"}, {"name": "R Bell"}, {"name": "D Gates"}, {"name": "S Kaye"}, {"name": "B Leblanc"}, {"name": "S Sabbagh"}, {"name": "E Fredrickson"}, {"name": "S Jardin"}, {"name": "R Maingi"}, {"name": "J Manickam"}, {"name": "D Mueller"}, {"name": "M Ono"}, {"name": "F Paoletti"}, {"name": "Y.-K Peng"}, {"name": "V Soukhanovskii"}, {"name": "D Stutman"}, {"name": "E Synakowski"}], "url": null, "journal": null}, {"index": 7, "publisher": null, "volume": "22", "title": "The ITER Magnets: Design and Construction Status", "id": "b7", "date": "2012", "issue": null, "authors": [{"name": "N Mitchell"}, {"name": "A Devred"}, {"name": "P Libeyre"}], "url": null, "journal": "IEEE Transactions on Applied Superconductivity"}, {"index": 8, "publisher": null, "volume": null, "title": "Fusion Eng. Des. 
65", "id": "b8", "date": "2003", "issue": null, "authors": [{"name": "F Najmabadi"}, {"name": "Aries The"}, {"name": "Team"}], "url": null, "journal": null}, {"index": 9, "publisher": null, "volume": "32", "title": "Japanese evaluated nuclear data library version 3 reversion-2: JENDL-3.2", "id": "b9", "date": "1995", "issue": null, "authors": [{"name": "T Nakagawa"}, {"name": "T Asami"}], "url": null, "journal": "Journal of Nuclear Science and Technology"}, {"index": 10, "publisher": null, "volume": "81", "title": "Fusion Eng. Des", "id": "b10", "date": "2006", "issue": null, "authors": [{"name": "T Nishitani"}, {"name": "M Yamauchi"}, {"name": "S Nishio"}, {"name": "M Wada"}], "url": null, "journal": null}, {"index": 11, "publisher": null, "volume": null, "title": "ORNL", "id": "b11", "date": "1998", "issue": null, "authors": [], "url": null, "journal": null}, {"index": 12, "publisher": null, "volume": "42", "title": "Status of Pyroprocessing Technology Development in Korea", "id": "b12", "date": "2010", "issue": null, "authors": [{"name": "K Song"}, {"name": "H Lee"}, {"name": "J-M Hur"}, {"name": "J Kim"}, {"name": "D Ahn"}, {"name": "Y Cho"}], "url": null, "journal": "Nuclear Engineering and Technology"}, {"index": 13, "publisher": null, "volume": "82", "title": "Transmutation missions for fusion neutron sources", "id": "b13", "date": "2007", "issue": null, "authors": [{"name": "W Stacey"}], "url": null, "journal": "Fusion Engineering and Design"}, {"index": 14, "publisher": null, "volume": "42", "title": "Toroidal reactor designs as a function of aspect ratio and elongation", "id": "b14", "date": "2002", "issue": null, "authors": [{"name": "C Wong"}, {"name": "J Wesley"}, {"name": "R Stambaugh"}, {"name": "E Cheng"}], "url": null, "journal": "Nucl. 
Fusion"}, {"index": 15, "publisher": null, "volume": "81", "title": "Conceptual design of the fusion-driven subcritical system FDS-I, Fusion Engineering and Design", "id": "b15", "date": "2006", "issue": null, "authors": [{"name": "Y Wu"}, {"name": "S Zheng"}, {"name": "X Zhu"}, {"name": "W Wang"}, {"name": "S Liu"}, {"name": "Y Bai"}, {"name": "H Chen"}, {"name": "L Hu"}, {"name": "M Chen"}, {"name": "Q Huang"}, {"name": "D Huang"}, {"name": "S Zhang"}, {"name": "J Li"}, {"name": "D Chu"}, {"name": "J Jiang"}], "url": null, "journal": null}], "journal": {"issn": null, "publisher": "Institute of Research and Community Services Diponegoro University (LPPM UNDIP)", "volume": "7", "eissn": "2252-4940", "issue": "1", "name": "International Journal of Renewable Energy Development"}, "acknowledgement": null}
+sha1:HBX4RMRDQEAQ5VALWMBCZ6L4XCGYEXNF {"c_size": 350217, "dt": "20180412205334", "offset": 840088849, "surt": "com,ijppsjournal)/vol3suppl4/2358.pdf", "url": "http://ijppsjournal.com/Vol3Suppl4/2358.pdf", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180412202848067-00872-00881-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180412203230711-00875-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 361954 {"title": "ANTIFUNGAL ACTIVITY AND PRELIMINARY PHYTOCHEMICAL STUDIES OF LEAF EXTRACT OF SOLANUM NIGRUM LINN", "date": false, "doi": null, "abstract": "Solanum nigrum Linn. is extensively used in Indian traditional and folk medicines to cure various skin ailments. The present study aims to evaluate the possibility for the presence of novel bio-active compounds against fungal pathogens. To determine antifungal activity, aqueous and crude extracts from leaves of S. nigrum Linn. was used against A. niger, A. flavus, C. albicans by dry weight method. Extracts prepared using crude solvents exhibited higher antifungal activity as compared to their corresponding aqueous extracts. No good activity was observed in the aqueous extract. The pathogen inhibiting activity was found to be dose dependent. 
The preliminary phytochemical screening of the leaves revealed the presence of Alkaloids, Flavonols, Flavones, Flavanols, Saponins and Steroids.", "authors": [{"name": "Research Article"}, {"name": "Sweta Prakash"}, {"name": "Ashok Jain"}], "citations": [{"index": 0, "publisher": null, "volume": "18", "title": "A phytochemical survey of Malaya-III, Alkaloids and saponins", "id": "b0", "date": "1964", "issue": null, "authors": [{"name": "R Amarsingham"}, {"name": "N G Bisset"}, {"name": "A Millard"}, {"name": "M C Woods"}], "url": null, "journal": "Economic Botany"}, {"index": 1, "publisher": null, "volume": "63", "title": "Possible Industrial application of genus Solanum in twenty first century-A review", "id": "b1", "date": "2004", "issue": null, "authors": [{"name": "M Amir"}, {"name": "S Kumar"}], "url": null, "journal": "Journal of Scientific and Industrial Research"}, {"index": 2, "publisher": null, "volume": "96", "title": "The relationship between the volume of antimicrobial consumption in human communities and the frequency of resistance", "id": "b2", "date": "1999", "issue": null, "authors": [{"name": "D J Austin"}, {"name": "K Kristinsson"}, {"name": "Anderson R M"}], "url": null, "journal": "Proc Natl Acad Sci"}, {"index": 3, "publisher": null, "volume": null, "title": "Antibacterial activity of Solanum trilobatum", "id": "b3", "date": "0211", "issue": null, "authors": [{"name": "K Balakrishna"}, {"name": "G Veluchamy"}, {"name": "P Ragothaman"}, {"name": "G Sajan"}], "url": null, "journal": "Proceeding of International Congress on Ayurveda"}, {"index": 4, "publisher": null, "volume": null, "title": "Handbook of Medicinal Plants", "id": "b4", "date": "2001", "issue": null, "authors": [{"name": "S Bhattacharjee"}], "url": null, "journal": null}, {"index": 5, "publisher": null, "volume": "20", "title": "Phytochemical screening of some plants of Indian Botanical garden", "id": "b5", "date": "1978", "issue": "1-4", "authors": [{"name": "S S Dan"}, {"name": 
"N R Mondal"}, {"name": "S Das"}], "url": null, "journal": "Bulletin of Botanical Survey of India"}, {"index": 6, "publisher": null, "volume": "4", "title": "Phytochemical constituent of some Nigerian medicinal plants", "id": "b6", "date": "2005", "issue": "7", "authors": [{"name": "H O Edeoga"}, {"name": "D Okwu"}, {"name": "B O Mbacbie"}], "url": null, "journal": "African Journal of Biotechnology"}, {"index": 7, "publisher": null, "volume": null, "title": "Phtochemical screening of some species of Iranian plants", "id": "b7", "date": "2003", "issue": null, "authors": [{"name": "M Faraz"}, {"name": "K Mohammed"}, {"name": "G Naysanet"}, {"name": "H R Vahidipour"}], "url": null, "journal": "Iranian Journal of Pharmaceutical Research"}, {"index": 8, "publisher": null, "volume": "2", "title": "African indigenous plants with chemotherapeutic potentials and biotechnological approach to the production of bioactive prophylactic agents", "id": "b8", "date": "2003", "issue": null, "authors": [{"name": "E O Farombi"}], "url": null, "journal": "African Journal of Biotechnology"}, {"index": 9, "publisher": null, "volume": "61", "title": "The cellulolytic activity of thermophilic fungi", "id": "b9", "date": "1969", "issue": null, "authors": [{"name": "C Fergus"}], "url": null, "journal": "Mycology"}, {"index": 10, "publisher": null, "volume": null, "title": "Chemotaxonomy of flowering plants. I-IV", "id": "b10", "date": "1974", "issue": null, "authors": [{"name": "R Gibbs"}], "url": null, "journal": null}, {"index": 11, "publisher": null, "volume": "84", "title": "Traditional herbal drugs of southern Uganda, II. 
Literature analysis and antimicrobial assays", "id": "b11", "date": "2003", "issue": null, "authors": [{"name": "F A Hamil"}, {"name": "S Apio"}, {"name": "N K Mubiru"}, {"name": "R Bukenya-Ziruba"}, {"name": "Mosanyo"}, {"name": "M"}, {"name": "Magangi Owe"}], "url": null, "journal": "Journal of Ethnopharmacology"}, {"index": 12, "publisher": null, "volume": null, "title": "Phytochemical methods. A guide to modern techniques of plant analysis", "id": "b12", "date": "1984", "issue": null, "authors": [{"name": "J Harborne"}], "url": null, "journal": null}, {"index": 13, "publisher": null, "volume": "48", "title": "Steroidal oligoglycosides from Solanum nigrum. Chemical and Pharmaceutical Bulletin", "id": "b13", "date": "2000", "issue": "7", "authors": [{"name": "T Ikeda"}, {"name": "H Tsumagari"}, {"name": "H Nohara"}, {"name": "T Nohara"}], "url": null, "journal": null}, {"index": 14, "publisher": null, "volume": "39", "title": "Screening of some medicinal plants for isolation of antifungal proteins and peptides", "id": "b14", "date": "2007", "issue": "1", "authors": [{"name": "A Jamil"}, {"name": "S Muhommad"}, {"name": "Masud-Ul-Haq"}, {"name": "M Khan"}, {"name": "A Muhammad"}], "url": null, "journal": "Pakistan Journal of Botany"}, {"index": 15, "publisher": null, "volume": "11", "title": "Phytochemical screening of some folklore medicinal plants for their anti-inflammatory activity", "id": "b15", "date": "2007", "issue": null, "authors": [{"name": "D Juneja"}, {"name": "P N Shrivastava"}, {"name": "M K Guha"}, {"name": "R C Saxena"}], "url": null, "journal": "Pharmacognosy Magazine"}, {"index": 16, "publisher": null, "volume": "45", "title": "Invitro antimicrobial activity of bakuchiol against oral microorganisms", "id": "b16", "date": "2001", "issue": null, "authors": [{"name": "H Katsura"}, {"name": "Sukiyama"}, {"name": "A Suzuki"}, {"name": "M Kobayashi"}], "url": null, "journal": "Antimicrobial agents & Chemotherapy. 
Antimicrobial agents & Chemical Journal"}, {"index": 17, "publisher": null, "volume": null, "title": "Indian Medicinal Plants", "id": "b17", "date": "1935", "issue": null, "authors": [{"name": "K R Kritikar"}, {"name": "B D Basu"}], "url": null, "journal": null}, {"index": 18, "publisher": null, "volume": "86", "title": "Screening of traditionally used South African pants for antifungal activity against Candida albicans", "id": "b18", "date": "2003", "issue": null, "authors": [{"name": "M L Motsei"}, {"name": "K L Lindsey"}, {"name": "J Vanstaden"}, {"name": "A K Jaeger"}], "url": null, "journal": "Journal of Ethnopharmacology"}, {"index": 19, "publisher": null, "volume": "30", "title": "Extraction methods and bioautography for evaluation of medicinal plant antimicrobial activity", "id": "b19", "date": "2000", "issue": null, "authors": [{"name": "A Nostro"}, {"name": "M P Germano"}, {"name": "V Angelo"}, {"name": "A Marino"}, {"name": "M A Cannatelli"}], "url": null, "journal": "Letters in Applied Microbiology"}, {"index": 20, "publisher": null, "volume": null, "title": "Medicinal plants cultivation a scientific approach including processing and financial guidelines", "id": "b20", "date": "2004", "issue": null, "authors": [{"name": "S S Purohit"}, {"name": "S P Vyas"}], "url": null, "journal": "Publishers Agrobios"}, {"index": 21, "publisher": null, "volume": "39", "title": "Invitro evaluation of inhibitory nature of extract of eighteen plant species of Chhindwara against three keratinophilic fungi", "id": "b21", "date": "1997", "issue": "1-4", "authors": [{"name": "S Qureshi"}, {"name": "M Rai"}], "url": null, "journal": "Hindustan Antibiotic Bulletin"}, {"index": 22, "publisher": null, "volume": "62", "title": "Screening of 34 Indian medicinal plants for antibacterial properties", "id": "b22", "date": "1998", "issue": null, "authors": [{"name": "R P Samy"}, {"name": "S Ignacimuthu"}, {"name": "A Sen"}], "url": null, "journal": "Journal of Ethnopharmacology"}, 
{"index": 23, "publisher": null, "volume": null, "title": "Pharmacognosy", "id": "b23", "date": "1987", "issue": null, "authors": [{"name": "E Trease"}, {"name": "W C Evans"}], "url": null, "journal": null}, {"index": 24, "publisher": null, "volume": null, "title": null, "id": "b24", "date": false, "issue": null, "authors": [{"name": "Billiare Tindall"}, {"name": "London"}], "url": null}, {"index": 25, "publisher": null, "volume": null, "title": "World Health Organization, WHO Traditional medicine strategy", "id": "b25", "date": "2002", "issue": null, "authors": [], "url": null, "journal": null}, {"index": 26, "publisher": null, "volume": "48", "title": "Protecting and promoting traditional knowledge: System, National experiences and International Dimensions .Part-I. The role of Traditional knowledge in Health care and Agriculture", "id": "b26", "date": "2004", "issue": null, "authors": [{"name": "X Zhang"}], "url": null, "journal": "United nations"}], "journal": {"issn": null, "publisher": null, "volume": null, "eissn": null, "issue": null, "name": null}, "acknowledgement": null}
+sha1:EVE7BEGUAOININZRFDIT5D5SPY3NZR2S {"c_size": 655278, "dt": "20180422095600", "offset": 107970268, "surt": "com,ijpda)/admin/uploads/yarp6w.pdf", "url": "http://www.ijpda.com/admin/uploads/YaRp6W.pdf", "warc": "OA-JOURNAL-TESTCRAWL-TWO-2018-20180422095004287-02850-02859-wbgrp-svc284/OA-JOURNAL-TESTCRAWL-TWO-2018-20180422095448165-02851-23249~wbgrp-svc284.us.archive.org~8443.warc.gz"} application/pdf 679566 {"title": "POSSIBILITY OF MAKING HYDRODYNAMIC OSCILLATOR-AS AN ARTIFICIAL TASTE SENSING DEVICE", "date": false, "doi": null, "abstract": "The mechanism leading to sensation of taste, which is so very essential for the maintenance of life, are very complex and little understood. Broadly speaking, it can be said that interaction of different chemicals, which may be called as taste stimuli, with taste receptors is converted into information of nerve impulses, which the brain receives to decipher the taste. It is, therefore, necessary to have an excitable oscillatory system on which the phenomenon of taste can be mimicked. In a recent study, it has been shown that the hydrodynamic oscillator is a good candidate for mimicking the sensing mechanism of taste. It was argued that the hydrodynamic oscillator could be viewed as an analogue of taste buds and the amplitude of electrical potential oscillations in the hydrodynamic oscillator, using a taste stimulant, as an analogue of the receptor potential. The amplitudes of the electrical potential oscillations in the hydrodynamic oscillator were shown to correlate well with the logarithms of the relatives taste indices (The intensitites of taste sensations within a particular taste category are measured by their relative taste indices), concentration, etc. 
of the substances belonging to different taste categories, and it appears that hydrodynamic oscillator can be used as possible artificial taste sensing device.", "authors": [{"name": "Amlan Das"}], "citations": [{"index": 0, "publisher": null, "volume": null, "title": "Fundamentals of sensory physiology", "id": "b0", "date": "1966", "issue": null, "authors": [{"name": "R Schimdt"}], "url": null, "journal": null}, {"index": 1, "publisher": null, "volume": null, "title": "Neurobiology of taste and smell", "id": "b1", "date": "1987", "issue": null, "authors": [{"name": "T Finger &amp; W"}, {"name": "Silver"}], "url": null, "journal": null}, {"index": 2, "publisher": null, "volume": "105", "title": "J. Am. chem. Soc", "id": "b2", "date": "1983", "issue": null, "authors": [{"name": "K Yoshikawa"}, {"name": "&amp; Y Matsubara"}], "url": null, "journal": null}, {"index": 3, "publisher": null, "volume": "17", "title": "Biophys Chem", "id": "b3", "date": "1983", "issue": null, "authors": [{"name": "K Yoshikawa"}, {"name": "&amp; Y Matsubara"}], "url": null, "journal": null}, {"index": 4, "publisher": null, "volume": "106", "title": "J Am chem Soc", "id": "b4", "date": "1984", "issue": null, "authors": [{"name": "K Yoshikawa"}, {"name": "&amp; Y Matsubara"}], "url": null, "journal": null}, {"index": 5, "publisher": null, "volume": "23", "title": "Biophys Chem", "id": "b5", "date": "1986", "issue": null, "authors": [{"name": "K Yoshikawa"}, {"name": "T Omochi"}, {"name": "&amp; Y Matsubara"}], "url": null, "journal": null}, {"index": 6, "publisher": null, "volume": "24", "title": "Biophys Chem", "id": "b6", "date": "1986", "issue": null, "authors": [{"name": "K Yoshikawa"}, {"name": "T Omochi"}, {"name": "&amp; Y Matsubara"}], "url": null, "journal": null}, {"index": 7, "publisher": null, "volume": "273", "title": "J.Membrane Sci", "id": "b7", "date": "2006", "issue": null, "authors": [{"name": "M Szpakowska"}, {"name": "A Magnusewska"}, {"name": "J Szwacki"}], "url": null, 
"journal": null}, {"index": 8, "publisher": null, "volume": "110", "title": "J.Phys.Chem.A", "id": "b8", "date": "2006", "issue": null, "authors": [{"name": "M Szpakowska"}, {"name": "I Czaplicka"}, {"name": "O Nagy"}], "url": null, "journal": null}, {"index": 9, "publisher": null, "volume": "4", "title": "Langmuir", "id": "b9", "date": "1984", "issue": null, "authors": [{"name": "K Yoshikawa"}, {"name": "M Shoji"}, {"name": "S Nakata"}, {"name": "S Maeda"}, {"name": "&amp; H Kawakami"}], "url": null, "journal": null}, {"index": 10, "publisher": null, "volume": null, "title": "Text book of medical physiology", "id": "b10", "date": "1981", "issue": null, "authors": [{"name": "A Guyton"}], "url": null, "journal": null}, {"index": 11, "publisher": null, "volume": null, "title": "Best and Taylor's physiological basis of medical practice", "id": "b11", "date": "1979", "issue": null, "authors": [{"name": "J R Brobeck"}], "url": null, "journal": null}, {"index": 12, "publisher": null, "volume": null, "title": "Human physiology, vol2 (Calcutta", "id": "b12", "date": "1985", "issue": null, "authors": [{"name": "C Chatterjee"}], "url": null, "journal": "Medical and Allied Agency"}, {"index": 13, "publisher": null, "volume": "66", "title": "J chem Educ", "id": "b13", "date": "1989", "issue": null, "authors": [{"name": "K Yoshikawa"}, {"name": "S Nakata"}, {"name": "M Yamanaka"}, {"name": "&amp; T Waki"}], "url": null, "journal": null}, {"index": 14, "publisher": null, "volume": "59", "title": "Am J Phys", "id": "b14", "date": "1991", "issue": null, "authors": [{"name": "K Yoshikawa"}, {"name": "N Oyama"}, {"name": "M Shoji"}, {"name": "&amp; Nakata"}], "url": null, "journal": null}, {"index": 15, "publisher": null, "volume": "8", "title": "Langmuir", "id": "b15", "date": "1992", "issue": null, "authors": [{"name": "S Upadhyay"}, {"name": "A Das"}, {"name": "V Agarwala"}, {"name": "Srivastava"}], "url": null, "journal": null}, {"index": 16, "publisher": null, "volume": "89", 
"title": "Faraday Trans", "id": "b16", "date": "1993", "issue": null, "authors": [{"name": "A Das &amp; R"}, {"name": "Srivastava"}], "url": null, "journal": null}], "journal": {"issn": null, "publisher": null, "volume": null, "eissn": null, "issue": null, "name": null}, "acknowledgement": null}
diff --git a/python/tests/grobid_metadata_importer.py b/python/tests/grobid_metadata_importer.py
new file mode 100644
index 00000000..516d8c44
--- /dev/null
+++ b/python/tests/grobid_metadata_importer.py
@@ -0,0 +1,47 @@
+
+import json
+import pytest
+from fatcat.grobid_metadata_importer import FatcatGrobidMetadataImporter
+
+
+@pytest.fixture(scope="function")
+def grobid_metadata_importer():
+    """Provide each test function with its own importer instance."""
+    importer = FatcatGrobidMetadataImporter("http://localhost:9411/v0")
+    yield importer
+
+# TODO: use the API to check that entities were actually created...
+#def test_grobid_metadata_importer_batch(grobid_metadata_importer):
+# with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+# grobid_metadata_importer.process_batch(f)
+
+def test_grobid_metadata_importer(grobid_metadata_importer):
+    """Smoke test: run process_source() over the example TSV file.
+
+    There are no assertions; the test passes when no exception is raised.
+    """
+    fixture_path = 'tests/files/example_grobid_metadata_lines.tsv'
+    with open(fixture_path, 'r') as tsv_lines:
+        grobid_metadata_importer.process_source(tsv_lines)
+
+def test_grobid_metadata_parse(grobid_metadata_importer):
+    """Parse the GROBID JSON of the first fixture row and check the result.
+
+    Only the fifth tab-separated column (index 4) of the first line is
+    used: it is decoded as JSON and handed to parse_grobid_json().
+    """
+    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+        raw = json.loads(f.readline().split('\t')[4])
+    # Named `release` (not `re`) so it cannot be mistaken for the
+    # standard-library regex module.
+    release = grobid_metadata_importer.parse_grobid_json(raw)
+    assert release
+    assert release.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
+    assert len(release.contribs) == 5
+    assert release.contribs[0].raw_name == "Wahyu Ary"
+    # Compare to None by identity, not equality.
+    assert release.publisher is None
+    assert release.extra.get('container_name') is None
+    assert len(release.refs) == 27
+
+def test_file_metadata_parse(grobid_metadata_importer):
+    """Build a file entity from the first fixture row and check its fields.
+
+    The first four tab-separated columns of the row are passed to
+    parse_file_metadata(): column 0 as-is, column 1 decoded from JSON,
+    column 2 as-is, and column 3 converted to int.
+    """
+    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+        raw = f.readline().split('\t')
+    fe = grobid_metadata_importer.parse_file_metadata(
+        raw[0], json.loads(raw[1]), raw[2], int(raw[3]))
+    assert fe
+    assert fe.sha1 == "38d725127246895368e4d9f950e377b4f21b6d75" # "sha1:HDLSKETSI2EVG2HE3H4VBY3XWTZBW3LV"
+    # Compare to None by identity, not equality.
+    assert fe.md5 is None
+    assert fe.mimetype == "application/pdf"
+    assert fe.size == 260608
+    # Two URLs are expected: the web.archive.org link first, then the
+    # original link.
+    assert fe.urls[0].url.startswith("https://web.archive.org/")
+    assert fe.urls[0].rel == "webarchive"
+    assert fe.urls[1].url.startswith("http://e-journal.hamzanwadi.ac.id")
+    assert fe.urls[1].rel == "web"
+    assert len(fe.releases) == 0