diff options
Diffstat (limited to 'pig/tests/files/papers_domain_words.cdx')
-rw-r--r-- | pig/tests/files/papers_domain_words.cdx | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/pig/tests/files/papers_domain_words.cdx b/pig/tests/files/papers_domain_words.cdx new file mode 100644 index 0000000..48e2313 --- /dev/null +++ b/pig/tests/files/papers_domain_words.cdx @@ -0,0 +1,11 @@ +#http://research.fit.edu/sealevelriselibrary/documents/doc_mgr/448/Florida_Keys_Low_Island_Biodiversity_&_SLR_-_Ross_et_al_2009.pdf +#http://ijs.sgmjournals.org:80/cgi/reprint/54/6/2217.pdf +#http://eprints.ecs.soton.ac.uk/12020/1/mind-the-semantic-gap.pdf +#http://eprint.uq.edu.au/archive/00004120/01/R103_Forrester_pp.pdf + +# should match 4: + +edu,fit,research)/sealevelriselibrary/documents/doc_mgr/448/Florida_Keys_Low_Island_Biodiversity_&_SLR_-_Ross_et_al_2009.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz +org,sgmjournals,ijs)//cgi/reprint/54/6/2217.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz +uk,ac,soton,ecs,eprints)/12020/1/mind-the-semantic-gap.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz +au,edu,uq,eprint)/archive/00004120/01/R103_Forrester_pp.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz |