aboutsummaryrefslogtreecommitdiffstats
path: root/pig/tests/files/papers_domain_words.cdx
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2018-05-08 06:21:29 +0000
committerBryan Newbold <bnewbold@archive.org>2018-05-08 06:21:29 +0000
commite566ee1b4e134bfc06284cf77d8d1370df30d53f (patch)
treef3969054cc5f93608b5c72d41541ea381ef89a6b /pig/tests/files/papers_domain_words.cdx
parent0c398392aa298d28694bf5bd37d3e4912de8a2f5 (diff)
parent65b7d45852af3de557eaaf200471ff9b1a211970 (diff)
downloadsandcrawler-e566ee1b4e134bfc06284cf77d8d1370df30d53f.tar.gz
sandcrawler-e566ee1b4e134bfc06284cf77d8d1370df30d53f.zip
Merge branch 'master' of git.archive.org:webgroup/sandcrawler
Diffstat (limited to 'pig/tests/files/papers_domain_words.cdx')
-rw-r--r--pig/tests/files/papers_domain_words.cdx11
1 files changed, 11 insertions, 0 deletions
diff --git a/pig/tests/files/papers_domain_words.cdx b/pig/tests/files/papers_domain_words.cdx
new file mode 100644
index 0000000..48e2313
--- /dev/null
+++ b/pig/tests/files/papers_domain_words.cdx
@@ -0,0 +1,11 @@
+#http://research.fit.edu/sealevelriselibrary/documents/doc_mgr/448/Florida_Keys_Low_Island_Biodiversity_&_SLR_-_Ross_et_al_2009.pdf
+#http://ijs.sgmjournals.org:80/cgi/reprint/54/6/2217.pdf
+#http://eprints.ecs.soton.ac.uk/12020/1/mind-the-semantic-gap.pdf
+#http://eprint.uq.edu.au/archive/00004120/01/R103_Forrester_pp.pdf
+
+# should match 4:
+
+edu,fit,research)/sealevelriselibrary/documents/doc_mgr/448/Florida_Keys_Low_Island_Biodiversity_&_SLR_-_Ross_et_al_2009.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+org,sgmjournals,ijs)//cgi/reprint/54/6/2217.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+uk,ac,soton,ecs,eprints)/12020/1/mind-the-semantic-gap.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+au,edu,uq,eprint)/archive/00004120/01/R103_Forrester_pp.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz