aboutsummaryrefslogtreecommitdiffstats
path: root/pig/tests/files/tarballs.cdx
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2019-10-17 17:19:34 +0100
committerBryan Newbold <bnewbold@archive.org>2019-10-17 17:19:34 +0100
commit54dabe601eaa19d0495d9a102b34e9daa056457d (patch)
tree392e3ba4fa6a6c9d4fdda2de0e7b4656ead18f83 /pig/tests/files/tarballs.cdx
parent04e1ae4f903af98ef174be9110aaae5e1ab81360 (diff)
downloadsandcrawler-54dabe601eaa19d0495d9a102b34e9daa056457d.tar.gz
sandcrawler-54dabe601eaa19d0495d9a102b34e9daa056457d.zip
new/additional GWB CDX filter scripts
Diffstat (limited to 'pig/tests/files/tarballs.cdx')
-rw-r--r--pig/tests/files/tarballs.cdx10
1 files changed, 10 insertions, 0 deletions
diff --git a/pig/tests/files/tarballs.cdx b/pig/tests/files/tarballs.cdx
new file mode 100644
index 0000000..7a81b79
--- /dev/null
+++ b/pig/tests/files/tarballs.cdx
@@ -0,0 +1,10 @@
+#http://research.fit.edu/sealevelriselibrary/documents/doc_mgr/448/Florida_Keys_Low_Island_Biodiversity_&_SLR_-_Ross_et_al_2009.pdf
+#http://ijs.sgmjournals.org:80/cgi/reprint/54/6/2217.pdf
+#http://eprints.ecs.soton.ac.uk/12020/1/mind-the-semantic-gap.pdf
+#http://eprint.uq.edu.au/archive/00004120/01/R103_Forrester_pp.pdf
+
+# should match 2:
+
+edu,fit,research)/sealevelriselibrary/documents/doc_mgr/448/Florida_Keys_Low_Island_Biodiversity_&_SLR_-_Ross_et_al_2009.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3I - - 123 456 CRAWL/CRAWL.warc.gz
+edu,fit,research)/sealevelriselibrary/documents/doc_mgr/448/Florida_Keys_Low_Island_Biodiversity_&_SLR_-_Ross_et_al_2009.tar.gz 20170706005950 http://mit.edu/file.tar.gz application/octet-stream 200 NQHD36X5MNZPWFNMD5LFOYZSFGCHUN3I - - 123 456 CRAWL/CRAWL.warc.gz
+org,sgmjournals,ijs)//cgi/reprint/54/6/2217.tar.gz 20170706005950 http://mit.edu/file.tar.gz application/gzip 200 TQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz