aboutsummaryrefslogtreecommitdiffstats
path: root/pig/tests/files/papers_edu_tilde.cdx
diff options
context:
space:
mode:
Diffstat (limited to 'pig/tests/files/papers_edu_tilde.cdx')
-rw-r--r--pig/tests/files/papers_edu_tilde.cdx15
1 files changed, 15 insertions, 0 deletions
diff --git a/pig/tests/files/papers_edu_tilde.cdx b/pig/tests/files/papers_edu_tilde.cdx
new file mode 100644
index 0000000..47ca069
--- /dev/null
+++ b/pig/tests/files/papers_edu_tilde.cdx
@@ -0,0 +1,15 @@
+#http://www.stanford.edu:80/~johntayl/Papers/taylor2.pdf
+#http://met.nps.edu/~mtmontgo/papers/isabel_part2.pdf
+#http://www.pitt.edu:80/~druzdzel/psfiles/ecai06.pdf
+#http://www.comp.hkbu.edu.hk/~ymc/papers/conference/ijcnn03_710.pdf
+
+# should be 6 matches:
+hk,edu,hkbu,comp)/~ymc/papers/conference/ijcnn03_710.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+edu,stanford,www)/~johntayl/Papers/taylor2.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+edu,nps,met)/~mtmontgo/papers/isabel_part2.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+edu,pitt,www)/~druzdzel/psfiles/ecai06.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+jp,ac,pitt,www)/~druzdzel/psfiles/ecai06.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+co,edu,pitt,www)/~druzdzel/psfiles/ecai06.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+
+# NOT:
+com,corp,edu,pitt,www)/~druzdzel/psfiles/ecai06.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz