diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-05-08 06:21:29 +0000 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-05-08 06:21:29 +0000 |
commit | e566ee1b4e134bfc06284cf77d8d1370df30d53f (patch) | |
tree | f3969054cc5f93608b5c72d41541ea381ef89a6b /pig/tests/files/papers_edu_tilde.cdx | |
parent | 0c398392aa298d28694bf5bd37d3e4912de8a2f5 (diff) | |
parent | 65b7d45852af3de557eaaf200471ff9b1a211970 (diff) | |
download | sandcrawler-e566ee1b4e134bfc06284cf77d8d1370df30d53f.tar.gz sandcrawler-e566ee1b4e134bfc06284cf77d8d1370df30d53f.zip |
Merge branch 'master' of git.archive.org:webgroup/sandcrawler
Diffstat (limited to 'pig/tests/files/papers_edu_tilde.cdx')
-rw-r--r-- | pig/tests/files/papers_edu_tilde.cdx | 15 |
1 file changed, 15 insertions, 0 deletions
```diff
diff --git a/pig/tests/files/papers_edu_tilde.cdx b/pig/tests/files/papers_edu_tilde.cdx
new file mode 100644
index 0000000..47ca069
--- /dev/null
+++ b/pig/tests/files/papers_edu_tilde.cdx
@@ -0,0 +1,15 @@
+#http://www.stanford.edu:80/~johntayl/Papers/taylor2.pdf
+#http://met.nps.edu/~mtmontgo/papers/isabel_part2.pdf
+#http://www.pitt.edu:80/~druzdzel/psfiles/ecai06.pdf
+#http://www.comp.hkbu.edu.hk/~ymc/papers/conference/ijcnn03_710.pdf
+
+# should be 6 matches:
+hk,edu,hkbu,comp)/~ymc/papers/conference/ijcnn03_710.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+edu,stanford,www)/~johntayl/Papers/taylor2.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+edu,nps,met)/~mtmontgo/papers/isabel_part2.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+edu,pitt,www)/~druzdzel/psfiles/ecai06.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+jp,ac,pitt,www)/~druzdzel/psfiles/ecai06.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+co,edu,pitt,www)/~druzdzel/psfiles/ecai06.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
+
+# NOT:
+com,corp,edu,pitt,www)/~druzdzel/psfiles/ecai06.pdf 20170706005950 http://mit.edu/file.pdf application/pdf 200 MQHD36X5MNZPWFNMD5LFOYZSFGCHUN3V - - 123 456 CRAWL/CRAWL.warc.gz
```