aboutsummaryrefslogtreecommitdiffstats
path: root/pig/filter-cdx-ps.pig
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-10-17 17:19:34 +0100
committer Bryan Newbold <bnewbold@archive.org> 2019-10-17 17:19:34 +0100
commit 54dabe601eaa19d0495d9a102b34e9daa056457d (patch)
tree 392e3ba4fa6a6c9d4fdda2de0e7b4656ead18f83 /pig/filter-cdx-ps.pig
parent 04e1ae4f903af98ef174be9110aaae5e1ab81360 (diff)
download sandcrawler-54dabe601eaa19d0495d9a102b34e9daa056457d.tar.gz
sandcrawler-54dabe601eaa19d0495d9a102b34e9daa056457d.zip
new/additional GWB CDX filter scripts
Diffstat (limited to 'pig/filter-cdx-ps.pig')
-rw-r--r-- pig/filter-cdx-ps.pig 6
1 files changed, 6 insertions, 0 deletions
diff --git a/pig/filter-cdx-ps.pig b/pig/filter-cdx-ps.pig
index 6e80acc..b27a547 100644
--- a/pig/filter-cdx-ps.pig
+++ b/pig/filter-cdx-ps.pig
@@ -1,3 +1,9 @@
+-- Tries to filter down a large CDX file (GWB index) to a subset of postscript
+-- files, by mimetype.
+--
+-- Author: Bryan Newbold <bnewbold@archive.org>
+-- Date: May 2018
+
%default INPUT ''
%default OUTPUT ''