diff options
-rw-r--r-- | pig/filter-cdx-join-urls.pig | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/pig/filter-cdx-join-urls.pig b/pig/filter-cdx-join-urls.pig
index b396c82..70858b9 100644
--- a/pig/filter-cdx-join-urls.pig
+++ b/pig/filter-cdx-join-urls.pig
@@ -18,7 +18,7 @@ surts = FOREACH urls GENERATE SURTURL(url) AS url_surt;
 surts = ORDER surts by url_surt ASC PARALLEL 10;
 surts = DISTINCT surts;
 
-cdx = LOAD '$INPUT' AS cdxline:chararray;
+cdx = LOAD '$INPUT_CDX' AS cdxline:chararray;
 cdx = FILTER cdx BY not STARTSWITH (cdxline, 'filedesc');
 cdx = FILTER cdx BY not STARTSWITH (cdxline, ' ');
 