From 172c426c4aa3fc3722813e32c08ee557c9b9d0cd Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 26 Dec 2019 21:35:36 -0800 Subject: update job log with pig runs --- notes/job_log.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'notes') diff --git a/notes/job_log.txt b/notes/job_log.txt index 68bef9b..67623ec 100644 --- a/notes/job_log.txt +++ b/notes/job_log.txt @@ -173,3 +173,13 @@ extract_chunk.sh: touch $1.SUCCESS seems to be working better! tested and if there is a problem with one chunk the others continue + +## Pig Joins (around 2019-12-24) + +Partial (as a start): + + pig -param INPUT_CDX="/user/bnewbold/pdfs/gwb-pdf-20191005172329" -param INPUT_DIGEST="/user/bnewbold/scihash/shadow.20191222.sha1b32.sorted" -param OUTPUT="/user/bnewbold/scihash/gwb-pdf-20191005172329.shadow.20191222.join.cdx" join-cdx-sha1.pig + +Full GWB: + + pig -param INPUT_CDX="/user/bnewbold/pdfs/gwb-pdf-20191005172329" -param INPUT_DIGEST="/user/bnewbold/scihash/shadow.20191222.sha1b32.sorted" -param OUTPUT="/user/bnewbold/scihash/gwb-pdf-20191005172329.shadow.20191222.join.cdx" join-cdx-sha1.pig -- cgit v1.2.3