aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-11-03 11:31:38 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-11-03 11:31:41 -0800
commit 55815b2e62a3ce53d5e71d0c6fd676b6cbf5baca (patch)
tree 077f36599337175c50d3e943e7a6efefc19bc44a
parent 40e2e20378fb06e43cc93f67427f865a0de0a692 (diff)
download sandcrawler-55815b2e62a3ce53d5e71d0c6fd676b6cbf5baca.tar.gz
sandcrawler-55815b2e62a3ce53d5e71d0c6fd676b6cbf5baca.zip
SQL: unmatched glutton query (old)
-rw-r--r-- sql/dump_unmatched_glutton_pdf.sql 19
1 files changed, 19 insertions, 0 deletions
diff --git a/sql/dump_unmatched_glutton_pdf.sql b/sql/dump_unmatched_glutton_pdf.sql
new file mode 100644
index 0000000..d089c7e
--- /dev/null
+++ b/sql/dump_unmatched_glutton_pdf.sql
@@ -0,0 +1,19 @@
+
+-- Sample dump: up to 1000 grobid rows (as JSON) that have fatcat_release set
+-- but whose sha1hex has no fatcat_file row. There is no ORDER BY, so which
+-- rows are picked is unspecified. Run like: psql sandcrawler < THING.sql
+-- Output goes to the file named in the TO clause, not to psql's stdout; to
+-- capture it with a shell redirect instead, use: TO STDOUT WITH NULL ''
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE, READ ONLY, DEFERRABLE;
+COPY (
+  SELECT row_to_json(g)
+  FROM grobid AS g
+  WHERE g.fatcat_release IS NOT NULL
+    AND NOT EXISTS (
+      SELECT 1
+      FROM fatcat_file AS ff
+      WHERE ff.sha1hex = g.sha1hex
+    )
+  LIMIT 1000
+) TO '/grande/snapshots/dump_unmatched_glutton_pdf.2020-06-30.json';
+ROLLBACK;