aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-08-09 16:50:48 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-08-09 16:50:48 -0700
commit 9944c674e9ded47431d76d06e60a65eebd510980 (patch)
tree b8bae25f9f2d3a80b326e27ca296194ec728e61a
parent 5f6bbd4899069b9ae6ba20402842e74f594be060 (diff)
download sandcrawler-9944c674e9ded47431d76d06e60a65eebd510980.tar.gz
sandcrawler-9944c674e9ded47431d76d06e60a65eebd510980.zip
more tweaks to sql schema
-rw-r--r-- postgresql/sandcrawler_schema.sql 3
1 file changed, 2 insertions, 1 deletion
diff --git a/postgresql/sandcrawler_schema.sql b/postgresql/sandcrawler_schema.sql
index abd6daf..7460cdc 100644
--- a/postgresql/sandcrawler_schema.sql
+++ b/postgresql/sandcrawler_schema.sql
@@ -6,6 +6,7 @@ CREATE TABLE IF NOT EXISTS cdx (
cdx_sha1hex TEXT CHECK (octet_length(cdx_sha1hex) = 40),
mimetype TEXT CHECK (octet_length(mimetype) >= 1),
warc_path TEXT CHECK (octet_length(warc_path) >= 1),
+ warc_csize BIGINT,
warc_offset BIGINT,
row_created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
PRIMARY KEY(url, datetime)
@@ -38,7 +39,7 @@ CREATE INDEX petabox_sha1hex_idx ON petabox(sha1hex);
CREATE TABLE IF NOT EXISTS grobid (
sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
- grobid_version TEXT NOT NULL CHECK (octet_length(grobid_version) >= 1),
+ grobid_version TEXT CHECK (octet_length(grobid_version) >= 1),
status_code INT NOT NULL,
status TEXT CHECK (octet_length(status) >= 1),
fatcat_release TEXT CHECK (octet_length(fatcat_release) = 26),