diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-08-09 16:50:48 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-08-09 16:50:48 -0700 |
commit | 9944c674e9ded47431d76d06e60a65eebd510980 (patch) | |
tree | b8bae25f9f2d3a80b326e27ca296194ec728e61a /postgresql | |
parent | 5f6bbd4899069b9ae6ba20402842e74f594be060 (diff) | |
download | sandcrawler-9944c674e9ded47431d76d06e60a65eebd510980.tar.gz sandcrawler-9944c674e9ded47431d76d06e60a65eebd510980.zip |
more tweaks to sql schema
Diffstat (limited to 'postgresql')
-rw-r--r-- | postgresql/sandcrawler_schema.sql | 3 |
1 files changed, 2 insertions, 1 deletions
```diff
diff --git a/postgresql/sandcrawler_schema.sql b/postgresql/sandcrawler_schema.sql
index abd6daf..7460cdc 100644
--- a/postgresql/sandcrawler_schema.sql
+++ b/postgresql/sandcrawler_schema.sql
@@ -6,6 +6,7 @@ CREATE TABLE IF NOT EXISTS cdx (
     cdx_sha1hex TEXT CHECK (octet_length(cdx_sha1hex) = 40),
     mimetype TEXT CHECK (octet_length(mimetype) >= 1),
     warc_path TEXT CHECK (octet_length(warc_path) >= 1),
+    warc_csize BIGINT,
     warc_offset BIGINT,
     row_created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
     PRIMARY KEY(url, datetime)
@@ -38,7 +39,7 @@ CREATE INDEX petabox_sha1hex_idx ON petabox(sha1hex);
 CREATE TABLE IF NOT EXISTS grobid (
     sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
     updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
-    grobid_version TEXT NOT NULL CHECK (octet_length(grobid_version) >= 1),
+    grobid_version TEXT CHECK (octet_length(grobid_version) >= 1),
     status_code INT NOT NULL,
     status TEXT CHECK (octet_length(status) >= 1),
     fatcat_release TEXT CHECK (octet_length(fatcat_release) = 26),
```