diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-08-08 14:32:43 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-08-08 14:32:43 -0700 |
commit | 48a802d42cff309543466a9f23245aa93c6d84ea (patch) | |
tree | f60138068d9745fc53b2b913e6760bad314aa20a | |
parent | 2a1e99c7fd000d4f49dff06361c5214f08f0d03f (diff) | |
download | sandcrawler-48a802d42cff309543466a9f23245aa93c6d84ea.tar.gz sandcrawler-48a802d42cff309543466a9f23245aa93c6d84ea.zip |
update sandcrawler_schema.sql
-rw-r--r-- | postgresql/sandcrawler_schema.sql | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/postgresql/sandcrawler_schema.sql b/postgresql/sandcrawler_schema.sql index 9e5651d..5365ff3 100644 --- a/postgresql/sandcrawler_schema.sql +++ b/postgresql/sandcrawler_schema.sql @@ -1,14 +1,14 @@ CREATE TABLE cdx ( - id BIGSERIAL PRIMARY KEY, + url TEXT NOT NULL CHECK (octet_length(url) >= 1), + datetime TEXT NOT NULL CHECK (octet_length(datetime) = 14), sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40), cdx_sha1hex TEXT CHECK (octet_length(cdx_sha1hex) = 40), - url TEXT NOT NULL CHECK (octet_length(url) >= 1), - datetime TIMESTAMP WITH TIME ZONE NOT NULL, mimetype TEXT CHECK (octet_length(mimetype) >= 1), warc_path TEXT CHECK (octet_length(warc_path) >= 1), warc_offset BIGINT, - row_created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL + row_created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL, + PRIMARY KEY(url, datetime) ); CREATE INDEX cdx_sha1hex_idx ON cdx(sha1hex); CREATE INDEX cdx_row_created_idx ON cdx(row_created); @@ -28,10 +28,10 @@ CREATE TABLE fatcat_file ( ); CREATE TABLE petabox ( - id BIGSERIAL PRIMARY KEY, - sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40), item TEXT NOT NULL CHECK (octet_length(item) >= 1), - path TEXT NOT NULL CHECK (octet_length(path) >= 1) + path TEXT NOT NULL CHECK (octet_length(path) >= 1), + sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40), + PRIMARY KEY(item, path) ); CREATE INDEX petabox_sha1hex_idx ON petabox(sha1hex); |