diff options
Diffstat (limited to 'postgresql')
| -rw-r--r-- | postgresql/sandcrawler_schema.sql | 14 | 
1 files changed, 7 insertions, 7 deletions
diff --git a/postgresql/sandcrawler_schema.sql b/postgresql/sandcrawler_schema.sql
index 9e5651d..5365ff3 100644
--- a/postgresql/sandcrawler_schema.sql
+++ b/postgresql/sandcrawler_schema.sql
@@ -1,14 +1,14 @@
 
 CREATE TABLE cdx (
-    id                  BIGSERIAL PRIMARY KEY,
+    url                 TEXT NOT NULL CHECK (octet_length(url) >= 1),
+    datetime            TEXT NOT NULL CHECK (octet_length(datetime) = 14),
     sha1hex             TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
     cdx_sha1hex         TEXT CHECK (octet_length(cdx_sha1hex) = 40),
-    url                 TEXT NOT NULL CHECK (octet_length(url) >= 1),
-    datetime            TIMESTAMP WITH TIME ZONE NOT NULL,
     mimetype            TEXT CHECK (octet_length(mimetype) >= 1),
     warc_path           TEXT CHECK (octet_length(warc_path) >= 1),
     warc_offset         BIGINT,
-    row_created         TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL
+    row_created         TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+    PRIMARY KEY(url, datetime)
 );
 CREATE INDEX cdx_sha1hex_idx ON cdx(sha1hex);
 CREATE INDEX cdx_row_created_idx ON cdx(row_created);
@@ -28,10 +28,10 @@ CREATE TABLE fatcat_file (
 );
 
 CREATE TABLE petabox (
-    id                  BIGSERIAL PRIMARY KEY,
-    sha1hex             TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
     item                TEXT NOT NULL CHECK (octet_length(item) >= 1),
-    path                TEXT NOT NULL CHECK (octet_length(path) >= 1)
+    path                TEXT NOT NULL CHECK (octet_length(path) >= 1),
+    sha1hex             TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
+    PRIMARY KEY(item, path)
 );
 CREATE INDEX petabox_sha1hex_idx ON petabox(sha1hex);
 
