aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--postgresql/sandcrawler_schema.sql14
1 files changed, 7 insertions, 7 deletions
diff --git a/postgresql/sandcrawler_schema.sql b/postgresql/sandcrawler_schema.sql
index 9e5651d..5365ff3 100644
--- a/postgresql/sandcrawler_schema.sql
+++ b/postgresql/sandcrawler_schema.sql
@@ -1,14 +1,14 @@
CREATE TABLE cdx (
- id BIGSERIAL PRIMARY KEY,
+ url TEXT NOT NULL CHECK (octet_length(url) >= 1),
+ datetime TEXT NOT NULL CHECK (octet_length(datetime) = 14),
sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
cdx_sha1hex TEXT CHECK (octet_length(cdx_sha1hex) = 40),
- url TEXT NOT NULL CHECK (octet_length(url) >= 1),
- datetime TIMESTAMP WITH TIME ZONE NOT NULL,
mimetype TEXT CHECK (octet_length(mimetype) >= 1),
warc_path TEXT CHECK (octet_length(warc_path) >= 1),
warc_offset BIGINT,
- row_created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL
+ row_created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ PRIMARY KEY(url, datetime)
);
CREATE INDEX cdx_sha1hex_idx ON cdx(sha1hex);
CREATE INDEX cdx_row_created_idx ON cdx(row_created);
@@ -28,10 +28,10 @@ CREATE TABLE fatcat_file (
);
CREATE TABLE petabox (
- id BIGSERIAL PRIMARY KEY,
- sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
item TEXT NOT NULL CHECK (octet_length(item) >= 1),
- path TEXT NOT NULL CHECK (octet_length(path) >= 1)
+ path TEXT NOT NULL CHECK (octet_length(path) >= 1),
+ sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
+ PRIMARY KEY(item, path)
);
CREATE INDEX petabox_sha1hex_idx ON petabox(sha1hex);