about summary refs log tree commit diff stats
path: root/postgresql/sandcrawler_schema.sql
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-08-08 16:43:42 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-08-08 16:43:42 -0700
commit 5f6bbd4899069b9ae6ba20402842e74f594be060 (patch)
tree bb1490b9d8b10113cb8f2df7275d93e414dea458 /postgresql/sandcrawler_schema.sql
parent 51e73fa019577bb3b5443274767252c748d5773a (diff)
download sandcrawler-5f6bbd4899069b9ae6ba20402842e74f594be060.tar.gz
sandcrawler-5f6bbd4899069b9ae6ba20402842e74f594be060.zip
sandcrawler SQL schema more idempotent-ish
Diffstat (limited to 'postgresql/sandcrawler_schema.sql')
-rw-r--r-- postgresql/sandcrawler_schema.sql 16
1 files changed, 8 insertions, 8 deletions
diff --git a/postgresql/sandcrawler_schema.sql b/postgresql/sandcrawler_schema.sql
index 5365ff3..abd6daf 100644
--- a/postgresql/sandcrawler_schema.sql
+++ b/postgresql/sandcrawler_schema.sql
@@ -1,5 +1,5 @@
-CREATE TABLE cdx (
+CREATE TABLE IF NOT EXISTS cdx (
url TEXT NOT NULL CHECK (octet_length(url) >= 1),
datetime TEXT NOT NULL CHECK (octet_length(datetime) = 14),
sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
@@ -10,10 +10,10 @@ CREATE TABLE cdx (
row_created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
PRIMARY KEY(url, datetime)
);
-CREATE INDEX cdx_sha1hex_idx ON cdx(sha1hex);
-CREATE INDEX cdx_row_created_idx ON cdx(row_created);
+CREATE INDEX IF NOT EXISTS cdx_sha1hex_idx ON cdx(sha1hex);
+CREATE INDEX IF NOT EXISTS cdx_row_created_idx ON cdx(row_created);
-CREATE TABLE file_meta (
+CREATE TABLE IF NOT EXISTS file_meta (
sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
sha256hex TEXT CHECK (octet_length(sha256hex) = 64),
md5hex TEXT CHECK (octet_length(md5hex) = 32),
@@ -21,13 +21,13 @@ CREATE TABLE file_meta (
mimetype TEXT CHECK (octet_length(mimetype) >= 1)
);
-CREATE TABLE fatcat_file (
+CREATE TABLE IF NOT EXISTS fatcat_file (
sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
file_ident TEXT CHECK (octet_length(file_ident) = 26),
first_release_ident TEXT CHECK (octet_length(first_release_ident) = 26)
);
-CREATE TABLE petabox (
+CREATE TABLE IF NOT EXISTS petabox (
item TEXT NOT NULL CHECK (octet_length(item) >= 1),
path TEXT NOT NULL CHECK (octet_length(path) >= 1),
sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
@@ -35,7 +35,7 @@ CREATE TABLE petabox (
);
CREATE INDEX petabox_sha1hex_idx ON petabox(sha1hex);
-CREATE TABLE grobid (
+CREATE TABLE IF NOT EXISTS grobid (
sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
grobid_version TEXT NOT NULL CHECK (octet_length(grobid_version) >= 1),
@@ -46,7 +46,7 @@ CREATE TABLE grobid (
);
-- CREATE INDEX grobid_fatcat_release_idx ON grobid(fatcat_release);
-CREATE TABLE shadow (
+CREATE TABLE IF NOT EXISTS shadow (
sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
shadow_corpus TEXT NOT NULL CHECK (octet_length(shadow_corpus) >= 1),
shadow_id TEXT CHECK (octet_length(shadow_id) >= 1),