diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-08-09 16:57:52 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-08-09 16:57:52 -0700 |
commit | db711e1ac9e4b8ba8c9d5229bf310e89ffc27a47 (patch) | |
tree | d977dfee48cfd947aec3c424733aebe74646d25e /postgresql/sandcrawler_schema.sql | |
parent | 9e4657d49dd91f1249042865505d1a9ea8ad2ea6 (diff) | |
download | sandcrawler-db711e1ac9e4b8ba8c9d5229bf310e89ffc27a47.tar.gz sandcrawler-db711e1ac9e4b8ba8c9d5229bf310e89ffc27a47.zip |
move postgres/rest directory
Diffstat (limited to 'postgresql/sandcrawler_schema.sql')
-rw-r--r-- | postgresql/sandcrawler_schema.sql | 58 |
1 files changed, 0 insertions, 58 deletions
```diff
diff --git a/postgresql/sandcrawler_schema.sql b/postgresql/sandcrawler_schema.sql
deleted file mode 100644
index 7460cdc..0000000
--- a/postgresql/sandcrawler_schema.sql
+++ /dev/null
@@ -1,58 +0,0 @@
-
-CREATE TABLE IF NOT EXISTS cdx (
-    url TEXT NOT NULL CHECK (octet_length(url) >= 1),
-    datetime TEXT NOT NULL CHECK (octet_length(datetime) = 14),
-    sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
-    cdx_sha1hex TEXT CHECK (octet_length(cdx_sha1hex) = 40),
-    mimetype TEXT CHECK (octet_length(mimetype) >= 1),
-    warc_path TEXT CHECK (octet_length(warc_path) >= 1),
-    warc_csize BIGINT,
-    warc_offset BIGINT,
-    row_created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
-    PRIMARY KEY(url, datetime)
-);
-CREATE INDEX IF NOT EXISTS cdx_sha1hex_idx ON cdx(sha1hex);
-CREATE INDEX IF NOT EXISTS cdx_row_created_idx ON cdx(row_created);
-
-CREATE TABLE IF NOT EXISTS file_meta (
-    sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
-    sha256hex TEXT CHECK (octet_length(sha256hex) = 64),
-    md5hex TEXT CHECK (octet_length(md5hex) = 32),
-    size_bytes BIGINT,
-    mimetype TEXT CHECK (octet_length(mimetype) >= 1)
-);
-
-CREATE TABLE IF NOT EXISTS fatcat_file (
-    sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
-    file_ident TEXT CHECK (octet_length(file_ident) = 26),
-    first_release_ident TEXT CHECK (octet_length(first_release_ident) = 26)
-);
-
-CREATE TABLE IF NOT EXISTS petabox (
-    item TEXT NOT NULL CHECK (octet_length(item) >= 1),
-    path TEXT NOT NULL CHECK (octet_length(path) >= 1),
-    sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
-    PRIMARY KEY(item, path)
-);
-CREATE INDEX petabox_sha1hex_idx ON petabox(sha1hex);
-
-CREATE TABLE IF NOT EXISTS grobid (
-    sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
-    updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
-    grobid_version TEXT CHECK (octet_length(grobid_version) >= 1),
-    status_code INT NOT NULL,
-    status TEXT CHECK (octet_length(status) >= 1),
-    fatcat_release TEXT CHECK (octet_length(fatcat_release) = 26),
-    metadata JSONB
-);
--- CREATE INDEX grobid_fatcat_release_idx ON grobid(fatcat_release);
-
-CREATE TABLE IF NOT EXISTS shadow (
-    sha1hex TEXT NOT NULL CHECK (octet_length(sha1hex) = 40),
-    shadow_corpus TEXT NOT NULL CHECK (octet_length(shadow_corpus) >= 1),
-    shadow_id TEXT CHECK (octet_length(shadow_id) >= 1),
-    doi TEXT CHECK (octet_length(doi) >= 1),
-    pmid TEXT CHECK (octet_length(pmid) >= 1),
-    isbn13 TEXT CHECK (octet_length(isbn13) >= 1),
-    PRIMARY KEY(sha1hex, shadow_corpus)
-);
```