diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-04 12:52:04 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 18:15:20 -0700 |
commit | 636ae0e44f6a4bc2e5325cdc8cbf7ae3a1f16d8b (patch) | |
tree | 0422323b87cf36b2c8847803af05edc9d9bc09f1 | |
parent | 9b47798b2fd69fcf3f318bddc896e6342e7f8580 (diff) | |
download | sandcrawler-636ae0e44f6a4bc2e5325cdc8cbf7ae3a1f16d8b.tar.gz sandcrawler-636ae0e44f6a4bc2e5325cdc8cbf7ae3a1f16d8b.zip |
sql: initial ingest fileset table
-rw-r--r-- | sql/migrations/2019-12-19-060141_init/up.sql | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/sql/migrations/2019-12-19-060141_init/up.sql b/sql/migrations/2019-12-19-060141_init/up.sql
index e51bfb4..f312b6f 100644
--- a/sql/migrations/2019-12-19-060141_init/up.sql
+++ b/sql/migrations/2019-12-19-060141_init/up.sql
@@ -165,6 +165,44 @@ CREATE TABLE IF NOT EXISTS ingest_file_result (
 CREATE INDEX ingest_file_result_terminal_url_idx ON ingest_file_result(terminal_url);
 CREATE INDEX ingest_file_result_terminal_sha1hex_idx ON ingest_file_result(terminal_sha1hex);
 
+CREATE TABLE IF NOT EXISTS ingest_fileset_result (  -- one row per (ingest_type, base_url); see PRIMARY KEY
+    ingest_type             TEXT NOT NULL CHECK (octet_length(ingest_type) >= 1),
+    base_url                TEXT NOT NULL CHECK (octet_length(base_url) >= 1),
+    updated                 TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+    hit                     BOOLEAN NOT NULL,
+    status                  TEXT CHECK (octet_length(status) >= 1),
+    -- nullable TEXT columns in this table may be NULL, but are never allowed to be the empty string
+    terminal_url            TEXT CHECK (octet_length(terminal_url) >= 1),
+    terminal_dt             TEXT CHECK (octet_length(terminal_dt) = 14),  -- exactly 14 bytes when set
+    terminal_status_code    INT,
+    terminal_sha1hex        TEXT CHECK (octet_length(terminal_sha1hex) = 40),  -- exactly 40 bytes when set
+
+    platform                TEXT CHECK (octet_length(platform) >= 1),
+    platform_id             TEXT CHECK (octet_length(platform_id) >= 1),
+    ingest_strategy         TEXT CHECK (octet_length(ingest_strategy) >= 1),
+    total_size              BIGINT,
+    file_count              INT,
+    item_name               TEXT CHECK (octet_length(item_name) >= 1),
+    item_bundle_path        TEXT CHECK (octet_length(item_bundle_path) >= 1),  -- CHECK must name this same column
+
+    manifest                JSONB,
+    -- list, similar to fatcat fileset manifest, plus extra:
+    --   status (str)
+    --   path (str)
+    --   size (int)
+    --   md5 (str)
+    --   sha1 (str)
+    --   sha256 (str)
+    --   mimetype (str)
+    --   platform_url (str)
+    --   terminal_url (str)
+    --   terminal_dt (str)
+    --   extra (dict)
+
+    PRIMARY KEY (ingest_type, base_url)
+);
+CREATE INDEX ingest_fileset_result_terminal_url_idx ON ingest_fileset_result(terminal_url);
+
 CREATE TABLE IF NOT EXISTS shadow (
     shadow_corpus   TEXT NOT NULL CHECK (octet_length(shadow_corpus) >= 1),
     shadow_id       TEXT NOT NULL CHECK (octet_length(shadow_id) >= 1),