aboutsummaryrefslogtreecommitdiffstats
path: root/sql
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-04 12:52:04 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-15 18:15:20 -0700
commit 636ae0e44f6a4bc2e5325cdc8cbf7ae3a1f16d8b (patch)
tree 0422323b87cf36b2c8847803af05edc9d9bc09f1 /sql
parent 9b47798b2fd69fcf3f318bddc896e6342e7f8580 (diff)
download sandcrawler-636ae0e44f6a4bc2e5325cdc8cbf7ae3a1f16d8b.tar.gz
sandcrawler-636ae0e44f6a4bc2e5325cdc8cbf7ae3a1f16d8b.zip
sql: initial ingest fileset table
Diffstat (limited to 'sql')
-rw-r--r-- sql/migrations/2019-12-19-060141_init/up.sql 38
1 files changed, 38 insertions, 0 deletions
diff --git a/sql/migrations/2019-12-19-060141_init/up.sql b/sql/migrations/2019-12-19-060141_init/up.sql
index e51bfb4..f312b6f 100644
--- a/sql/migrations/2019-12-19-060141_init/up.sql
+++ b/sql/migrations/2019-12-19-060141_init/up.sql
@@ -165,6 +165,44 @@ CREATE TABLE IF NOT EXISTS ingest_file_result (
CREATE INDEX ingest_file_result_terminal_url_idx ON ingest_file_result(terminal_url);
CREATE INDEX ingest_file_result_terminal_sha1hex_idx ON ingest_file_result(terminal_sha1hex);
+-- Results of fileset ingest: exactly one row per (ingest_type, base_url),
+-- as enforced by the primary key. The "octet_length(...) >= 1" checks reject
+-- empty strings in text columns; nullable columns may still be NULL.
+CREATE TABLE IF NOT EXISTS ingest_fileset_result (
+ ingest_type TEXT NOT NULL CHECK (octet_length(ingest_type) >= 1),
+ base_url TEXT NOT NULL CHECK (octet_length(base_url) >= 1),
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ hit BOOLEAN NOT NULL,
+ status TEXT CHECK (octet_length(status) >= 1),
+
+ terminal_url TEXT CHECK (octet_length(terminal_url) >= 1),
+ -- exactly 14 bytes; presumably a YYYYMMDDHHMMSS timestamp (TODO confirm)
+ terminal_dt TEXT CHECK (octet_length(terminal_dt) = 14),
+ terminal_status_code INT,
+ -- exactly 40 bytes, the length of a hex-encoded SHA-1 digest
+ terminal_sha1hex TEXT CHECK (octet_length(terminal_sha1hex) = 40),
+
+ platform TEXT CHECK (octet_length(platform) >= 1),
+ platform_id TEXT CHECK (octet_length(platform_id) >= 1),
+ ingest_strategy TEXT CHECK (octet_length(ingest_strategy) >= 1),
+ total_size BIGINT,
+ file_count INT,
+ item_name TEXT CHECK (octet_length(item_name) >= 1),
+ -- the CHECK must name this column itself; a misspelled column name here
+ -- makes the whole CREATE TABLE fail with "column does not exist"
+ item_bundle_path TEXT CHECK (octet_length(item_bundle_path) >= 1),
+
+ manifest JSONB,
+ -- manifest is a list, similar to fatcat fileset manifest, plus extra:
+ -- status (str)
+ -- path (str)
+ -- size (int)
+ -- md5 (str)
+ -- sha1 (str)
+ -- sha256 (str)
+ -- mimetype (str)
+ -- platform_url (str)
+ -- terminal_url (str)
+ -- terminal_dt (str)
+ -- extra (dict)
+
+ PRIMARY KEY (ingest_type, base_url)
+);
+-- secondary index on terminal_url (the primary key covers ingest_type, base_url)
+CREATE INDEX ingest_fileset_result_terminal_url_idx ON ingest_fileset_result(terminal_url);
+
CREATE TABLE IF NOT EXISTS shadow (
shadow_corpus TEXT NOT NULL CHECK (octet_length(shadow_corpus) >= 1),
shadow_id TEXT NOT NULL CHECK (octet_length(shadow_id) >= 1),