From 350a4e64aa60896391c1040d958b6b039ea3a79f Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 15 Oct 2021 17:14:43 -0700
Subject: sql fileset ingest table iteration

---
Review notes (this area is ignored by `git am` and is not part of the commit):
  * Renames ingest_fileset_result to ingest_fileset_platform, drops the
    terminal_* columns, and adds platform_name / platform_domain /
    archiveorg_* / web_bundle_* columns plus a composite platform index.
  * Every CHECK on an added column now references that column itself.
    The earlier revision still named the old columns (platform, item_name,
    item_path_bundle, terminal_url, terminal_dt), none of which exist in
    the new table, so the CREATE TABLE statement could not succeed.
  * NOTE(review): the "index" blob ids below come from the original commit
    and will not match the corrected content; whitespace inside the hunks
    was reconstructed, so regenerate the patch before applying strictly.

 sql/migrations/2019-12-19-060141_init/up.sql | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/sql/migrations/2019-12-19-060141_init/up.sql b/sql/migrations/2019-12-19-060141_init/up.sql
index f312b6f..e478616 100644
--- a/sql/migrations/2019-12-19-060141_init/up.sql
+++ b/sql/migrations/2019-12-19-060141_init/up.sql
@@ -165,25 +165,24 @@ CREATE TABLE IF NOT EXISTS ingest_file_result (
 CREATE INDEX ingest_file_result_terminal_url_idx ON ingest_file_result(terminal_url);
 CREATE INDEX ingest_file_result_terminal_sha1hex_idx ON ingest_file_result(terminal_sha1hex);

-CREATE TABLE IF NOT EXISTS ingest_fileset_result (
+CREATE TABLE IF NOT EXISTS ingest_fileset_platform (
     ingest_type TEXT NOT NULL CHECK (octet_length(ingest_type) >= 1),
     base_url TEXT NOT NULL CHECK (octet_length(base_url) >= 1),
     updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
     hit BOOLEAN NOT NULL,
     status TEXT CHECK (octet_length(status) >= 1),

-    terminal_url TEXT CHECK (octet_length(terminal_url) >= 1),
-    terminal_dt TEXT CHECK (octet_length(terminal_dt) = 14),
-    terminal_status_code INT,
-    terminal_sha1hex TEXT CHECK (octet_length(terminal_sha1hex) = 40),
-
-    platform TEXT CHECK (octet_length(platform) >= 1),
-    platform_id TEXT CHECK (octet_length(platform_id) >= 1),
+    platform_name TEXT NOT NULL CHECK (octet_length(platform_name) >= 1),
+    platform_domain TEXT NOT NULL CHECK (octet_length(platform_domain) >= 1),
+    platform_id TEXT NOT NULL CHECK (octet_length(platform_id) >= 1),
     ingest_strategy TEXT CHECK (octet_length(ingest_strategy) >= 1),
     total_size BIGINT,
     file_count INT,
-    item_name TEXT CHECK (octet_length(item_name) >= 1),
-    item_bundle_path TEXT CHECK (octet_length(item_path_bundle) >= 1),
+    archiveorg_item_name TEXT CHECK (octet_length(archiveorg_item_name) >= 1),
+
+    archiveorg_item_bundle_path TEXT CHECK (octet_length(archiveorg_item_bundle_path) >= 1),
+    web_bundle_url TEXT CHECK (octet_length(web_bundle_url) >= 1),
+    web_bundle_dt TEXT CHECK (octet_length(web_bundle_dt) = 14),

     manifest JSONB,
     -- list, similar to fatcat fileset manifest, plus extra:
@@ -194,14 +193,14 @@ CREATE TABLE IF NOT EXISTS ingest_fileset_result (
     --   sha1 (str)
     --   sha256 (str)
     --   mimetype (str)
+    --   extra (dict)
     --   platform_url (str)
     --   terminal_url (str)
     --   terminal_dt (str)
-    --   extra (dict)

     PRIMARY KEY (ingest_type, base_url)
 );
-CREATE INDEX ingest_fileset_result_terminal_url_idx ON ingest_fileset_result(terminal_url);
+CREATE INDEX ingest_fileset_platform_name_domain_id_idx ON ingest_fileset_platform(platform_name, platform_domain, platform_id);

 CREATE TABLE IF NOT EXISTS shadow (
     shadow_corpus TEXT NOT NULL CHECK (octet_length(shadow_corpus) >= 1),
-- 
cgit v1.2.3