diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 17:14:43 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 18:15:29 -0700 |
commit | 350a4e64aa60896391c1040d958b6b039ea3a79f (patch) | |
tree | d8d31cc6e62440fd9742138fd9d76e5b3ab77a71 /sql/migrations | |
parent | 0a6e449317278e95c3c706aaee19ffb9dc00bebc (diff) | |
download | sandcrawler-350a4e64aa60896391c1040d958b6b039ea3a79f.tar.gz sandcrawler-350a4e64aa60896391c1040d958b6b039ea3a79f.zip |
sql fileset ingest table iteration
Diffstat (limited to 'sql/migrations')
-rw-r--r-- | sql/migrations/2019-12-19-060141_init/up.sql | 23 |
1 file changed, 11 insertions, 12 deletions
diff --git a/sql/migrations/2019-12-19-060141_init/up.sql b/sql/migrations/2019-12-19-060141_init/up.sql index f312b6f..e478616 100644 --- a/sql/migrations/2019-12-19-060141_init/up.sql +++ b/sql/migrations/2019-12-19-060141_init/up.sql @@ -165,25 +165,24 @@ CREATE TABLE IF NOT EXISTS ingest_file_result ( CREATE INDEX ingest_file_result_terminal_url_idx ON ingest_file_result(terminal_url); CREATE INDEX ingest_file_result_terminal_sha1hex_idx ON ingest_file_result(terminal_sha1hex); -CREATE TABLE IF NOT EXISTS ingest_fileset_result ( +CREATE TABLE IF NOT EXISTS ingest_fileset_platform ( ingest_type TEXT NOT NULL CHECK (octet_length(ingest_type) >= 1), base_url TEXT NOT NULL CHECK (octet_length(base_url) >= 1), updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL, hit BOOLEAN NOT NULL, status TEXT CHECK (octet_length(status) >= 1), - terminal_url TEXT CHECK (octet_length(terminal_url) >= 1), - terminal_dt TEXT CHECK (octet_length(terminal_dt) = 14), - terminal_status_code INT, - terminal_sha1hex TEXT CHECK (octet_length(terminal_sha1hex) = 40), - - platform TEXT CHECK (octet_length(platform) >= 1), - platform_id TEXT CHECK (octet_length(platform_id) >= 1), + platform_name TEXT NOT NULL CHECK (octet_length(platform) >= 1), + platform_domain TEXT NOT NULL CHECK (octet_length(platform_domain) >= 1), + platform_id TEXT NOT NULL CHECK (octet_length(platform_id) >= 1), ingest_strategy TEXT CHECK (octet_length(ingest_strategy) >= 1), total_size BIGINT, file_count INT, - item_name TEXT CHECK (octet_length(item_name) >= 1), - item_bundle_path TEXT CHECK (octet_length(item_path_bundle) >= 1), + archiveorg_item_name TEXT CHECK (octet_length(item_name) >= 1), + + archiveorg_item_bundle_path TEXT CHECK (octet_length(item_path_bundle) >= 1), + web_bundle_url TEXT CHECK (octet_length(terminal_url) >= 1), + web_bundle_dt TEXT CHECK (octet_length(terminal_dt) = 14), manifest JSONB, -- list, similar to fatcat fileset manifest, plus extra: @@ -194,14 +193,14 @@ CREATE TABLE IF NOT 
EXISTS ingest_fileset_result ( -- sha1 (str) -- sha256 (str) -- mimetype (str) + -- extra (dict) -- platform_url (str) -- terminal_url (str) -- terminal_dt (str) - -- extra (dict) PRIMARY KEY (ingest_type, base_url) ); -CREATE INDEX ingest_fileset_result_terminal_url_idx ON ingest_fileset_result(terminal_url); +CREATE INDEX ingest_fileset_platform_name_domain_id_idx ON ingest_fileset_platform(platform_name, platform_domain, platform_id); CREATE TABLE IF NOT EXISTS shadow ( shadow_corpus TEXT NOT NULL CHECK (octet_length(shadow_corpus) >= 1), |