aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-15 17:14:43 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-15 18:15:29 -0700
commit 350a4e64aa60896391c1040d958b6b039ea3a79f (patch)
tree d8d31cc6e62440fd9742138fd9d76e5b3ab77a71
parent 0a6e449317278e95c3c706aaee19ffb9dc00bebc (diff)
download sandcrawler-350a4e64aa60896391c1040d958b6b039ea3a79f.tar.gz
sandcrawler-350a4e64aa60896391c1040d958b6b039ea3a79f.zip
sql fileset ingest table iteration
-rw-r--r-- sql/migrations/2019-12-19-060141_init/up.sql 23
1 files changed, 11 insertions, 12 deletions
diff --git a/sql/migrations/2019-12-19-060141_init/up.sql b/sql/migrations/2019-12-19-060141_init/up.sql
index f312b6f..e478616 100644
--- a/sql/migrations/2019-12-19-060141_init/up.sql
+++ b/sql/migrations/2019-12-19-060141_init/up.sql
@@ -165,25 +165,24 @@ CREATE TABLE IF NOT EXISTS ingest_file_result (
CREATE INDEX ingest_file_result_terminal_url_idx ON ingest_file_result(terminal_url);
CREATE INDEX ingest_file_result_terminal_sha1hex_idx ON ingest_file_result(terminal_sha1hex);
-CREATE TABLE IF NOT EXISTS ingest_fileset_result (
+CREATE TABLE IF NOT EXISTS ingest_fileset_platform (
ingest_type TEXT NOT NULL CHECK (octet_length(ingest_type) >= 1),
base_url TEXT NOT NULL CHECK (octet_length(base_url) >= 1),
updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
hit BOOLEAN NOT NULL,
status TEXT CHECK (octet_length(status) >= 1),
- terminal_url TEXT CHECK (octet_length(terminal_url) >= 1),
- terminal_dt TEXT CHECK (octet_length(terminal_dt) = 14),
- terminal_status_code INT,
- terminal_sha1hex TEXT CHECK (octet_length(terminal_sha1hex) = 40),
-
- platform TEXT CHECK (octet_length(platform) >= 1),
- platform_id TEXT CHECK (octet_length(platform_id) >= 1),
+ platform_name TEXT NOT NULL CHECK (octet_length(platform_name) >= 1),
+ platform_domain TEXT NOT NULL CHECK (octet_length(platform_domain) >= 1),
+ platform_id TEXT NOT NULL CHECK (octet_length(platform_id) >= 1),
ingest_strategy TEXT CHECK (octet_length(ingest_strategy) >= 1),
total_size BIGINT,
file_count INT,
- item_name TEXT CHECK (octet_length(item_name) >= 1),
- item_bundle_path TEXT CHECK (octet_length(item_path_bundle) >= 1),
+ archiveorg_item_name TEXT CHECK (octet_length(archiveorg_item_name) >= 1),
+
+ archiveorg_item_bundle_path TEXT CHECK (octet_length(archiveorg_item_bundle_path) >= 1),
+ web_bundle_url TEXT CHECK (octet_length(web_bundle_url) >= 1),
+ web_bundle_dt TEXT CHECK (octet_length(web_bundle_dt) = 14), -- exactly 14 bytes when set; NULL is allowed
manifest JSONB,
-- list, similar to fatcat fileset manifest, plus extra:
@@ -194,14 +193,14 @@ CREATE TABLE IF NOT EXISTS ingest_fileset_result (
-- sha1 (str)
-- sha256 (str)
-- mimetype (str)
+ -- extra (dict)
-- platform_url (str)
-- terminal_url (str)
-- terminal_dt (str)
- -- extra (dict)
PRIMARY KEY (ingest_type, base_url)
);
-CREATE INDEX ingest_fileset_result_terminal_url_idx ON ingest_fileset_result(terminal_url);
+CREATE INDEX ingest_fileset_platform_name_domain_id_idx ON ingest_fileset_platform(platform_name, platform_domain, platform_id);
CREATE TABLE IF NOT EXISTS shadow (
shadow_corpus TEXT NOT NULL CHECK (octet_length(shadow_corpus) >= 1),