summaryrefslogtreecommitdiffstats
path: root/rust/migrations/2019-01-01-000000_init
diff options
context:
space:
mode:
Diffstat (limited to 'rust/migrations/2019-01-01-000000_init')
-rw-r--r--rust/migrations/2019-01-01-000000_init/down.sql48
-rw-r--r--rust/migrations/2019-01-01-000000_init/up.sql663
2 files changed, 711 insertions, 0 deletions
diff --git a/rust/migrations/2019-01-01-000000_init/down.sql b/rust/migrations/2019-01-01-000000_init/down.sql
new file mode 100644
index 00000000..b2666083
--- /dev/null
+++ b/rust/migrations/2019-01-01-000000_init/down.sql
@@ -0,0 +1,48 @@
+
+-- in opposite order as up.sql
+
+DROP TABLE IF EXISTS release_contrib CASCADE;
+DROP TABLE IF EXISTS release_ref CASCADE;
+DROP TABLE IF EXISTS file_rev_release CASCADE;
+DROP TABLE IF EXISTS fileset_rev_release CASCADE;
+DROP TABLE IF EXISTS webcapture_rev_release CASCADE;
+
+DROP TABLE IF EXISTS work_rev CASCADE;
+DROP TABLE IF EXISTS work_ident CASCADE;
+DROP TABLE IF EXISTS work_edit CASCADE;
+
+DROP TABLE IF EXISTS release_rev CASCADE;
+DROP TABLE IF EXISTS release_ident CASCADE;
+DROP TABLE IF EXISTS release_edit CASCADE;
+DROP TABLE IF EXISTS release_rev_abstract CASCADE;
+
+DROP TABLE IF EXISTS file_rev CASCADE;
+DROP TABLE IF EXISTS file_ident CASCADE;
+DROP TABLE IF EXISTS file_edit CASCADE;
+DROP TABLE IF EXISTS file_rev_url CASCADE;
+
+DROP TABLE IF EXISTS fileset_rev CASCADE;
+DROP TABLE IF EXISTS fileset_ident CASCADE;
+DROP TABLE IF EXISTS fileset_edit CASCADE;
+DROP TABLE IF EXISTS fileset_rev_file CASCADE;
+DROP TABLE IF EXISTS fileset_rev_url CASCADE;
+
+DROP TABLE IF EXISTS webcapture_rev CASCADE;
+DROP TABLE IF EXISTS webcapture_ident CASCADE;
+DROP TABLE IF EXISTS webcapture_edit CASCADE;
+DROP TABLE IF EXISTS webcapture_rev_cdx CASCADE;
+DROP TABLE IF EXISTS webcapture_rev_url CASCADE;
+
+DROP TABLE IF EXISTS container_rev CASCADE;
+DROP TABLE IF EXISTS container_ident CASCADE;
+DROP TABLE IF EXISTS container_edit CASCADE;
+
+DROP TABLE IF EXISTS creator_rev CASCADE;
+DROP TABLE IF EXISTS creator_ident CASCADE;
+DROP TABLE IF EXISTS creator_edit CASCADE;
+
+DROP TABLE IF EXISTS abstracts CASCADE;
+DROP TABLE IF EXISTS auth_oidc CASCADE;
+DROP TABLE IF EXISTS editor CASCADE;
+DROP TABLE IF EXISTS editgroup CASCADE;
+DROP TABLE IF EXISTS changelog CASCADE;
diff --git a/rust/migrations/2019-01-01-000000_init/up.sql b/rust/migrations/2019-01-01-000000_init/up.sql
new file mode 100644
index 00000000..ddaa60b3
--- /dev/null
+++ b/rust/migrations/2019-01-01-000000_init/up.sql
@@ -0,0 +1,663 @@
+-- written for Postgres 9.6 with OSSP extension for UUIDs -- ... but actually runs on Postgres 10 in qa/production
+
+-- Previously VARCHAR and fixed-size CHAR was used in this schema for specific
+-- columns (especially fixed-size external identifiers, and hashes). This was
+-- found to cause lookup problems, so switched to TEXT with CHECK constraints.
+
+-- Default timezone (of clients) is expected to be UTC.
+
+CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
+
+
+-- uuid_generate_v1mc: timestamp ordered, random MAC address
+-- uuid_generate_v4: totally random
+
+-- NB: could use LIKE clause, or "composite types"
+
+CREATE TABLE editor (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ username TEXT NOT NULL CHECK (username ~* '^[A-Za-z][A-Za-z0-9._-]{2,24}$'), -- UNIQ below
+ is_superuser BOOLEAN NOT NULL DEFAULT false,
+ is_admin BOOLEAN NOT NULL DEFAULT false,
+ is_bot BOOLEAN NOT NULL DEFAULT false,
+ is_active BOOLEAN NOT NULL DEFAULT true,
+ registered TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ auth_epoch TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ wrangler_id UUID REFERENCES editor(id),
+ active_editgroup_id UUID -- REFERENCES( editgroup(id) via ALTER below
+);
+
+-- case-insensitive UNIQ index on username
+CREATE UNIQUE INDEX editor_username_uniq_idx on editor(lower(username));
+CREATE INDEX active_editgroup_idx ON editor(active_editgroup_id);
+CREATE INDEX editor_username_idx ON editor(username);
+
+CREATE TABLE auth_oidc (
+ id BIGSERIAL PRIMARY KEY,
+ created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ editor_id UUID REFERENCES editor(id) NOT NULL,
+ provider TEXT NOT NULL,
+ oidc_iss TEXT NOT NULL,
+ oidc_sub TEXT NOT NULL,
+ UNIQUE (editor_id, provider),
+ UNIQUE (oidc_iss, oidc_sub)
+);
+
+CREATE TABLE editgroup (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ editor_id UUID REFERENCES editor(id) NOT NULL,
+ created TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ extra_json JSONB,
+ description TEXT
+);
+
+ALTER TABLE editor
+ ADD CONSTRAINT editor_editgroupid_fkey FOREIGN KEY (active_editgroup_id)
+ REFERENCES editgroup(id);
+
+CREATE TABLE changelog (
+ id BIGSERIAL PRIMARY KEY,
+ editgroup_id UUID REFERENCES editgroup(id) NOT NULL,
+ timestamp TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL
+);
+
+-- for "is this editgroup merged" queries
+CREATE INDEX changelog_editgroup_idx ON changelog(editgroup_id);
+
+CREATE TABLE abstracts (
+ -- fixed size hash (in hex). TODO: switch to bytes
+ sha1 TEXT PRIMARY KEY CHECK (octet_length(sha1) = 40),
+ content TEXT NOT NULL
+);
+
+-------------------- Creators -----------------------------------------------
+CREATE TABLE creator_rev (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ extra_json JSONB,
+
+ display_name TEXT NOT NULL,
+ given_name TEXT,
+ surname TEXT,
+ -- fixed size identifier
+ orcid TEXT CHECK(octet_length(orcid) = 19),
+ -- limited size for data quality
+ wikidata_qid TEXT CHECK(octet_length(wikidata_qid) <= 12)
+
+ -- TODO: viaf TEXT,
+ -- TODO: aliases/alternatives
+ -- TODO: sortable name?
+);
+
+-- Could denormalize a "is_live" flag into revision tables, to make indices
+-- more efficient
+CREATE INDEX creator_rev_orcid_idx ON creator_rev(orcid);
+CREATE INDEX creator_rev_wikidata_idx ON creator_rev(wikidata_qid);
+
+CREATE TABLE creator_ident (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ is_live BOOL NOT NULL DEFAULT false,
+ rev_id UUID REFERENCES creator_rev(id),
+ redirect_id UUID REFERENCES creator_ident(id)
+);
+
+CREATE INDEX creator_ident_rev_idx ON creator_ident(rev_id);
+CREATE INDEX creator_ident_redirect_idx ON creator_ident(redirect_id);
+
+CREATE TABLE creator_edit (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ editgroup_id UUID REFERENCES editgroup(id) NOT NULL,
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ ident_id UUID REFERENCES creator_ident(id) NOT NULL,
+ rev_id UUID REFERENCES creator_rev(id),
+ redirect_id UUID REFERENCES creator_ident(id),
+ prev_rev UUID REFERENCES creator_rev(id),
+ extra_json JSONB,
+ UNIQUE (editgroup_id, ident_id)
+);
+
+-------------------- Containers --------------------------------------------
+CREATE TABLE container_rev (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ extra_json JSONB,
+
+ name TEXT NOT NULL,
+ publisher TEXT,
+ -- fixed size identifier
+ issnl TEXT CHECK(octet_length(issnl) = 9),
+ -- limited size for data quality
+ wikidata_qid TEXT CHECK(octet_length(wikidata_qid) <= 12),
+ abbrev TEXT,
+ -- limited size for data quality
+ coden TEXT CHECK(octet_length(coden) <= 6)
+);
+
+CREATE INDEX container_rev_issnl_idx ON container_rev(issnl);
+CREATE INDEX container_rev_wikidata_idx ON container_rev(wikidata_qid);
+
+CREATE TABLE container_ident (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ is_live BOOL NOT NULL DEFAULT false,
+ rev_id UUID REFERENCES container_rev(id),
+ redirect_id UUID REFERENCES container_ident(id)
+);
+
+CREATE INDEX container_ident_rev_idx ON container_ident(rev_id);
+CREATE INDEX container_ident_redirect_idx ON container_ident(redirect_id);
+
+CREATE TABLE container_edit (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ editgroup_id UUID REFERENCES editgroup(id) NOT NULL,
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ ident_id UUID REFERENCES container_ident(id) NOT NULL,
+ rev_id UUID REFERENCES container_rev(id),
+ redirect_id UUID REFERENCES container_ident(id),
+ prev_rev UUID REFERENCES container_rev(id),
+ extra_json JSONB,
+ UNIQUE (editgroup_id, ident_id)
+);
+
+-------------------- Files -------------------------------------------------
+CREATE TABLE file_rev (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ extra_json JSONB,
+
+ size_bytes BIGINT,
+ -- fixed size hashes (in hex). TODO: switch to binary type type
+ sha1 TEXT CHECK(octet_length(sha1) = 40),
+ sha256 TEXT CHECK(octet_length(sha256) = 64),
+ md5 TEXT CHECK(octet_length(md5) = 32),
+ mimetype TEXT
+);
+
+CREATE INDEX file_rev_sha1_idx ON file_rev(sha1);
+CREATE INDEX file_rev_md5_idx ON file_rev(md5);
+CREATE INDEX file_rev_sha256_idx ON file_rev(sha256);
+
+CREATE TABLE file_rev_url (
+ id BIGSERIAL PRIMARY KEY,
+ file_rev UUID REFERENCES file_rev(id) NOT NULL,
+ rel TEXT NOT NULL, -- TODO: enum? web, webarchive, repo, etc TODO: default web?
+ url TEXT NOT NULL
+);
+
+CREATE INDEX file_rev_url_rev_idx ON file_rev_url(file_rev);
+
+CREATE TABLE file_ident (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ is_live BOOL NOT NULL DEFAULT false,
+ rev_id UUID REFERENCES file_rev(id),
+ redirect_id UUID REFERENCES file_ident(id)
+);
+
+CREATE INDEX file_ident_rev_idx ON file_ident(rev_id);
+CREATE INDEX file_ident_redirect_idx ON file_ident(redirect_id);
+
+CREATE TABLE file_edit (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ editgroup_id UUID REFERENCES editgroup(id) NOT NULL,
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ ident_id UUID REFERENCES file_ident(id) NOT NULL,
+ rev_id UUID REFERENCES file_rev(id),
+ redirect_id UUID REFERENCES file_ident(id),
+ prev_rev UUID REFERENCES file_rev(id),
+ extra_json JSONB,
+ UNIQUE (editgroup_id, ident_id)
+);
+
+-------------------- Fileset -----------------------------------------------
+CREATE TABLE fileset_rev (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ extra_json JSONB
+);
+
+CREATE TABLE fileset_rev_url (
+ id BIGSERIAL PRIMARY KEY,
+ fileset_rev UUID REFERENCES fileset_rev(id) NOT NULL,
+ rel TEXT NOT NULL, -- TODO: enum? web, webarchive, repo, etc TODO: default web?
+ url TEXT NOT NULL
+);
+
+CREATE INDEX fileset_rev_url_rev_idx ON fileset_rev_url(fileset_rev);
+
+CREATE TABLE fileset_rev_file (
+ id BIGSERIAL PRIMARY KEY,
+ fileset_rev UUID REFERENCES fileset_rev(id) NOT NULL,
+ path_name TEXT NOT NULL,
+ size_bytes BIGINT NOT NULL,
+ md5 TEXT CHECK(octet_length(md5) = 32),
+ sha1 TEXT CHECK(octet_length(sha1) = 40),
+ sha256 TEXT CHECK(octet_length(sha256) = 64),
+ extra_json JSONB
+);
+
+CREATE INDEX fileset_rev_file_rev_idx ON fileset_rev_file(fileset_rev);
+
+CREATE TABLE fileset_ident (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ is_live BOOL NOT NULL DEFAULT false,
+ rev_id UUID REFERENCES fileset_rev(id),
+ redirect_id UUID REFERENCES fileset_ident(id)
+);
+
+CREATE INDEX fileset_ident_rev_idx ON fileset_ident(rev_id);
+
+CREATE TABLE fileset_edit (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ editgroup_id UUID REFERENCES editgroup(id) NOT NULL,
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ ident_id UUID REFERENCES fileset_ident(id) NOT NULL,
+ rev_id UUID REFERENCES fileset_rev(id),
+ redirect_id UUID REFERENCES fileset_ident(id),
+ prev_rev UUID REFERENCES fileset_rev(id),
+ extra_json JSONB,
+ UNIQUE (editgroup_id, ident_id)
+);
+
+-------------------- Webcapture -----------------------------------------------
+CREATE TABLE webcapture_rev (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ extra_json JSONB,
+
+ original_url TEXT NOT NULL,
+ timestamp TIMESTAMP WITH TIME ZONE NOT NULL
+);
+
+CREATE TABLE webcapture_rev_url (
+ id BIGSERIAL PRIMARY KEY,
+ webcapture_rev UUID REFERENCES webcapture_rev(id) NOT NULL,
+ rel TEXT NOT NULL, -- TODO: enum? web, webarchive, repo, etc TODO: default web?
+ url TEXT NOT NULL
+);
+
+CREATE INDEX webcapture_rev_url_rev_idx ON webcapture_rev_url(webcapture_rev);
+
+CREATE TABLE webcapture_rev_cdx (
+ id BIGSERIAL PRIMARY KEY,
+ webcapture_rev UUID REFERENCES webcapture_rev(id) NOT NULL,
+ surt TEXT NOT NULL,
+ timestamp TEXT NOT NULL, -- TODO: timestamp type?
+ url TEXT NOT NULL,
+ mimetype TEXT,
+ status_code BIGINT,
+ sha1 TEXT CHECK(octet_length(sha1) = 40) NOT NULL,
+ sha256 TEXT CHECK(octet_length(sha256) = 64)
+ -- could extend with: language (detection), simhash, redirect
+);
+
+CREATE INDEX webcapture_rev_cdx_rev_idx ON webcapture_rev_cdx(webcapture_rev);
+
+CREATE TABLE webcapture_ident (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ is_live BOOL NOT NULL DEFAULT false,
+ rev_id UUID REFERENCES webcapture_rev(id),
+ redirect_id UUID REFERENCES webcapture_ident(id)
+);
+
+CREATE INDEX webcapture_ident_rev_idx ON webcapture_ident(rev_id);
+
+CREATE TABLE webcapture_edit (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ editgroup_id UUID REFERENCES editgroup(id) NOT NULL,
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ ident_id UUID REFERENCES webcapture_ident(id) NOT NULL,
+ rev_id UUID REFERENCES webcapture_rev(id),
+ redirect_id UUID REFERENCES webcapture_ident(id),
+ prev_rev UUID REFERENCES webcapture_rev(id),
+ extra_json JSONB,
+ UNIQUE (editgroup_id, ident_id)
+);
+
+-------------------- Release -----------------------------------------------
+CREATE TABLE release_rev (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ extra_json JSONB,
+
+ work_ident_id UUID NOT NULL, -- FOREIGN KEY; see ALRTER below
+ container_ident_id UUID REFERENCES container_ident(id),
+ title TEXT NOT NULL,
+ release_type TEXT, -- TODO: enum
+ release_status TEXT, -- TODO: enum
+ release_date DATE,
+ release_year BIGINT,
+ doi TEXT,
+ -- CHECK for length limit for data quality
+ pmid TEXT CHECK(octet_length(pmid) <= 12),
+ pmcid TEXT CHECK(octet_length(pmcid) <= 12),
+ wikidata_qid TEXT CHECK(octet_length(wikidata_qid) <= 12),
+ isbn13 TEXT CHECK(octet_length(isbn13) = 17),
+ core_id TEXT CHECK(octet_length(core_id) <= 12),
+ volume TEXT,
+ issue TEXT,
+ pages TEXT,
+ publisher TEXT, -- for books, NOT if container exists
+ language TEXT -- primary language of the work's fulltext; RFC1766/ISO639-1
+ -- TODO: oclc_ocn (TEXT or BIGINT)
+ -- TODO: identifier table?
+);
+
+CREATE INDEX release_rev_doi_idx ON release_rev(doi);
+CREATE INDEX release_rev_pmid_idx ON release_rev(pmid);
+CREATE INDEX release_rev_pmcid_idx ON release_rev(pmcid);
+CREATE INDEX release_rev_wikidata_idx ON release_rev(wikidata_qid);
+CREATE INDEX release_rev_isbn13_idx ON release_rev(isbn13);
+CREATE INDEX release_rev_core_idx ON release_rev(core_id);
+CREATE INDEX release_rev_work_idx ON release_rev(work_ident_id);
+
+CREATE TABLE release_rev_abstract (
+ id BIGSERIAL PRIMARY KEY,
+ release_rev UUID REFERENCES release_rev(id) NOT NULL,
+ abstract_sha1 TEXT REFERENCES abstracts(sha1) NOT NULL,
+ mimetype TEXT,
+ lang TEXT
+);
+
+CREATE INDEX release_rev_abstract_rev_idx ON release_rev_abstract(release_rev);
+CREATE INDEX release_rev_abstract_sha1_idx ON release_rev_abstract(abstract_sha1);
+
+CREATE TABLE release_ident (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ is_live BOOL NOT NULL DEFAULT false,
+ rev_id UUID REFERENCES release_rev(id),
+ redirect_id UUID REFERENCES release_ident(id)
+);
+
+CREATE INDEX release_ident_rev_idx ON release_ident(rev_id);
+CREATE INDEX release_ident_redirect_idx ON release_ident(redirect_id);
+
+CREATE TABLE release_edit (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ editgroup_id UUID REFERENCES editgroup(id) NOT NULL,
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ ident_id UUID REFERENCES release_ident(id) NOT NULL,
+ rev_id UUID REFERENCES release_rev(id),
+ redirect_id UUID REFERENCES release_ident(id),
+ prev_rev UUID REFERENCES release_rev(id),
+ extra_json JSONB,
+ UNIQUE (editgroup_id, ident_id)
+);
+
+-------------------- Works --------------------------------------------------
+CREATE TABLE work_rev (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ extra_json JSONB
+);
+
+CREATE TABLE work_ident (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ is_live BOOL NOT NULL DEFAULT false,
+ rev_id UUID REFERENCES work_rev(id),
+ redirect_id UUID REFERENCES work_ident(id)
+);
+
+CREATE INDEX work_ident_rev_idx ON work_ident(rev_id);
+CREATE INDEX work_ident_redirect_idx ON work_ident(redirect_id);
+
+CREATE TABLE work_edit (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ editgroup_id UUID REFERENCES editgroup(id) NOT NULL,
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+ ident_id UUID REFERENCES work_ident(id) NOT NULL,
+ rev_id UUID REFERENCES work_rev(id),
+ redirect_id UUID REFERENCES work_ident(id),
+ prev_rev UUID REFERENCES work_rev(id),
+ extra_json JSONB,
+ UNIQUE (editgroup_id, ident_id)
+);
+
+ALTER TABLE release_rev
+ ADD CONSTRAINT release_containeridentid_fkey FOREIGN KEY (work_ident_id)
+ REFERENCES work_ident(id);
+
+-------------------- Inter-Entity Relations
+
+CREATE TABLE release_contrib (
+ id BIGSERIAL PRIMARY KEY,
+ release_rev UUID REFERENCES release_rev(id) NOT NULL,
+ creator_ident_id UUID REFERENCES creator_ident(id),
+ raw_name TEXT,
+ role TEXT, -- TODO: enum?
+ index_val INTEGER,
+ extra_json JSONB
+);
+
+CREATE INDEX release_contrib_rev_idx ON release_contrib(release_rev);
+CREATE INDEX release_contrib_creator_idx ON release_contrib(creator_ident_id);
+
+CREATE TABLE release_ref (
+ id BIGSERIAL PRIMARY KEY,
+ release_rev UUID REFERENCES release_rev(id) NOT NULL,
+ target_release_ident_id UUID REFERENCES release_ident(id), -- or work?
+ index_val INTEGER,
+ key TEXT,
+ extra_json JSONB, -- title, year, container_title, locator (aka, page), oci_id
+ container_name TEXT,
+ year INTEGER,
+ title TEXT,
+ locator TEXT
+ -- TODO: oci_id (TEXT)
+);
+
+CREATE INDEX release_ref_rev_idx ON release_ref(release_rev);
+CREATE INDEX release_ref_target_release_idx ON release_ref(target_release_ident_id);
+
+CREATE TABLE file_rev_release (
+ file_rev UUID REFERENCES file_rev(id) NOT NULL,
+ target_release_ident_id UUID REFERENCES release_ident(id) NOT NULL,
+ PRIMARY KEY (file_rev, target_release_ident_id)
+);
+CREATE INDEX file_rev_release_target_release_idx ON file_rev_release(target_release_ident_id);
+CREATE TABLE fileset_rev_release (
+ fileset_rev UUID REFERENCES fileset_rev(id) NOT NULL,
+ target_release_ident_id UUID REFERENCES release_ident(id) NOT NULL,
+ PRIMARY KEY (fileset_rev, target_release_ident_id)
+);
+CREATE INDEX fileset_rev_release_target_release_idx ON fileset_rev_release(target_release_ident_id);
+CREATE TABLE webcapture_rev_release (
+ webcapture_rev UUID REFERENCES webcapture_rev(id) NOT NULL,
+ target_release_ident_id UUID REFERENCES release_ident(id) NOT NULL,
+ PRIMARY KEY (webcapture_rev, target_release_ident_id)
+);
+CREATE INDEX webcapture_rev_release_target_release_idx ON webcapture_rev_release(target_release_ident_id);
+
+---------------------------------------------------------------------------
+---------------------------------------------------------------------------
+---------------------------------------------------------------------------
+
+-- Fake data at the raw SQL level, for early development and testing
+-- Convention:
+-- * first entity is smallest possible (mostly null)
+-- * second entity is rich (all fields/relations designed) but artificial
+-- * third entity (and above) are realistic (real DOI, etc)
+
+BEGIN;
+
+INSERT INTO editor (id, username, is_superuser, is_admin, is_bot, auth_epoch) VALUES
+ ('00000000-0000-0000-AAAA-000000000001', 'root', true, true, false, '1970-01-01T01:01:01Z'), -- aaaaaaaaaaaabkvkaaaaaaaaae
+ ('00000000-0000-0000-AAAA-000000000002', 'admin', true, true, false, '1970-01-01T01:01:01Z'), -- aaaaaaaaaaaabkvkaaaaaaaaai
+ ('00000000-0000-0000-AAAA-000000000003', 'demo-user', false, true, false, '1970-01-01T01:01:01Z'), -- aaaaaaaaaaaabkvkaaaaaaaaam
+ ('00000000-0000-0000-AAAA-000000000004', 'claire', false, false, false, default), -- aaaaaaaaaaaabkvkaaaaaaaaaq
+ ('00000000-0000-0000-AAAA-000000000005', 'webface-bot', true, true, true, '1970-01-01T01:01:01Z'), -- aaaaaaaaaaaabkvkaaaaaaaaau
+ ('00000000-0000-0000-AAAA-000000000006', 'bnewbold', false, true, false, '1970-01-01T01:01:01Z'); -- aaaaaaaaaaaabkvkaaaaaaaaay
+
+INSERT INTO editgroup (id, editor_id, description) VALUES
+ ('00000000-0000-0000-BBBB-000000000001', '00000000-0000-0000-AAAA-000000000001', 'first edit ever!'), -- aaaaaaaaaaaabo53aaaaaaaaae
+ ('00000000-0000-0000-BBBB-000000000002', '00000000-0000-0000-AAAA-000000000001', 'another one!'), -- aaaaaaaaaaaabo53aaaaaaaaai
+ ('00000000-0000-0000-BBBB-000000000003', '00000000-0000-0000-AAAA-000000000003', 'user edit'), -- aaaaaaaaaaaabo53aaaaaaaaam
+ ('00000000-0000-0000-BBBB-000000000004', '00000000-0000-0000-AAAA-000000000002', 'uncommited edit'), -- aaaaaaaaaaaabo53aaaaaaaaaq
+ ('00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-AAAA-000000000001', 'journal edit'), -- aaaaaaaaaaaabo53aaaaaaaaau
+ ('00000000-0000-0000-BBBB-000000000006', '00000000-0000-0000-AAAA-000000000001', 'another journal edit'); -- aaaaaaaaaaaabo53aaaaaaaaay
+
+INSERT INTO changelog (editgroup_id) VALUES
+ ('00000000-0000-0000-BBBB-000000000001'),
+ ('00000000-0000-0000-BBBB-000000000002'),
+ ('00000000-0000-0000-BBBB-000000000003'),
+ ('00000000-0000-0000-BBBB-000000000004'),
+ ('00000000-0000-0000-BBBB-000000000005');
+
+INSERT INTO abstracts (sha1, content) VALUES
+ ('1ba86bf8c2979a62d29b18b537e50b2b093be27e', 'some long abstract in plain text'),
+ ('0da908ab584b5e445a06beb172e3fab8cb5169e3', '<jats>A longer, more correct abstract should in theory go here</jats>');
+
+INSERT INTO container_rev (id, name, publisher, issnl, abbrev, coden, extra_json) VALUES
+ ('00000000-0000-0000-1111-FFF000000001', 'MySpace Blog', null, null, null, null, null),
+ ('00000000-0000-0000-1111-FFF000000002', 'Journal of Trivial Results', 'bogus publishing group', '1234-5678', 'Triv. Res.', 'CDNXYZ', '{"is_oa": false, "in_doaj": false}'),
+ ('00000000-0000-0000-1111-FFF000000003', 'PLOS Medicine', 'Public Library of Science', '1549-1277', 'PLoS med.', null, '{"is_oa": true, "in_doaj": true}');
+
+INSERT INTO container_ident (id, is_live, rev_id, redirect_id) VALUES
+ ('00000000-0000-0000-1111-000000000001', true, '00000000-0000-0000-1111-FFF000000001', null), -- aaaaaaaaaaaaaeiraaaaaaaaae
+ ('00000000-0000-0000-1111-000000000002', true, '00000000-0000-0000-1111-FFF000000002', null), -- aaaaaaaaaaaaaeiraaaaaaaaai
+ ('00000000-0000-0000-1111-000000000003', true, '00000000-0000-0000-1111-FFF000000003', null); -- aaaaaaaaaaaaaeiraaaaaaaaam
+
+INSERT INTO container_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VALUES
+ ('00000000-0000-0000-1111-000000000001', '00000000-0000-0000-1111-FFF000000001', null, '00000000-0000-0000-BBBB-000000000003', null),
+ ('00000000-0000-0000-1111-000000000002', '00000000-0000-0000-1111-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),
+ ('00000000-0000-0000-1111-000000000003', '00000000-0000-0000-1111-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-1111-FFF000000002');
+
+INSERT INTO creator_rev (id, display_name, given_name, surname, orcid, wikidata_qid) VALUES
+ ('00000000-0000-0000-2222-FFF000000001', 'Grace Hopper', null, null, null, null),
+ ('00000000-0000-0000-2222-FFF000000002', 'Christine Moran', 'Christine', 'Moran', '0000-0003-2088-7465', 'Q1234'),
+ ('00000000-0000-0000-2222-FFF000000003', 'John P. A. Ioannidis', 'John', 'Ioannidis', '0000-0003-3118-6859', 'Q5678');
+
+INSERT INTO creator_ident (id, is_live, rev_id, redirect_id) VALUES
+ ('00000000-0000-0000-2222-000000000001', true, '00000000-0000-0000-2222-FFF000000001', null), -- aaaaaaaaaaaaaircaaaaaaaaae
+ ('00000000-0000-0000-2222-000000000002', true, '00000000-0000-0000-2222-FFF000000002', null), -- aaaaaaaaaaaaaircaaaaaaaaai
+ ('00000000-0000-0000-2222-000000000003', true, '00000000-0000-0000-2222-FFF000000003', null), -- aaaaaaaaaaaaaircaaaaaaaaam
+ ('00000000-0000-0000-2222-000000000004', false, '00000000-0000-0000-2222-FFF000000002', null); -- aaaaaaaaaaaaaircaaaaaaaaaq
+
+INSERT INTO creator_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VALUES
+ ('00000000-0000-0000-2222-000000000001', '00000000-0000-0000-2222-FFF000000001', null, '00000000-0000-0000-BBBB-000000000001', null),
+ ('00000000-0000-0000-2222-000000000002', '00000000-0000-0000-2222-FFF000000002', null, '00000000-0000-0000-BBBB-000000000002', null),
+ ('00000000-0000-0000-2222-000000000003', '00000000-0000-0000-2222-FFF000000003', null, '00000000-0000-0000-BBBB-000000000003', null),
+ ('00000000-0000-0000-2222-000000000004', '00000000-0000-0000-2222-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', '00000000-0000-0000-2222-FFF000000003');
+
+INSERT INTO file_rev (id, size_bytes, sha1, sha256, md5, mimetype) VALUES
+ ('00000000-0000-0000-3333-FFF000000001', null, null, null, null, null),
+ ('00000000-0000-0000-3333-FFF000000002', 4321, '7d97e98f8af710c7e7fe703abc8f639e0ee507c4', null, null, 'text/plain'),
+ ('00000000-0000-0000-3333-FFF000000003', 255629, '3f242a192acc258bdfdb151943419437f440c313', 'ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362', 'f4de91152c7ab9fdc2a128f962faebff', 'application/pdf');
+
+INSERT INTO file_rev_url (file_rev, rel, url) VALUES
+ ('00000000-0000-0000-3333-FFF000000002', 'web', 'http://archive.org/robots.txt'),
+ ('00000000-0000-0000-3333-FFF000000003', 'publisher', 'http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable');
+
+INSERT INTO file_ident (id, is_live, rev_id, redirect_id) VALUES
+ ('00000000-0000-0000-3333-000000000001', true, '00000000-0000-0000-3333-FFF000000001', null), -- aaaaaaaaaaaaamztaaaaaaaaae
+ ('00000000-0000-0000-3333-000000000002', true, '00000000-0000-0000-3333-FFF000000002', null), -- aaaaaaaaaaaaamztaaaaaaaaai
+ ('00000000-0000-0000-3333-000000000003', true, '00000000-0000-0000-3333-FFF000000003', null); -- aaaaaaaaaaaaamztaaaaaaaaam
+
+INSERT INTO file_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VALUES
+ ('00000000-0000-0000-3333-000000000001', '00000000-0000-0000-3333-FFF000000001', null, '00000000-0000-0000-BBBB-000000000003', null),
+ ('00000000-0000-0000-3333-000000000002', '00000000-0000-0000-3333-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),
+ ('00000000-0000-0000-3333-000000000003', '00000000-0000-0000-3333-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-3333-FFF000000002');
+
+INSERT INTO fileset_rev (id) VALUES
+ ('00000000-0000-0000-6666-FFF000000001'),
+ ('00000000-0000-0000-6666-FFF000000002'),
+ ('00000000-0000-0000-6666-FFF000000003');
+
+INSERT INTO fileset_rev_file (fileset_rev, path_name, size_bytes, md5, sha1, sha256, extra_json) VALUES
+ ('00000000-0000-0000-6666-FFF000000002', 'README.md', 1024, null, null, null, null),
+ ('00000000-0000-0000-6666-FFF000000003', 'README.md', 2048, null, null, null, null),
+ ('00000000-0000-0000-6666-FFF000000003', 'stuff/data.tar.gz', 2340000, 'f4de91152c7ab9fdc2a128f962faebff', '3f242a192acc258bdfdb151943419437f440c313', 'ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362', '{"mimetype": "application/gzip"}');
+
+INSERT INTO fileset_rev_url (fileset_rev, rel, url) VALUES
+ ('00000000-0000-0000-6666-FFF000000002', 'web', 'http://personal-blog.name/dataset/'),
+ ('00000000-0000-0000-6666-FFF000000003', 'web', 'http://other-personal-blog.name/dataset/'),
+ ('00000000-0000-0000-6666-FFF000000003', 'archive', 'https://archive.org/download/random-dataset/');
+
+INSERT INTO fileset_ident (id, is_live, rev_id, redirect_id) VALUES
+ ('00000000-0000-0000-6666-000000000001', true, '00000000-0000-0000-6666-FFF000000001', null), -- aaaaaaaaaaaaaztgaaaaaaaaam
+ ('00000000-0000-0000-6666-000000000002', true, '00000000-0000-0000-6666-FFF000000002', null), -- aaaaaaaaaaaaaztgaaaaaaaaai
+ ('00000000-0000-0000-6666-000000000003', true, '00000000-0000-0000-6666-FFF000000003', null); -- aaaaaaaaaaaaaztgaaaaaaaaam
+
+INSERT INTO fileset_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VALUES
+ ('00000000-0000-0000-6666-000000000001', '00000000-0000-0000-6666-FFF000000001', null, '00000000-0000-0000-BBBB-000000000003', null),
+ ('00000000-0000-0000-6666-000000000002', '00000000-0000-0000-6666-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),
+ ('00000000-0000-0000-6666-000000000003', '00000000-0000-0000-6666-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-6666-FFF000000002');
+
+INSERT INTO webcapture_rev (id, original_url, timestamp) VALUES
+ ('00000000-0000-0000-7777-FFF000000001', 'http://example.org', '1996-01-02T12:34:56Z'),
+ ('00000000-0000-0000-7777-FFF000000002', 'http://example.org', '1996-01-02T12:34:56Z'),
+ ('00000000-0000-0000-7777-FFF000000003', 'https://asheesh.org', '2003-02-17T04:47:21Z');
+
+INSERT INTO webcapture_rev_cdx (webcapture_rev, surt, timestamp, url, mimetype, status_code, sha1, sha256) VALUES
+ ('00000000-0000-0000-7777-FFF000000002', 'org,example)/', 19960102123456, 'http://example.org', null, 200, '5886903ba5aeaf7446fe9f77bd03adfc029cedf0', null),
+ ('00000000-0000-0000-7777-FFF000000003', 'org,asheesh)/', 20030217044721, 'http://asheesh.org:80/', 'text/html', 200, '5886903ba5aeaf7446fe9f77bd03adfc029cedf0', 'ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362'),
+ ('00000000-0000-0000-7777-FFF000000003', 'org,asheesh)/robots.txt', 20030217044719, 'http://asheesh.org:80/robots.txt', 'text/html', 404, 'a637f1d27d9bcb237310ed29f19c07e1c8cf0aa5', 'ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362');
+
+INSERT INTO webcapture_rev_url (webcapture_rev, rel, url) VALUES
+ ('00000000-0000-0000-7777-FFF000000002', 'wayback', 'http://web.archive.org/201801010001/http://example.org'),
+ ('00000000-0000-0000-7777-FFF000000003', 'wayback', 'http://web.archive.org/201801010001/https://asheesh.org'),
+ ('00000000-0000-0000-7777-FFF000000003', 'warc', 'https://example.org/something.warc.gz');
+
+INSERT INTO webcapture_ident (id, is_live, rev_id, redirect_id) VALUES
+ ('00000000-0000-0000-7777-000000000001', true, '00000000-0000-0000-7777-FFF000000001', null), -- aaaaaaaaaaaaa53xaaaaaaaaae
+ ('00000000-0000-0000-7777-000000000002', true, '00000000-0000-0000-7777-FFF000000002', null), -- aaaaaaaaaaaaa53xaaaaaaaaai
+ ('00000000-0000-0000-7777-000000000003', true, '00000000-0000-0000-7777-FFF000000003', null); -- aaaaaaaaaaaaa53xaaaaaaaaam
+
+INSERT INTO webcapture_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VALUES
+ ('00000000-0000-0000-7777-000000000001', '00000000-0000-0000-7777-FFF000000001', null, '00000000-0000-0000-BBBB-000000000003', null),
+ ('00000000-0000-0000-7777-000000000002', '00000000-0000-0000-7777-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),
+ ('00000000-0000-0000-7777-000000000003', '00000000-0000-0000-7777-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-7777-FFF000000002');
+
+INSERT INTO work_rev (id) VALUES
+ ('00000000-0000-0000-5555-FFF000000001'),
+ ('00000000-0000-0000-5555-FFF000000002'),
+ ('00000000-0000-0000-5555-FFF000000003');
+
+INSERT INTO work_ident (id, is_live, rev_id, redirect_id) VALUES
+ ('00000000-0000-0000-5555-000000000001', true, '00000000-0000-0000-5555-FFF000000001', null), -- aaaaaaaaaaaaavkvaaaaaaaaae
+ ('00000000-0000-0000-5555-000000000002', true, '00000000-0000-0000-5555-FFF000000002', null), -- aaaaaaaaaaaaavkvaaaaaaaaai
+ ('00000000-0000-0000-5555-000000000003', true, '00000000-0000-0000-5555-FFF000000003', null); -- aaaaaaaaaaaaavkvaaaaaaaaam
+
+INSERT INTO work_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VALUES
+ ('00000000-0000-0000-5555-000000000001', '00000000-0000-0000-5555-FFF000000001', null, '00000000-0000-0000-BBBB-000000000003', null),
+ ('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),
+ ('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-5555-FFF000000002');
+
+INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, release_year, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language) VALUES
+ ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001', null, 'example title', null, null, null, null, null, null, null, null, null, null, null, null, null, null, null),
+ ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001', 'bigger example', 'article-journal', null, '2018-01-01', 2018, '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn'),
+ ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', 2005, '10.1371/journal.pmed.0020124', null, null, null, null, null, '2', '8', 'e124', 'Public Library of Science', 'en');
+
+INSERT INTO release_ident (id, is_live, rev_id, redirect_id) VALUES
+ ('00000000-0000-0000-4444-000000000001', true, '00000000-0000-0000-4444-FFF000000001', null), -- aaaaaaaaaaaaarceaaaaaaaaae
+ ('00000000-0000-0000-4444-000000000002', true, '00000000-0000-0000-4444-FFF000000002', null), -- aaaaaaaaaaaaarceaaaaaaaaai
+ ('00000000-0000-0000-4444-000000000003', true, '00000000-0000-0000-4444-FFF000000003', null); -- aaaaaaaaaaaaarceaaaaaaaaam
+
+INSERT INTO release_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VALUES
+ ('00000000-0000-0000-4444-000000000001', '00000000-0000-0000-4444-FFF000000001', null, '00000000-0000-0000-BBBB-000000000003', null),
+ ('00000000-0000-0000-4444-000000000002', '00000000-0000-0000-4444-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),
+ ('00000000-0000-0000-4444-000000000003', '00000000-0000-0000-4444-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-4444-FFF000000002');
+
+INSERT INTO release_rev_abstract (release_rev, abstract_sha1, mimetype, lang) VALUES
+ ('00000000-0000-0000-4444-FFF000000001', '1ba86bf8c2979a62d29b18b537e50b2b093be27e', 'text/plain', 'en'),
+ ('00000000-0000-0000-4444-FFF000000002', '0da908ab584b5e445a06beb172e3fab8cb5169e3', 'application/xml+jats', 'en');
+
+INSERT INTO release_contrib (release_rev, creator_ident_id, raw_name, role, index_val) VALUES
+ ('00000000-0000-0000-4444-FFF000000002', null, null, null, null),
+ ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-2222-000000000002', 'some contrib', 'editor', 4),
+ ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-2222-000000000003', 'John P. A. Ioannidis', 'author', 0);
+
+INSERT INTO release_ref (release_rev, target_release_ident_id, index_val, extra_json) VALUES
+ ('00000000-0000-0000-4444-FFF000000002', null, null, null),
+ ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-4444-000000000001', 4, '{"unstructured":"citation note"}'),
+ ('00000000-0000-0000-4444-FFF000000003', null, 0, '{"unstructured": "Ioannidis JP, Haidich AB, Lau J. Any casualties in the clash of randomised and observational evidence? BMJ. 2001;322:879–880"}'),
+ ('00000000-0000-0000-4444-FFF000000003', null, 1, '{"unstructured":"Lawlor DA, Davey Smith G, Kundu D, Bruckdorfer KR, Ebrahim S. Those confounded vitamins: What can we learn from the differences between observational versus randomised trial evidence? Lancet. 2004;363:1724–1727."}'),
+ ('00000000-0000-0000-4444-FFF000000003', null, 2, '{"unstructured":"Vandenbroucke JP. When are observational studies as credible as randomised trials? Lancet. 2004;363:1728–1731."}'),
+ ('00000000-0000-0000-4444-FFF000000003', null, 3, '{"unstructured":"Michiels S, Koscielny S, Hill C. Prediction of cancer outcome with microarrays: A multiple random validation strategy. Lancet. 2005;365:488–492."}'),
+ ('00000000-0000-0000-4444-FFF000000003', null, 4, '{"unstructured":"Ioannidis JPA, Ntzani EE, Trikalinos TA, Contopoulos-Ioannidis DG. Replication validity of genetic association studies. Nat Genet. 2001;29:306–309."}'),
+ ('00000000-0000-0000-4444-FFF000000003', null, 5, '{"unstructured":"Colhoun HM, McKeigue PM, Davey Smith G. Problems of reporting genetic associations with complex outcomes. Lancet. 2003;361:865–872."}');
+
+INSERT INTO file_rev_release (file_rev, target_release_ident_id) VALUES
+ ('00000000-0000-0000-3333-FFF000000002', '00000000-0000-0000-4444-000000000002'),
+ ('00000000-0000-0000-3333-FFF000000003', '00000000-0000-0000-4444-000000000003');
+
+INSERT INTO fileset_rev_release (fileset_rev, target_release_ident_id) VALUES
+ ('00000000-0000-0000-6666-FFF000000002', '00000000-0000-0000-4444-000000000002'),
+ ('00000000-0000-0000-6666-FFF000000003', '00000000-0000-0000-4444-000000000003');
+
+INSERT INTO webcapture_rev_release (webcapture_rev, target_release_ident_id) VALUES
+ ('00000000-0000-0000-7777-FFF000000002', '00000000-0000-0000-4444-000000000002'),
+ ('00000000-0000-0000-7777-FFF000000003', '00000000-0000-0000-4444-000000000003');
+
+commit;