aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-01-17 10:44:38 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-01-17 10:44:38 -0800
commit24e7a7038f9f0c827a11282a5cc646117ffdfb9b (patch)
tree40227dbd1a2b652bad24c9d71f8027c20ef1b7d1
parent3ad702e413c084590505cff16f9532b236718049 (diff)
parent0a8c9a5e07213276617f06b0379a166e7fd1c100 (diff)
downloadfatcat-24e7a7038f9f0c827a11282a5cc646117ffdfb9b.tar.gz
fatcat-24e7a7038f9f0c827a11282a5cc646117ffdfb9b.zip
Merge branch 'citation-efficiency'
Manually merged conflict in: - ./TODO
-rw-r--r--python/fatcat_web/auth.py5
-rw-r--r--python/fatcat_web/templates/release_view.html2
-rw-r--r--python/tests/citation_efficiency.py113
-rw-r--r--rust/Cargo.lock3
-rw-r--r--rust/Cargo.toml3
-rw-r--r--rust/migrations/2019-01-01-000000_init/down.sql1
-rw-r--r--rust/migrations/2019-01-01-000000_init/up.sql61
-rw-r--r--rust/src/database_models.rs107
-rw-r--r--rust/src/database_schema.rs23
-rw-r--r--rust/src/entity_crud.rs119
-rw-r--r--rust/src/lib.rs2
-rw-r--r--rust/tests/test_refs.rs161
12 files changed, 505 insertions, 95 deletions
diff --git a/python/fatcat_web/auth.py b/python/fatcat_web/auth.py
index 8035cbe5..03964c92 100644
--- a/python/fatcat_web/auth.py
+++ b/python/fatcat_web/auth.py
@@ -90,7 +90,10 @@ def handle_ia_xauth(email, password):
'secret': Config.IA_XAUTH_CLIENT_SECRET,
})
if resp.status_code == 401 or (not resp.json().get('success')):
- flash("Internet Archive email/password didn't match: {}".format(resp.json()['values']['reason']))
+ try:
+ flash("Internet Archive email/password didn't match: {}".format(resp.json()['values']['reason']))
+ except:
+ print("IA XAuth fail: {}".format(resp.content))
return render_template('auth_ia_login.html', email=email), resp.status_code
elif resp.status_code != 200:
flash("Internet Archive login failed (internal error?)")
diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html
index fd86b7c9..4e24b281 100644
--- a/python/fatcat_web/templates/release_view.html
+++ b/python/fatcat_web/templates/release_view.html
@@ -143,7 +143,7 @@ Raw Object:
{% endif %}
<br>
-{% if release.refs.size != 0 %}
+{% if release.refs != None and release.refs.size != 0 %}
<h3>References</h3>
This release citing other releases.
<ol>
diff --git a/python/tests/citation_efficiency.py b/python/tests/citation_efficiency.py
new file mode 100644
index 00000000..fe5006cc
--- /dev/null
+++ b/python/tests/citation_efficiency.py
@@ -0,0 +1,113 @@
+
+import json
+import pytest
+from copy import copy
+
+from fatcat_client import *
+from fatcat_client.rest import ApiException
+from fixtures import *
+
+
+def test_citation_indexing(api):
+ # indexing is consistent and reacts to change
+
+ eg = quick_eg(api)
+ r1 = ReleaseEntity(title="the target")
+ r1.refs = [
+ ReleaseRef(key="first", title="the first title"),
+ ReleaseRef(key="second", title="the second title"),
+ ReleaseRef(key="third", title="a third title"),
+ ]
+ r1 = api.get_release(api.create_release(r1, editgroup_id=eg.editgroup_id).ident)
+ api.accept_editgroup(eg.editgroup_id)
+
+ assert r1.refs[0].index == 0
+ assert r1.refs[0].key == "first"
+ assert r1.refs[1].index == 1
+ assert r1.refs[1].key == "second"
+ assert r1.refs[2].index == 2
+ assert r1.refs[2].key == "third"
+
+ r1.refs.pop(1)
+ eg = quick_eg(api)
+ api.update_release(r1.ident, r1, editgroup_id=eg.editgroup_id)
+ api.accept_editgroup(eg.editgroup_id)
+ r1 = api.get_release(r1.ident)
+
+ assert r1.refs[0].index == 0
+ assert r1.refs[0].key == "first"
+ assert r1.refs[1].index == 1
+ assert r1.refs[1].key == "third"
+
+def test_citation_targets(api):
+ # ref keys and indexes are unchanged when a citation is linked to a target release
+ # also checks that updating a release's refs works
+
+ eg = quick_eg(api)
+ r1 = ReleaseEntity(title="the target")
+ r1 = api.get_release(api.create_release(r1, editgroup_id=eg.editgroup_id).ident)
+ r2 = ReleaseEntity(title="the citer")
+ r2.refs = [
+ ReleaseRef(key="first", title="something else"),
+ ReleaseRef(key="second", title="the target title"),
+ ]
+ r2 = api.get_release(api.create_release(r2, editgroup_id=eg.editgroup_id).ident)
+ api.accept_editgroup(eg.editgroup_id)
+
+ eg = quick_eg(api)
+ r2.refs[1].target_release_id = r1.ident
+ api.update_release(r2.ident, r2, editgroup_id=eg.editgroup_id)
+ api.accept_editgroup(eg.editgroup_id)
+ r2 = api.get_release(r2.ident)
+ assert r2.refs[0].key == "first"
+ assert r2.refs[1].key == "second"
+ assert r2.refs[0].index == 0 # TODO: one-indexing?
+ assert r2.refs[1].index == 1
+ assert r2.refs[0].target_release_id == None
+ assert r2.refs[1].target_release_id == r1.ident
+ assert len(r2.refs) == 2
+
+def test_citation_empty_array(api):
+ # distinction between empty array (no citations) and no array (hidden)
+
+ r1 = ReleaseEntity(title="citation null")
+ r2 = ReleaseEntity(title="citation empty array")
+ r1.refs = None
+ r2.refs = []
+
+ eg = quick_eg(api)
+ r1 = api.get_release(api.create_release(r1, editgroup_id=eg.editgroup_id).ident)
+ r2 = api.get_release(api.create_release(r2, editgroup_id=eg.editgroup_id).ident)
+ api.accept_editgroup(eg.editgroup_id)
+
+ print(r1.refs)
+ print(r2.refs)
+ assert r1.refs == []
+ assert r1.refs == r2.refs
+
+ r1b = api.get_release(r1.ident, hide="refs")
+ assert r1b.refs == None
+
+def test_citation_encoding(api):
+ # escape sequences and non-ASCII text (eg, a literal \u0050 in a title) must round-trip unchanged
+
+ r1 = ReleaseEntity(title="citation encoding")
+ title = "title-unicode \\u0050 \\\" "
+ container = "container-unicode ☃︎ ä ö ü スティー"
+ extra = extra={'a': 1, 'b': 2, 'ö': 3}
+ locator = "p123"
+ r1.refs = [
+ ReleaseRef(key="1", year=1923, title=title, container_name=container,
+ extra=extra, locator=locator),
+ ReleaseRef(key="2"),
+ ]
+
+ eg = quick_eg(api)
+ r1 = api.get_release(api.create_release(r1, editgroup_id=eg.editgroup_id).ident)
+ api.accept_editgroup(eg.editgroup_id)
+
+ assert title == r1.refs[0].title
+ assert container == r1.refs[0].container_name
+ assert extra == r1.refs[0].extra
+ assert locator == r1.refs[0].locator
+
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index e89954ad..c0df5a2a 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -538,6 +538,9 @@ dependencies = [
"rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sentry 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde_ignored 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.34 (registry+https://github.com/rust-lang/crates.io-index)",
"sha1 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"slog 2.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 155e3c8a..c5a52845 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -37,6 +37,9 @@ slog = "^2.0"
slog-term = "*"
slog-async = "*"
serde_json = "1.0"
+serde = "*"
+serde_derive = "1.0"
+serde_ignored = "0.0.4"
sentry = { version = "^0.12", default-features = false, features = ["with_client_implementation", "with_backtrace", "with_panic", "with_log", "with_rust_info", "with_failure"] }
cadence = "^0.16"
diff --git a/rust/migrations/2019-01-01-000000_init/down.sql b/rust/migrations/2019-01-01-000000_init/down.sql
index 30e712e3..e238a690 100644
--- a/rust/migrations/2019-01-01-000000_init/down.sql
+++ b/rust/migrations/2019-01-01-000000_init/down.sql
@@ -2,6 +2,7 @@
-- in opposite order as up.sql
DROP TABLE IF EXISTS release_contrib CASCADE;
+DROP TABLE IF EXISTS refs_blob CASCADE;
DROP TABLE IF EXISTS release_ref CASCADE;
DROP TABLE IF EXISTS file_rev_release CASCADE;
DROP TABLE IF EXISTS fileset_rev_release CASCADE;
diff --git a/rust/migrations/2019-01-01-000000_init/up.sql b/rust/migrations/2019-01-01-000000_init/up.sql
index b4c7a684..57f91d44 100644
--- a/rust/migrations/2019-01-01-000000_init/up.sql
+++ b/rust/migrations/2019-01-01-000000_init/up.sql
@@ -82,6 +82,12 @@ CREATE TABLE abstracts (
content TEXT NOT NULL
);
+CREATE TABLE refs_blob (
+ -- fixed size hash (in hex). TODO: switch to bytes
+ sha1 TEXT PRIMARY KEY CHECK (octet_length(sha1) = 40),
+ refs_json JSONB NOT NULL
+);
+
-------------------- Creators -----------------------------------------------
CREATE TABLE creator_rev (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
@@ -326,6 +332,7 @@ CREATE TABLE release_rev (
work_ident_id UUID NOT NULL, -- FOREIGN KEY; see ALTER below
container_ident_id UUID REFERENCES container_ident(id),
+ refs_blob_sha1 TEXT REFERENCES refs_blob(sha1),
title TEXT NOT NULL,
release_type TEXT, -- TODO: enum
release_status TEXT, -- TODO: enum
@@ -436,20 +443,19 @@ CREATE INDEX release_contrib_rev_idx ON release_contrib(release_rev);
CREATE INDEX release_contrib_creator_idx ON release_contrib(creator_ident_id);
CREATE TABLE release_ref (
- id BIGSERIAL PRIMARY KEY,
release_rev UUID REFERENCES release_rev(id) NOT NULL,
- target_release_ident_id UUID REFERENCES release_ident(id), -- or work?
- index_val INTEGER,
- key TEXT,
- extra_json JSONB, -- title, year, container_title, locator (aka, page), oci_id
- container_name TEXT,
- year INTEGER,
- title TEXT,
- locator TEXT
- -- TODO: oci_id (TEXT)
-);
-
-CREATE INDEX release_ref_rev_idx ON release_ref(release_rev);
+ index_val INTEGER NOT NULL,
+ target_release_ident_id UUID REFERENCES release_ident(id) NOT NULL,
+ -- all other fields are interned in refs_blob as JSONB
+ -- key TEXT,
+ -- extra_json JSONB, -- title, year, container_title, locator (aka, page), oci_id
+ -- container_name TEXT,
+ -- year INTEGER,
+ -- title TEXT,
+ -- locator TEXT
+ PRIMARY KEY(release_rev, index_val)
+);
+
CREATE INDEX release_ref_target_release_idx ON release_ref(target_release_ident_id);
CREATE TABLE file_rev_release (
@@ -632,10 +638,14 @@ INSERT INTO work_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VA
('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),
('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-5555-FFF000000002');
-INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, release_year, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language) VALUES
- ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001', null, 'example title', null, null, null, null, null, null, null, null, null, null, null, null, null, null, null),
- ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001', 'bigger example', 'article-journal', null, '2018-01-01', 2018, '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn'),
- ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', 2005, '10.1371/journal.pmed.0020124', null, null, null, null, null, '2', '8', 'e124', 'Public Library of Science', 'en');
+INSERT INTO refs_blob (sha1, refs_json) VALUES
+ ('22222222c2979a62d29b18b537e50b2b093be27e', '[{}, {}, {}, {}, {"extra": {"unstructured":"citation note"}}]'),
+ ('33333333c2979a62d29b18b537e50b2b093be27e', '[{"extra": {"unstructured": "Ioannidis JP, Haidich AB, Lau J. Any casualties in the clash of randomised and observational evidence? BMJ. 2001;322:879–880"}}, {"extra": {"unstructured":"Lawlor DA, Davey Smith G, Kundu D, Bruckdorfer KR, Ebrahim S. Those confounded vitamins: What can we learn from the differences between observational versus randomised trial evidence? Lancet. 2004;363:1724–1727."}}, {"extra": {"unstructured":"Vandenbroucke JP. When are observational studies as credible as randomised trials? Lancet. 2004;363:1728–1731."}}, {"extra": {"unstructured":"Michiels S, Koscielny S, Hill C. Prediction of cancer outcome with microarrays: A multiple random validation strategy. Lancet. 2005;365:488–492."}}, {"extra": {"unstructured":"Ioannidis JPA, Ntzani EE, Trikalinos TA, Contopoulos-Ioannidis DG. Replication validity of genetic association studies. Nat Genet. 2001;29:306–309."}}, {"extra": {"unstructured":"Colhoun HM, McKeigue PM, Davey Smith G. Problems of reporting genetic associations with complex outcomes. Lancet. 2003;361:865–872."}}]');
+
+INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, release_year, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language, refs_blob_sha1) VALUES
+ ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001', null, 'example title', null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null),
+ ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001', 'bigger example', 'article-journal', null, '2018-01-01', 2018, '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn', '22222222c2979a62d29b18b537e50b2b093be27e'),
+ ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', 2005, '10.1371/journal.pmed.0020124', null, null, null, null, null, '2', '8', 'e124', 'Public Library of Science', 'en', '33333333c2979a62d29b18b537e50b2b093be27e');
INSERT INTO release_ident (id, is_live, rev_id, redirect_id) VALUES
('00000000-0000-0000-4444-000000000001', true, '00000000-0000-0000-4444-FFF000000001', null), -- aaaaaaaaaaaaarceaaaaaaaaae
@@ -656,15 +666,14 @@ INSERT INTO release_contrib (release_rev, creator_ident_id, raw_name, role, inde
('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-2222-000000000002', 'some contrib', 'editor', 4),
('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-2222-000000000003', 'John P. A. Ioannidis', 'author', 0);
-INSERT INTO release_ref (release_rev, target_release_ident_id, index_val, extra_json) VALUES
- ('00000000-0000-0000-4444-FFF000000002', null, null, null),
- ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-4444-000000000001', 4, '{"unstructured":"citation note"}'),
- ('00000000-0000-0000-4444-FFF000000003', null, 0, '{"unstructured": "Ioannidis JP, Haidich AB, Lau J. Any casualties in the clash of randomised and observational evidence? BMJ. 2001;322:879–880"}'),
- ('00000000-0000-0000-4444-FFF000000003', null, 1, '{"unstructured":"Lawlor DA, Davey Smith G, Kundu D, Bruckdorfer KR, Ebrahim S. Those confounded vitamins: What can we learn from the differences between observational versus randomised trial evidence? Lancet. 2004;363:1724–1727."}'),
- ('00000000-0000-0000-4444-FFF000000003', null, 2, '{"unstructured":"Vandenbroucke JP. When are observational studies as credible as randomised trials? Lancet. 2004;363:1728–1731."}'),
- ('00000000-0000-0000-4444-FFF000000003', null, 3, '{"unstructured":"Michiels S, Koscielny S, Hill C. Prediction of cancer outcome with microarrays: A multiple random validation strategy. Lancet. 2005;365:488–492."}'),
- ('00000000-0000-0000-4444-FFF000000003', null, 4, '{"unstructured":"Ioannidis JPA, Ntzani EE, Trikalinos TA, Contopoulos-Ioannidis DG. Replication validity of genetic association studies. Nat Genet. 2001;29:306–309."}'),
- ('00000000-0000-0000-4444-FFF000000003', null, 5, '{"unstructured":"Colhoun HM, McKeigue PM, Davey Smith G. Problems of reporting genetic associations with complex outcomes. Lancet. 2003;361:865–872."}');
+INSERT INTO release_ref (release_rev, index_val, target_release_ident_id) VALUES
+ ('00000000-0000-0000-4444-FFF000000002', 4, '00000000-0000-0000-4444-000000000001'), -- '{"unstructured":"citation note"}'),
+ ('00000000-0000-0000-4444-FFF000000003', 0, '00000000-0000-0000-4444-000000000001'), --'{"unstructured": "Ioannidis JP, Haidich AB, Lau J. Any casualties in the clash of randomised and observational evidence? BMJ. 2001;322:879–880"}'),
+ ('00000000-0000-0000-4444-FFF000000003', 1, '00000000-0000-0000-4444-000000000001'), --'{"unstructured":"Lawlor DA, Davey Smith G, Kundu D, Bruckdorfer KR, Ebrahim S. Those confounded vitamins: What can we learn from the differences between observational versus randomised trial evidence? Lancet. 2004;363:1724–1727."}'),
+ ('00000000-0000-0000-4444-FFF000000003', 2, '00000000-0000-0000-4444-000000000001'), --'{"unstructured":"Vandenbroucke JP. When are observational studies as credible as randomised trials? Lancet. 2004;363:1728–1731."}'),
+ ('00000000-0000-0000-4444-FFF000000003', 3, '00000000-0000-0000-4444-000000000001'), --'{"unstructured":"Michiels S, Koscielny S, Hill C. Prediction of cancer outcome with microarrays: A multiple random validation strategy. Lancet. 2005;365:488–492."}'),
+ ('00000000-0000-0000-4444-FFF000000003', 4, '00000000-0000-0000-4444-000000000001'), --'{"unstructured":"Ioannidis JPA, Ntzani EE, Trikalinos TA, Contopoulos-Ioannidis DG. Replication validity of genetic association studies. Nat Genet. 2001;29:306–309."}'),
+ ('00000000-0000-0000-4444-FFF000000003', 5, '00000000-0000-0000-4444-000000000001'); --'{"unstructured":"Colhoun HM, McKeigue PM, Davey Smith G. Problems of reporting genetic associations with complex outcomes. Lancet. 2003;361:865–872."}');
INSERT INTO file_rev_release (file_rev, target_release_ident_id) VALUES
('00000000-0000-0000-3333-FFF000000002', '00000000-0000-0000-4444-000000000002'),
diff --git a/rust/src/database_models.rs b/rust/src/database_models.rs
index 63fbcb29..b76b469a 100644
--- a/rust/src/database_models.rs
+++ b/rust/src/database_models.rs
@@ -4,7 +4,7 @@ use crate::database_schema::*;
use crate::errors::*;
use crate::identifiers::uuid2fcid;
use chrono;
-use fatcat_api_spec::models::{ChangelogEntry, Editgroup, EditgroupAnnotation, Editor, EntityEdit};
+use fatcat_api_spec::models::{ChangelogEntry, Editgroup, EditgroupAnnotation, Editor, EntityEdit, ReleaseRef};
use serde_json;
use uuid::Uuid;
@@ -376,6 +376,7 @@ pub struct ReleaseRevRow {
pub extra_json: Option<serde_json::Value>,
pub work_ident_id: Uuid,
pub container_ident_id: Option<Uuid>,
+ pub refs_blob_sha1: Option<String>,
pub title: String,
pub release_type: Option<String>,
pub release_status: Option<String>,
@@ -400,6 +401,7 @@ pub struct ReleaseRevNewRow {
pub extra_json: Option<serde_json::Value>,
pub work_ident_id: Uuid,
pub container_ident_id: Option<Uuid>,
+ pub refs_blob_sha1: Option<String>,
pub title: String,
pub release_type: Option<String>,
pub release_status: Option<String>,
@@ -491,35 +493,102 @@ pub struct ReleaseContribNewRow {
pub extra_json: Option<serde_json::Value>,
}
-#[derive(Debug, Queryable, Identifiable, Associations, AsChangeset)]
+#[derive(Debug, Queryable, Insertable, Associations, AsChangeset)]
#[table_name = "release_ref"]
pub struct ReleaseRefRow {
- pub id: i64,
pub release_rev: Uuid,
- pub target_release_ident_id: Option<Uuid>,
- pub index_val: Option<i32>,
- pub key: Option<String>,
- pub extra_json: Option<serde_json::Value>,
- pub container_name: Option<String>,
- pub year: Option<i32>,
- pub title: Option<String>,
- pub locator: Option<String>,
+ pub index_val: i32,
+ pub target_release_ident_id: Uuid,
}
-#[derive(Debug, Insertable, AsChangeset)]
-#[table_name = "release_ref"]
-pub struct ReleaseRefNewRow {
- pub release_rev: Uuid,
- pub target_release_ident_id: Option<Uuid>,
- pub index_val: Option<i32>,
+#[derive(Debug, Queryable, Insertable, Associations, AsChangeset)]
+#[table_name = "refs_blob"]
+pub struct RefsBlobRow {
+ pub sha1: String,
+ pub refs_json: serde_json::Value,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+/// This model is a stable representation of what goes in a RefsBlobRow `refs_json` field (an array
+/// of this model). We could rely on the `ReleaseRef` API spec model directly, but that would lock
+/// the database contents to the API spec rigidly; by defining this struct independently, we can
+/// migrate the schemas. To start, this is a direct copy of the `ReleaseRef` model.
+pub struct RefsBlobJson {
+ #[serde(rename = "index")]
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub index: Option<i64>,
+
+ /// base32-encoded unique identifier
+ #[serde(rename = "target_release_id")]
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub target_release_id: Option<String>,
+
+ #[serde(rename = "extra")]
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub extra: Option<serde_json::Value>,
+
+ #[serde(rename = "key")]
+ #[serde(skip_serializing_if = "Option::is_none")]
pub key: Option<String>,
- pub extra_json: Option<serde_json::Value>,
+
+ #[serde(rename = "year")]
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub year: Option<i64>,
+
+ #[serde(rename = "container_name")]
+ #[serde(skip_serializing_if = "Option::is_none")]
pub container_name: Option<String>,
- pub year: Option<i32>,
+
+ #[serde(rename = "title")]
+ #[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
+
+ #[serde(rename = "locator")]
+ #[serde(skip_serializing_if = "Option::is_none")]
pub locator: Option<String>,
}
+impl RefsBlobJson {
+ pub fn into_model(self) -> ReleaseRef {
+ ReleaseRef {
+ index: self.index,
+ target_release_id: self.target_release_id,
+ extra: self.extra,
+ key: self.key,
+ year: self.year,
+ container_name: self.container_name,
+ title: self.title,
+ locator: self.locator,
+ }
+ }
+
+ pub fn to_model(&self) -> ReleaseRef {
+ ReleaseRef {
+ index: self.index,
+ target_release_id: self.target_release_id.clone(),
+ extra: self.extra.clone(),
+ key: self.key.clone(),
+ year: self.year,
+ container_name: self.container_name.clone(),
+ title: self.title.clone(),
+ locator: self.locator.clone(),
+ }
+ }
+
+ pub fn from_model(model: &ReleaseRef) -> RefsBlobJson {
+ RefsBlobJson {
+ index: model.index,
+ target_release_id: model.target_release_id.clone(),
+ extra: model.extra.clone(),
+ key: model.key.clone(),
+ year: model.year,
+ container_name: model.container_name.clone(),
+ title: model.title.clone(),
+ locator: model.locator.clone(),
+ }
+ }
+}
+
#[derive(Debug, Queryable, Insertable, Associations, AsChangeset)]
#[table_name = "file_rev_release"]
pub struct FileRevReleaseRow {
diff --git a/rust/src/database_schema.rs b/rust/src/database_schema.rs
index 3bc57d95..0a067a10 100644
--- a/rust/src/database_schema.rs
+++ b/rust/src/database_schema.rs
@@ -239,6 +239,13 @@ table! {
}
table! {
+ refs_blob (sha1) {
+ sha1 -> Text,
+ refs_json -> Jsonb,
+ }
+}
+
+table! {
release_contrib (id) {
id -> Int8,
release_rev -> Uuid,
@@ -273,17 +280,10 @@ table! {
}
table! {
- release_ref (id) {
- id -> Int8,
+ release_ref (release_rev, index_val) {
release_rev -> Uuid,
- target_release_ident_id -> Nullable<Uuid>,
- index_val -> Nullable<Int4>,
- key -> Nullable<Text>,
- extra_json -> Nullable<Jsonb>,
- container_name -> Nullable<Text>,
- year -> Nullable<Int4>,
- title -> Nullable<Text>,
- locator -> Nullable<Text>,
+ index_val -> Int4,
+ target_release_ident_id -> Uuid,
}
}
@@ -293,6 +293,7 @@ table! {
extra_json -> Nullable<Jsonb>,
work_ident_id -> Uuid,
container_ident_id -> Nullable<Uuid>,
+ refs_blob_sha1 -> Nullable<Text>,
title -> Text,
release_type -> Nullable<Text>,
release_status -> Nullable<Text>,
@@ -439,6 +440,7 @@ joinable!(release_ident -> release_rev (rev_id));
joinable!(release_ref -> release_ident (target_release_ident_id));
joinable!(release_ref -> release_rev (release_rev));
joinable!(release_rev -> container_ident (container_ident_id));
+joinable!(release_rev -> refs_blob (refs_blob_sha1));
joinable!(release_rev -> work_ident (work_ident_id));
joinable!(release_rev_abstract -> abstracts (abstract_sha1));
joinable!(release_rev_abstract -> release_rev (release_rev));
@@ -475,6 +477,7 @@ allow_tables_to_appear_in_same_query!(
fileset_rev_file,
fileset_rev_release,
fileset_rev_url,
+ refs_blob,
release_contrib,
release_edit,
release_ident,
diff --git a/rust/src/entity_crud.rs b/rust/src/entity_crud.rs
index ce1c1ed7..09ce9542 100644
--- a/rust/src/entity_crud.rs
+++ b/rust/src/entity_crud.rs
@@ -1812,28 +1812,27 @@ impl EntityCrud for ReleaseEntity {
None => (None, None, None),
};
- let refs: Option<Vec<ReleaseRef>> = match hide.refs {
- true => None,
- false => Some(
- release_ref::table
+ let refs: Option<Vec<ReleaseRef>> = match (hide.refs, rev_row.refs_blob_sha1) {
+ (true, _) => None,
+ (false, None) => Some(vec![]),
+ (false, Some(sha1)) => Some({
+ let refs_blob: RefsBlobRow = refs_blob::table
+ .find(sha1) // checked in match
+ .get_result(conn)?;
+ let refs: Vec<RefsBlobJson> = serde_json::from_value(refs_blob.refs_json)?;
+ let mut refs: Vec<ReleaseRef> = refs.into_iter().map(|j| j.into_model()).collect();
+ let ref_rows: Vec<ReleaseRefRow> = release_ref::table
.filter(release_ref::release_rev.eq(rev_row.id))
.order(release_ref::index_val.asc())
- .get_results(conn)?
- .into_iter()
- .map(|r: ReleaseRefRow| ReleaseRef {
- index: r.index_val.map(|v| v as i64),
- key: r.key,
- extra: r.extra_json,
- container_name: r.container_name,
- year: r.year.map(|v| v as i64),
- title: r.title,
- locator: r.locator,
- target_release_id: r
- .target_release_ident_id
- .map(|v| FatcatId::from_uuid(&v).to_string()),
- })
- .collect(),
- ),
+ .get_results(conn)?;
+ for index in 0..refs.len() {
+ refs[index].index = Some(index as i64)
+ }
+ for row in ref_rows {
+ refs[row.index_val as usize].target_release_id = Some(FatcatId::from_uuid(&row.target_release_ident_id).to_string());
+ }
+ refs
+ }),
};
let contribs: Option<Vec<ReleaseContrib>> = match hide.contribs {
@@ -1953,12 +1952,60 @@ impl EntityCrud for ReleaseEntity {
.into());
}
+ // First, calculate and upsert any refs JSON blobs and record the SHA1 keys, so they can be
+ // included in the release_rev row itself
+ let mut refs_blob_rows: Vec<RefsBlobRow> = vec![];
+ let mut refs_blob_sha1: Vec<Option<String>> = vec![];
+ for model in models.iter() {
+ match &model.refs {
+ None => {
+ refs_blob_sha1.push(None);
+ },
+ Some(ref_list) => {
+ if ref_list.is_empty() {
+ refs_blob_sha1.push(None);
+ continue
+ }
+ // Have to strip out target refs and indexes, or hashing won't work well when
+ // these change
+ let ref_list: Vec<RefsBlobJson> = ref_list
+ .iter()
+ .map(|r: &ReleaseRef| {
+ let mut r = RefsBlobJson::from_model(r);
+ r.target_release_id = None;
+ r.index = None;
+ r
+ })
+ .collect();
+ // TODO: maybe `canonical_json` crate?
+ let refs_json = serde_json::to_value(ref_list)?;
+ let refs_str = refs_json.to_string();
+ let sha1 = Sha1::from(refs_str).hexdigest();
+ let blob = RefsBlobRow { sha1: sha1.clone(), refs_json };
+ refs_blob_rows.push(blob);
+ refs_blob_sha1.push(Some(sha1));
+ }
+ };
+ }
+
+ if !refs_blob_rows.is_empty() {
+ // Sort of an "upsert"; only inserts new refs_blob rows if they don't already exist
+ insert_into(refs_blob::table)
+ .values(&refs_blob_rows)
+ .on_conflict(refs_blob::sha1)
+ .do_nothing()
+ .execute(conn)?;
+ }
+
+ // Then the main release_revs themselves
let rev_ids: Vec<Uuid> = insert_into(release_rev::table)
.values(
models
.iter()
- .map(|model| {
+ .zip(refs_blob_sha1.into_iter())
+ .map(|(model, refs_sha1)| {
Ok(ReleaseRevNewRow {
+ refs_blob_sha1: refs_sha1,
title: model.title.clone().unwrap(), // titles checked above
release_type: model.release_type.clone(),
release_status: model.release_status.clone(),
@@ -1991,34 +2038,30 @@ impl EntityCrud for ReleaseEntity {
.returning(release_rev::id)
.get_results(conn)?;
- let mut release_ref_rows: Vec<ReleaseRefNewRow> = vec![];
+ let mut release_ref_rows: Vec<ReleaseRefRow> = vec![];
let mut release_contrib_rows: Vec<ReleaseContribNewRow> = vec![];
let mut abstract_rows: Vec<AbstractsRow> = vec![];
let mut release_abstract_rows: Vec<ReleaseRevAbstractNewRow> = vec![];
for (model, rev_id) in models.iter().zip(rev_ids.iter()) {
+
+ // The release_rev ids were not known until the insert above, so we need to iterate over the refs again
match &model.refs {
None => (),
Some(ref_list) => {
- let these_ref_rows: Vec<ReleaseRefNewRow> = ref_list
+ let these_ref_rows: Vec<ReleaseRefRow> = ref_list
.iter()
- .map(|r| {
- Ok(ReleaseRefNewRow {
+ .enumerate()
+ .filter(|(_, r)| r.target_release_id.is_some())
+ .map(|(index, r)| {
+ Ok(ReleaseRefRow {
release_rev: *rev_id,
- target_release_ident_id: match r.target_release_id.clone() {
- None => None,
- Some(v) => Some(FatcatId::from_str(&v)?.to_uuid()),
- },
- index_val: r.index.map(|v| v as i32),
- key: r.key.clone(),
- container_name: r.container_name.clone(),
- year: r.year.map(|v| v as i32),
- title: r.title.clone(),
- locator: r.locator.clone(),
- extra_json: r.extra.clone(),
+ // unwrap() checked by is_some() filter
+ target_release_ident_id: FatcatId::from_str(&r.target_release_id.clone().unwrap())?.to_uuid(),
+ index_val: index as i32,
})
})
- .collect::<Result<Vec<ReleaseRefNewRow>>>()?;
+ .collect::<Result<Vec<ReleaseRefRow>>>()?;
release_ref_rows.extend(these_ref_rows);
}
};
@@ -2053,7 +2096,7 @@ impl EntityCrud for ReleaseEntity {
.iter()
.filter(|ea| ea.content.is_some())
.map(|c| AbstractsRow {
- sha1: Sha1::from(c.content.clone().unwrap()).hexdigest(),
+ sha1: Sha1::from(c.content.as_ref().unwrap()).hexdigest(),
content: c.content.clone().unwrap(),
})
.collect();
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index b7661334..d089adf8 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -9,6 +9,8 @@ extern crate log;
extern crate lazy_static;
#[macro_use]
extern crate failure;
+#[macro_use]
+extern crate serde_derive;
pub mod auth;
pub mod database_models;
diff --git a/rust/tests/test_refs.rs b/rust/tests/test_refs.rs
new file mode 100644
index 00000000..a2bf37ba
--- /dev/null
+++ b/rust/tests/test_refs.rs
@@ -0,0 +1,161 @@
+
+use fatcat::identifiers::FatcatId;
+use fatcat::server;
+use std::str::FromStr;
+use fatcat::database_models::*;
+use fatcat::database_schema::*;
+use fatcat::entity_crud::{EntityCrud, HideFlags};
+use fatcat::editing::{make_edit_context, accept_editgroup};
+use fatcat_api_spec::models::*;
+use diesel::prelude::*;
+use uuid::Uuid;
+
+mod helpers;
+
+#[test]
+fn test_refs_blob() {
+ let server = server::create_test_server().unwrap();
+ let conn = server.db_pool.get().expect("db_pool error");
+ let editor_id = FatcatId::from_str(helpers::TEST_ADMIN_EDITOR_ID).unwrap();
+ let editgroup_id = helpers::quick_editgroup(&conn);
+ let edit_context = make_edit_context(&conn, editor_id, Some(editgroup_id), false).unwrap();
+
+ // this release entity should be unchanged after being inserted/fetched
+ let mut r1 = ReleaseEntity::new();
+ r1.title = Some("release-test hashes".to_string());
+ r1.refs = Some(vec![
+ ReleaseRef {
+ index: Some(0),
+ target_release_id: None,
+ extra: None,
+ key: Some("one".to_string()),
+ year: Some(1932),
+ container_name: Some("bogus container".to_string()),
+ title: Some("first bogus paper".to_string()),
+ locator: Some("p100".to_string()),
+ },
+ ReleaseRef {
+ index: Some(1),
+ target_release_id: Some("aaaaaaaaaaaaarceaaaaaaaaai".to_string()),
+ extra: None,
+ key: Some("one".to_string()),
+ year: Some(2032),
+ container_name: Some("bogus other container".to_string()),
+ title: Some("second bogus paper".to_string()),
+ locator: Some("p200".to_string()),
+ },
+ ]);
+
+ // this release entity should have the same refs_blob hash as r1. The indexes will change after fetching,
+ // but otherwise the fetched refs should be the same as the fetched r1 results.
+ let mut r2 = r1.clone();
+ r2.refs = Some(vec![
+ ReleaseRef {
+ index: None,
+ target_release_id: None,
+ extra: None,
+ key: Some("one".to_string()),
+ year: Some(1932),
+ container_name: Some("bogus container".to_string()),
+ title: Some("first bogus paper".to_string()),
+ locator: Some("p100".to_string()),
+ },
+ ReleaseRef {
+ index: Some(99),
+ target_release_id: Some("aaaaaaaaaaaaarceaaaaaaaaai".to_string()),
+ extra: None,
+ key: Some("one".to_string()),
+ year: Some(2032),
+ container_name: Some("bogus other container".to_string()),
+ title: Some("second bogus paper".to_string()),
+ locator: Some("p200".to_string()),
+ },
+ ]);
+
+ // this release entity has different ref *targets* and indexes, but should still have the same
+ // refs_blob hashes as r1/r2.
+ let mut r3 = r1.clone();
+ r3.refs = Some(vec![
+ ReleaseRef {
+ index: Some(1),
+ target_release_id: Some("aaaaaaaaaaaaarceaaaaaaaaae".to_string()),
+ extra: None,
+ key: Some("one".to_string()),
+ year: Some(1932),
+ container_name: Some("bogus container".to_string()),
+ title: Some("first bogus paper".to_string()),
+ locator: Some("p100".to_string()),
+ },
+ ReleaseRef {
+ index: Some(1),
+ target_release_id: Some("aaaaaaaaaaaaarceaaaaaaaaam".to_string()),
+ extra: None,
+ key: Some("one".to_string()),
+ year: Some(2032),
+ container_name: Some("bogus other container".to_string()),
+ title: Some("second bogus paper".to_string()),
+ locator: Some("p200".to_string()),
+ },
+ ]);
+
+ // this one is obviously just plain different (hashes shouldn't match)
+ let mut r4 = r1.clone();
+ r4.refs = Some(vec![
+ ReleaseRef {
+ index: Some(1),
+ target_release_id: Some("aaaaaaaaaaaaarceaaaaaaaaae".to_string()),
+ extra: None,
+ key: Some("one".to_string()),
+ year: Some(1932),
+ container_name: Some("bogus container".to_string()),
+ title: Some("first bogus paper".to_string()),
+ locator: Some("p100".to_string()),
+ },
+ ]);
+
+ let edit1 = r1.db_create(&conn, &edit_context).unwrap();
+ let edit2 = r2.db_create(&conn, &edit_context).unwrap();
+ let edit3 = r3.db_create(&conn, &edit_context).unwrap();
+ let edit4 = r4.db_create(&conn, &edit_context).unwrap();
+
+ let r1b = ReleaseEntity::db_get(&conn, edit1.ident_id.into(), HideFlags::none()).unwrap();
+ let r2b = ReleaseEntity::db_get(&conn, edit2.ident_id.into(), HideFlags::none()).unwrap();
+ let r3b = ReleaseEntity::db_get(&conn, edit3.ident_id.into(), HideFlags::none()).unwrap();
+ let r4b = ReleaseEntity::db_get(&conn, edit4.ident_id.into(), HideFlags::none()).unwrap();
+ assert_eq!(r1b.refs, r1.refs);
+ assert_eq!(r1b.refs, r2b.refs);
+ assert_ne!(r1b.refs, r3b.refs);
+ assert_ne!(r1b.refs, r4b.refs);
+
+ let r1_row: ReleaseRevRow = release_rev::table
+ .find(Uuid::from_str(&r1b.revision.clone().unwrap()).unwrap())
+ .get_result(&conn).unwrap();
+ let r2_row: ReleaseRevRow = release_rev::table
+ .find(Uuid::from_str(&r2b.revision.unwrap()).unwrap())
+ .get_result(&conn).unwrap();
+ let r3_row: ReleaseRevRow = release_rev::table
+ .find(Uuid::from_str(&r3b.revision.clone().unwrap()).unwrap())
+ .get_result(&conn).unwrap();
+ let r4_row: ReleaseRevRow = release_rev::table
+ .find(Uuid::from_str(&r4b.revision.unwrap()).unwrap())
+ .get_result(&conn).unwrap();
+ assert_eq!(r1_row.refs_blob_sha1, r2_row.refs_blob_sha1);
+ assert_eq!(r1_row.refs_blob_sha1, r3_row.refs_blob_sha1);
+ assert_ne!(r1_row.refs_blob_sha1, r4_row.refs_blob_sha1);
+
+ // ensure that SHA1 hashing is stable over time (as much as possible!)
+ assert_eq!(r1_row.refs_blob_sha1, Some("4e38812fbf99e00e0cb648896e9f7a9d58c5ab23".to_string()));
+
+ // update r1 with new target_idents (from r3); the refs_blob SHA1 still shouldn't change
+ accept_editgroup(&conn, editgroup_id).unwrap();
+ let editgroup_id = helpers::quick_editgroup(&conn);
+ let edit_context = make_edit_context(&conn, editor_id, Some(editgroup_id), false).unwrap();
+
+ let _edit4 = r3b.db_update(&conn, &edit_context, edit1.ident_id.into()).unwrap();
+ let r1c = ReleaseEntity::db_get(&conn, edit1.ident_id.into(), HideFlags::none()).unwrap();
+ let r1c_row: ReleaseRevRow = release_rev::table
+ .find(Uuid::from_str(&r1c.revision.unwrap()).unwrap())
+ .get_result(&conn).unwrap();
+ assert_eq!(r1_row.refs_blob_sha1, r1c_row.refs_blob_sha1);
+}
+