summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-01-11 19:43:07 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-01-11 19:43:07 -0800
commit9ca9ed84f0a404c3740a7ac33978bd9d6c6a0c4a (patch)
tree0ec44f67e707c736f3b541dc1614f20663fa07fb
parentf6273b49b72447760f68fead35b885b4c2e8aca8 (diff)
downloadfatcat-9ca9ed84f0a404c3740a7ac33978bd9d6c6a0c4a.tar.gz
fatcat-9ca9ed84f0a404c3740a7ac33978bd9d6c6a0c4a.zip
first draft implementation of ref interning
-rw-r--r--rust/src/database_models.rs29
-rw-r--r--rust/src/database_schema.rs23
-rw-r--r--rust/src/entity_crud.rs114
3 files changed, 102 insertions, 64 deletions
diff --git a/rust/src/database_models.rs b/rust/src/database_models.rs
index 63fbcb29..47fd062d 100644
--- a/rust/src/database_models.rs
+++ b/rust/src/database_models.rs
@@ -376,6 +376,7 @@ pub struct ReleaseRevRow {
pub extra_json: Option<serde_json::Value>,
pub work_ident_id: Uuid,
pub container_ident_id: Option<Uuid>,
+ pub refs_blob_sha1: Option<String>,
pub title: String,
pub release_type: Option<String>,
pub release_status: Option<String>,
@@ -400,6 +401,7 @@ pub struct ReleaseRevNewRow {
pub extra_json: Option<serde_json::Value>,
pub work_ident_id: Uuid,
pub container_ident_id: Option<Uuid>,
+ pub refs_blob_sha1: Option<String>,
pub title: String,
pub release_type: Option<String>,
pub release_status: Option<String>,
@@ -491,33 +493,28 @@ pub struct ReleaseContribNewRow {
pub extra_json: Option<serde_json::Value>,
}
-#[derive(Debug, Queryable, Identifiable, Associations, AsChangeset)]
+#[derive(Debug, Queryable, Insertable, Associations, AsChangeset)]
#[table_name = "release_ref"]
pub struct ReleaseRefRow {
- pub id: i64,
pub release_rev: Uuid,
- pub target_release_ident_id: Option<Uuid>,
- pub index_val: Option<i32>,
- pub key: Option<String>,
- pub extra_json: Option<serde_json::Value>,
- pub container_name: Option<String>,
- pub year: Option<i32>,
- pub title: Option<String>,
- pub locator: Option<String>,
+ pub index_val: i32,
+ pub target_release_ident_id: Uuid,
}
-#[derive(Debug, Insertable, AsChangeset)]
-#[table_name = "release_ref"]
-pub struct ReleaseRefNewRow {
- pub release_rev: Uuid,
- pub target_release_ident_id: Option<Uuid>,
- pub index_val: Option<i32>,
+/* These fields now interned in JSON blob
pub key: Option<String>,
pub extra_json: Option<serde_json::Value>,
pub container_name: Option<String>,
pub year: Option<i32>,
pub title: Option<String>,
pub locator: Option<String>,
+*/
+
+#[derive(Debug, Queryable, Insertable, Associations, AsChangeset)]
+#[table_name = "refs_blob"]
+pub struct RefsBlobRow {
+ pub sha1: String,
+ pub refs_json: serde_json::Value,
}
#[derive(Debug, Queryable, Insertable, Associations, AsChangeset)]
diff --git a/rust/src/database_schema.rs b/rust/src/database_schema.rs
index 3bc57d95..0a067a10 100644
--- a/rust/src/database_schema.rs
+++ b/rust/src/database_schema.rs
@@ -239,6 +239,13 @@ table! {
}
table! {
+ refs_blob (sha1) {
+ sha1 -> Text,
+ refs_json -> Jsonb,
+ }
+}
+
+table! {
release_contrib (id) {
id -> Int8,
release_rev -> Uuid,
@@ -273,17 +280,10 @@ table! {
}
table! {
- release_ref (id) {
- id -> Int8,
+ release_ref (release_rev, index_val) {
release_rev -> Uuid,
- target_release_ident_id -> Nullable<Uuid>,
- index_val -> Nullable<Int4>,
- key -> Nullable<Text>,
- extra_json -> Nullable<Jsonb>,
- container_name -> Nullable<Text>,
- year -> Nullable<Int4>,
- title -> Nullable<Text>,
- locator -> Nullable<Text>,
+ index_val -> Int4,
+ target_release_ident_id -> Uuid,
}
}
@@ -293,6 +293,7 @@ table! {
extra_json -> Nullable<Jsonb>,
work_ident_id -> Uuid,
container_ident_id -> Nullable<Uuid>,
+ refs_blob_sha1 -> Nullable<Text>,
title -> Text,
release_type -> Nullable<Text>,
release_status -> Nullable<Text>,
@@ -439,6 +440,7 @@ joinable!(release_ident -> release_rev (rev_id));
joinable!(release_ref -> release_ident (target_release_ident_id));
joinable!(release_ref -> release_rev (release_rev));
joinable!(release_rev -> container_ident (container_ident_id));
+joinable!(release_rev -> refs_blob (refs_blob_sha1));
joinable!(release_rev -> work_ident (work_ident_id));
joinable!(release_rev_abstract -> abstracts (abstract_sha1));
joinable!(release_rev_abstract -> release_rev (release_rev));
@@ -475,6 +477,7 @@ allow_tables_to_appear_in_same_query!(
fileset_rev_file,
fileset_rev_release,
fileset_rev_url,
+ refs_blob,
release_contrib,
release_edit,
release_ident,
diff --git a/rust/src/entity_crud.rs b/rust/src/entity_crud.rs
index ce1c1ed7..6f0f77aa 100644
--- a/rust/src/entity_crud.rs
+++ b/rust/src/entity_crud.rs
@@ -1812,28 +1812,26 @@ impl EntityCrud for ReleaseEntity {
None => (None, None, None),
};
- let refs: Option<Vec<ReleaseRef>> = match hide.refs {
- true => None,
- false => Some(
- release_ref::table
+ let refs: Option<Vec<ReleaseRef>> = match (hide.refs, rev_row.refs_blob_sha1) {
+ (true, _) => None,
+ (false, None) => None,
+ (false, Some(sha1)) => Some({
+ let refs_blob: RefsBlobRow = refs_blob::table
+ .find(sha1) // checked in match
+ .get_result(conn)?;
+ let mut refs: Vec<ReleaseRef> = serde_json::from_value(refs_blob.refs_json)?;
+ let ref_rows: Vec<ReleaseRefRow> = release_ref::table
.filter(release_ref::release_rev.eq(rev_row.id))
.order(release_ref::index_val.asc())
- .get_results(conn)?
- .into_iter()
- .map(|r: ReleaseRefRow| ReleaseRef {
- index: r.index_val.map(|v| v as i64),
- key: r.key,
- extra: r.extra_json,
- container_name: r.container_name,
- year: r.year.map(|v| v as i64),
- title: r.title,
- locator: r.locator,
- target_release_id: r
- .target_release_ident_id
- .map(|v| FatcatId::from_uuid(&v).to_string()),
- })
- .collect(),
- ),
+ .get_results(conn)?;
+ for index in 0..refs.len() {
+ refs[index].index = Some(index as i64)
+ }
+ for row in ref_rows {
+ refs[row.index_val as usize].target_release_id = Some(FatcatId::from_uuid(&row.target_release_ident_id).to_string());
+ }
+ refs
+ }),
};
let contribs: Option<Vec<ReleaseContrib>> = match hide.contribs {
@@ -1953,12 +1951,56 @@ impl EntityCrud for ReleaseEntity {
.into());
}
+ // First, calculate and upsert any refs JSON blobs and record the SHA1 keys, so they can be
+ // included in the release_rev row itself
+ let mut refs_blob_rows: Vec<RefsBlobRow> = vec![];
+ let mut refs_blob_sha1: Vec<Option<String>> = vec![];
+ for model in models.iter() {
+ match &model.refs {
+ None => {
+ refs_blob_sha1.push(None);
+ },
+ Some(ref_list) => {
+ // Have to strip out target refs and indexes, or hashing won't work well when
+ // these change
+ let ref_list: Vec<ReleaseRef> = ref_list
+ .iter()
+ .map(|r| {
+ let mut r = r.clone();
+ r.target_release_id = None;
+ r.index = None;
+ r
+ })
+ .collect();
+ // TODO: maybe `canonical_json` crate?
+ let refs_json = serde_json::to_value(ref_list)?;
+ let refs_str = refs_json.to_string();
+ let sha1 = Sha1::from(refs_str).hexdigest();
+ let blob = RefsBlobRow { sha1: sha1.clone(), refs_json };
+ refs_blob_rows.push(blob);
+ refs_blob_sha1.push(Some(sha1));
+ }
+ };
+ }
+
+ if !refs_blob_rows.is_empty() {
+ // Sort of an "upsert"; only inserts new abstract rows if they don't already exist
+ insert_into(refs_blob::table)
+ .values(&refs_blob_rows)
+ .on_conflict(refs_blob::sha1)
+ .do_nothing()
+ .execute(conn)?;
+ }
+
+ // Then the main release_revs themselves
let rev_ids: Vec<Uuid> = insert_into(release_rev::table)
.values(
models
.iter()
- .map(|model| {
+ .zip(refs_blob_sha1.into_iter())
+ .map(|(model, refs_sha1)| {
Ok(ReleaseRevNewRow {
+ refs_blob_sha1: refs_sha1,
title: model.title.clone().unwrap(), // titles checked above
release_type: model.release_type.clone(),
release_status: model.release_status.clone(),
@@ -1991,34 +2033,30 @@ impl EntityCrud for ReleaseEntity {
.returning(release_rev::id)
.get_results(conn)?;
- let mut release_ref_rows: Vec<ReleaseRefNewRow> = vec![];
+ let mut release_ref_rows: Vec<ReleaseRefRow> = vec![];
let mut release_contrib_rows: Vec<ReleaseContribNewRow> = vec![];
let mut abstract_rows: Vec<AbstractsRow> = vec![];
let mut release_abstract_rows: Vec<ReleaseRevAbstractNewRow> = vec![];
for (model, rev_id) in models.iter().zip(rev_ids.iter()) {
+
+ // We didn't know the release_rev id to insert here, so need to re-iterate over refs
match &model.refs {
None => (),
Some(ref_list) => {
- let these_ref_rows: Vec<ReleaseRefNewRow> = ref_list
+ let these_ref_rows: Vec<ReleaseRefRow> = ref_list
.iter()
- .map(|r| {
- Ok(ReleaseRefNewRow {
+ .enumerate()
+ .filter(|(_, r)| r.target_release_id.is_some())
+ .map(|(index, r)| {
+ Ok(ReleaseRefRow {
release_rev: *rev_id,
- target_release_ident_id: match r.target_release_id.clone() {
- None => None,
- Some(v) => Some(FatcatId::from_str(&v)?.to_uuid()),
- },
- index_val: r.index.map(|v| v as i32),
- key: r.key.clone(),
- container_name: r.container_name.clone(),
- year: r.year.map(|v| v as i32),
- title: r.title.clone(),
- locator: r.locator.clone(),
- extra_json: r.extra.clone(),
+ // unwrap() checked by is_some() filter
+ target_release_ident_id: FatcatId::from_str(&r.target_release_id.clone().unwrap())?.to_uuid(),
+ index_val: index as i32,
})
})
- .collect::<Result<Vec<ReleaseRefNewRow>>>()?;
+ .collect::<Result<Vec<ReleaseRefRow>>>()?;
release_ref_rows.extend(these_ref_rows);
}
};
@@ -2053,7 +2091,7 @@ impl EntityCrud for ReleaseEntity {
.iter()
.filter(|ea| ea.content.is_some())
.map(|c| AbstractsRow {
- sha1: Sha1::from(c.content.clone().unwrap()).hexdigest(),
+ sha1: Sha1::from(c.content.as_ref().unwrap()).hexdigest(),
content: c.content.clone().unwrap(),
})
.collect();