From 828deb42b6dbdb2d11527e073d96bde26d8fb979 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 25 Jul 2018 14:39:58 -0700 Subject: abstracts; more tests --- rust/src/api_helpers.rs | 55 ++++++++++++++++++++----------------- rust/src/api_server.rs | 66 ++++++++++++++++++++++++++++++++++++--------- rust/src/database_models.rs | 11 ++++++-- rust/src/database_schema.rs | 2 +- 4 files changed, 93 insertions(+), 41 deletions(-) (limited to 'rust/src') diff --git a/rust/src/api_helpers.rs b/rust/src/api_helpers.rs index ef07ee55..91c6200d 100644 --- a/rust/src/api_helpers.rs +++ b/rust/src/api_helpers.rs @@ -4,9 +4,8 @@ use database_schema::*; use diesel; use diesel::prelude::*; use errors::*; -use uuid::Uuid; use regex::Regex; - +use uuid::Uuid; pub fn get_or_create_editgroup(editor_id: Uuid, conn: &PgConnection) -> Result { // check for current active @@ -119,74 +118,80 @@ pub fn check_pmcid(raw: &str) -> Result<()> { if RE.is_match(raw) { Ok(()) } else { - Err(ErrorKind::MalformedExternalId( - format!("not a valid PubMed Central ID (PMCID): '{}' (expected, eg, 'PMC12345')", raw) - ).into()) + Err(ErrorKind::MalformedExternalId(format!( + "not a valid PubMed Central ID (PMCID): '{}' (expected, eg, 'PMC12345')", + raw + )).into()) } } pub fn check_pmid(raw: &str) -> Result<()> { lazy_static! { - static ref RE: Regex = Regex::new(r"^\d+$").unwrap(); + static ref RE: Regex = Regex::new(r"^\d+$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { - Err(ErrorKind::MalformedExternalId( - format!("not a valid PubMed ID (PMID): '{}' (expected, eg, '1234')", raw) - ).into()) + Err(ErrorKind::MalformedExternalId(format!( + "not a valid PubMed ID (PMID): '{}' (expected, eg, '1234')", + raw + )).into()) } } pub fn check_wikidata_qid(raw: &str) -> Result<()> { lazy_static! { - static ref RE: Regex = Regex::new(r"^Q\d+$").unwrap(); + static ref RE: Regex = Regex::new(r"^Q\d+$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { - Err(ErrorKind::MalformedExternalId( - format!("not a valid Wikidata QID: '{}' (expected, eg, 'Q1234')", raw) - ).into()) + Err(ErrorKind::MalformedExternalId(format!( + "not a valid Wikidata QID: '{}' (expected, eg, 'Q1234')", + raw + )).into()) } } pub fn check_doi(raw: &str) -> Result<()> { lazy_static! { - static ref RE: Regex = Regex::new(r"^10.\d{3,6}/.+$").unwrap(); + static ref RE: Regex = Regex::new(r"^10.\d{3,6}/.+$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { - Err(ErrorKind::MalformedExternalId( - format!("not a valid DOI: '{}' (expected, eg, '10.1234/aksjdfh')", raw) - ).into()) + Err(ErrorKind::MalformedExternalId(format!( + "not a valid DOI: '{}' (expected, eg, '10.1234/aksjdfh')", + raw + )).into()) } } pub fn check_issn(raw: &str) -> Result<()> { lazy_static! { - static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap(); + static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { - Err(ErrorKind::MalformedExternalId( - format!("not a valid ISSN: '{}' (expected, eg, '1234-5678')", raw) - ).into()) + Err(ErrorKind::MalformedExternalId(format!( + "not a valid ISSN: '{}' (expected, eg, '1234-5678')", + raw + )).into()) } } pub fn check_orcid(raw: &str) -> Result<()> { lazy_static! { - static ref RE: Regex = Regex::new(r"^\d{4}-\d{4}-\d{4}-\d{4}$").unwrap(); + static ref RE: Regex = Regex::new(r"^\d{4}-\d{4}-\d{4}-\d{4}$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { - Err(ErrorKind::MalformedExternalId( - format!("not a valid ORCID: '{}' (expected, eg, '0123-4567-3456-6789')", raw) - ).into()) + Err(ErrorKind::MalformedExternalId(format!( + "not a valid ORCID: '{}' (expected, eg, '0123-4567-3456-6789')", + raw + )).into()) } } diff --git a/rust/src/api_server.rs b/rust/src/api_server.rs index 64c028be..d172cb16 100644 --- a/rust/src/api_server.rs +++ b/rust/src/api_server.rs @@ -4,16 +4,17 @@ use api_helpers::*; use chrono; use database_models::*; use database_schema::{ - changelog, container_edit, container_ident, container_rev, creator_edit, creator_ident, - creator_rev, editgroup, editor, file_edit, file_ident, file_release, file_rev, file_rev_url, - release_contrib, release_edit, release_ident, release_ref, release_rev, release_rev_abstract, - work_edit, work_ident, work_rev, + abstracts, changelog, container_edit, container_ident, container_rev, creator_edit, + creator_ident, creator_rev, editgroup, editor, file_edit, file_ident, file_release, file_rev, + file_rev_url, release_contrib, release_edit, release_ident, release_ref, release_rev, + release_rev_abstract, work_edit, work_ident, work_rev, }; use diesel::prelude::*; use diesel::{self, insert_into}; use errors::*; use fatcat_api::models; use fatcat_api::models::*; +use sha1::Sha1; use uuid::Uuid; use ConnectionPool; @@ -221,24 +222,26 @@ fn release_row2entity( .into_iter() .map(|c: ReleaseContribRow| ReleaseContrib { index: c.index, - raw: c.raw, + raw_name: c.raw_name, role: c.role, extra: c.extra_json, creator_id: c.creator_ident_id.map(|v| uuid2fcid(&v)), }) .collect(); - // XXX: join abstracts table let abstracts: Vec = release_rev_abstract::table + .inner_join(abstracts::table) .filter(release_rev_abstract::release_rev.eq(rev.id)) .get_results(conn)? .into_iter() - .map(|r: ReleaseRevAbstractRow| ReleaseEntityAbstracts { - sha1: Some(r.abstract_sha1), - mimetype: r.mimetype, - lang: r.lang, - content: None, - }) + .map( + |r: (ReleaseRevAbstractRow, AbstractsRow)| ReleaseEntityAbstracts { + sha1: Some(r.0.abstract_sha1), + mimetype: r.0.mimetype, + lang: r.0.lang, + content: Some(r.1.content), + }, + ) .collect(); Ok(ReleaseEntity { @@ -767,6 +770,7 @@ impl Server { if contrib_list.is_empty() { Some(vec![]) } else { + println!("{:#?}", contrib_list); let contrib_rows: Vec = contrib_list .iter() .map(|c| ReleaseContribNewRow { @@ -774,7 +778,7 @@ impl Server { creator_ident_id: c.creator_id .clone() .map(|v| fcid2uuid(&v).expect("valid fatcat identifier")), - raw: c.raw.clone(), + raw_name: c.raw_name.clone(), index: c.index, role: c.role.clone(), extra_json: c.extra.clone(), @@ -789,6 +793,42 @@ impl Server { } }; + if let Some(abstract_list) = entity.abstracts { + // For rows that specify content, we need to insert the abstract if it doesn't exist + // already + let new_abstracts: Vec = abstract_list + .iter() + .filter(|ea| ea.content.is_some()) + .map(|c| AbstractsRow { + sha1: Sha1::from(c.content.clone().unwrap()).hexdigest(), + content: c.content.clone().unwrap(), + }) + .collect(); + if !new_abstracts.is_empty() { + // Sort of an "upsert"; only inserts new abstract rows if they don't already exist + insert_into(abstracts::table) + .values(new_abstracts) + //.on_conflict(abstracts::sha1) + //.do_nothing() + .execute(conn)?; + } + let release_abstract_rows: Vec = abstract_list + .into_iter() + .map(|c| ReleaseRevAbstractNewRow { + release_rev: edit.rev_id.unwrap(), + abstract_sha1: match c.content { + Some(ref content) => Sha1::from(content).hexdigest(), + None => c.sha1.expect("either abstract_sha1 or content is required"), + }, + lang: c.lang, + mimetype: c.mimetype, + }) + .collect(); + insert_into(release_rev_abstract::table) + .values(release_abstract_rows) + .execute(conn)?; + } + edit.into_model() } diff --git a/rust/src/database_models.rs b/rust/src/database_models.rs index f875b492..50176f5f 100644 --- a/rust/src/database_models.rs +++ b/rust/src/database_models.rs @@ -225,7 +225,7 @@ pub struct ReleaseContribRow { pub id: i64, pub release_rev: Uuid, pub creator_ident_id: Option, - pub raw: Option, + pub raw_name: Option, pub role: Option, pub index: Option, pub extra_json: Option, @@ -236,7 +236,7 @@ pub struct ReleaseContribRow { pub struct ReleaseContribNewRow { pub release_rev: Uuid, pub creator_ident_id: Option, - pub raw: Option, + pub raw_name: Option, pub role: Option, pub index: Option, pub extra_json: Option, @@ -278,6 +278,13 @@ pub struct FileReleaseRow { pub target_release_ident_id: Uuid, } +#[derive(Debug, Queryable, Insertable, Associations, AsChangeset)] +#[table_name = "abstracts"] +pub struct AbstractsRow { + pub sha1: String, + pub content: String, +} + #[derive(Debug, Queryable, Identifiable, Associations, AsChangeset)] #[table_name = "editgroup"] pub struct EditgroupRow { diff --git a/rust/src/database_schema.rs b/rust/src/database_schema.rs index c23e3f83..f935302a 100644 --- a/rust/src/database_schema.rs +++ b/rust/src/database_schema.rs @@ -157,7 +157,7 @@ table! { id -> Int8, release_rev -> Uuid, creator_ident_id -> Nullable, - raw -> Nullable, + raw_name -> Nullable, role -> Nullable, index -> Nullable, extra_json -> Nullable, -- cgit v1.2.3