use data_encoding::BASE32_NOPAD; use database_models::*; use database_schema::*; use diesel; use diesel::prelude::*; use errors::*; use regex::Regex; use uuid::Uuid; use std::str::FromStr; pub type DbConn = diesel::r2d2::PooledConnection>; /// This function should always be run within a transaction pub fn get_or_create_editgroup(editor_id: Uuid, conn: &PgConnection) -> Result { // check for current active let ed_row: EditorRow = editor::table.find(editor_id).first(conn)?; if let Some(current) = ed_row.active_editgroup_id { return Ok(current); } // need to insert and update let eg_row: EditgroupRow = diesel::insert_into(editgroup::table) .values((editgroup::editor_id.eq(ed_row.id),)) .get_result(conn)?; diesel::update(editor::table.find(ed_row.id)) .set(editor::active_editgroup_id.eq(eg_row.id)) .execute(conn)?; Ok(eg_row.id) } /// This function should always be run within a transaction pub fn accept_editgroup(editgroup_id: Uuid, conn: &PgConnection) -> Result { // check that we haven't accepted already (in changelog) // NB: could leave this to a UNIQUE constraint let count: i64 = changelog::table .filter(changelog::editgroup_id.eq(editgroup_id)) .count() .get_result(conn)?; if count > 0 { return Err(ErrorKind::EditgroupAlreadyAccepted(uuid2fcid(&editgroup_id)).into()); } // for each entity type... //for entity in (container_edit, creator_edit, file_edit, release_edit, work_edit) { /* // This would be the clean and efficient way, but see: // https://github.com/diesel-rs/diesel/issues/1478 diesel::update(container_ident::table) .inner_join(container_edit::table.on( container_ident::id.eq(container_edit::ident_id) )) .filter(container_edit::editgroup_id.eq(editgroup_id)) .values(( container_ident::is_live.eq(true), container_ident::rev_id.eq(container_edit::rev_id), container_ident::redirect_id.eq(container_edit::redirect_id), )) .execute()?; */ // Sketchy... but fast? Only a few queries per accept. for entity in &["container", "creator", "file", "work", "release"] { diesel::sql_query(format!( " UPDATE {entity}_ident SET is_live = true, rev_id = {entity}_edit.rev_id, redirect_id = {entity}_edit.redirect_id FROM {entity}_edit WHERE {entity}_ident.id = {entity}_edit.ident_id AND {entity}_edit.editgroup_id = $1", entity = entity )).bind::(editgroup_id) .execute(conn)?; } // append log/changelog row let entry: ChangelogRow = diesel::insert_into(changelog::table) .values((changelog::editgroup_id.eq(editgroup_id),)) .get_result(conn)?; // update any editor's active editgroup let no_active: Option = None; diesel::update(editor::table) .filter(editor::active_editgroup_id.eq(editgroup_id)) .set(editor::active_editgroup_id.eq(no_active)) .execute(conn)?; Ok(entry) } pub struct FatCatId(Uuid); impl ToString for FatCatId { fn to_string(&self) -> String { uuid2fcid(&self.to_uuid()) } } impl FromStr for FatCatId { type Err = Error; fn from_str(s: &str) -> Result { fcid2uuid(s).map(|u| FatCatId(u)) } } impl FatCatId { pub fn to_uuid(&self) -> Uuid { self.0 } pub fn from_uuid(u: &Uuid) -> FatCatId { FatCatId(u.clone()) } } /// Convert fatcat IDs (base32 strings) to UUID pub fn fcid2uuid(fcid: &str) -> Result { if fcid.len() != 26 { return Err(ErrorKind::InvalidFatcatId(fcid.to_string()).into()); } let mut raw = vec![0; 16]; BASE32_NOPAD .decode_mut(fcid.to_uppercase().as_bytes(), &mut raw) .map_err(|_dp| ErrorKind::InvalidFatcatId(fcid.to_string()))?; // unwrap() is safe here, because we know raw is always 16 bytes Ok(Uuid::from_bytes(&raw).unwrap()) } /// Convert UUID to fatcat ID string (base32 encoded) pub fn uuid2fcid(id: &Uuid) -> String { let raw = id.as_bytes(); BASE32_NOPAD.encode(raw).to_lowercase() } pub fn check_pmcid(raw: &str) -> Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^PMC\d+$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { Err(ErrorKind::MalformedExternalId(format!( "not a valid PubMed Central ID (PMCID): '{}' (expected, eg, 'PMC12345')", raw )).into()) } } pub fn check_pmid(raw: &str) -> Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^\d+$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { Err(ErrorKind::MalformedExternalId(format!( "not a valid PubMed ID (PMID): '{}' (expected, eg, '1234')", raw )).into()) } } pub fn check_wikidata_qid(raw: &str) -> Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^Q\d+$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { Err(ErrorKind::MalformedExternalId(format!( "not a valid Wikidata QID: '{}' (expected, eg, 'Q1234')", raw )).into()) } } pub fn check_doi(raw: &str) -> Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^10.\d{3,6}/.+$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { Err(ErrorKind::MalformedExternalId(format!( "not a valid DOI: '{}' (expected, eg, '10.1234/aksjdfh')", raw )).into()) } } pub fn check_issn(raw: &str) -> Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { Err(ErrorKind::MalformedExternalId(format!( "not a valid ISSN: '{}' (expected, eg, '1234-5678')", raw )).into()) } } pub fn check_orcid(raw: &str) -> Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$").unwrap(); } if RE.is_match(raw) { Ok(()) } else { Err(ErrorKind::MalformedExternalId(format!( "not a valid ORCID: '{}' (expected, eg, '0123-4567-3456-6789')", raw )).into()) } } #[test] fn test_check_orcid() { assert!(check_orcid("0123-4567-3456-6789").is_ok()); assert!(check_orcid("0123-4567-3456-678X").is_ok()); assert!(check_orcid("01234567-3456-6780").is_err()); assert!(check_orcid("0x23-4567-3456-6780").is_err()); } // TODO: make the above checks "more correct" // TODO: check ISBN-13 // TODO: check hashes (SHA-1, etc)