From ba7d6a842cb4d61357b588fb2d3ec552c654ae64 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 8 Jan 2019 23:18:32 -0800 Subject: huge refactor of rust modules/files Taking advantage of new Rust 2018 crate/module path changes, and re-organizing things. Somewhat optimistic this could help with partial rebuild speed also. --- rust/src/api_helpers.rs | 683 ------------------------------------------------ 1 file changed, 683 deletions(-) delete mode 100644 rust/src/api_helpers.rs (limited to 'rust/src/api_helpers.rs') diff --git a/rust/src/api_helpers.rs b/rust/src/api_helpers.rs deleted file mode 100644 index 55085403..00000000 --- a/rust/src/api_helpers.rs +++ /dev/null @@ -1,683 +0,0 @@ -use crate::api_entity_crud::EntityCrud; -use data_encoding::BASE32_NOPAD; -use crate::database_models::*; -use crate::database_schema::*; -use diesel; -use diesel::prelude::*; -use crate::errors::*; -use fatcat_api_spec::models::*; -use regex::Regex; -use serde_json; -use std::str::FromStr; -use uuid::Uuid; - -pub type DbConn = - diesel::r2d2::PooledConnection>; - -pub struct EditContext { - pub editor_id: FatCatId, - pub editgroup_id: FatCatId, - pub extra_json: Option, - pub autoaccept: bool, -} - -impl EditContext { - /// This function should always be run within a transaction - pub fn check(&self, conn: &DbConn) -> Result<()> { - let count: i64 = changelog::table - .filter(changelog::editgroup_id.eq(&self.editgroup_id.to_uuid())) - .count() - .get_result(conn)?; - if count > 0 { - return Err(ErrorKind::EditgroupAlreadyAccepted(self.editgroup_id.to_string()).into()); - } - return Ok(()); - } -} - -#[derive(Clone, Copy, PartialEq)] -pub struct ExpandFlags { - pub files: bool, - pub filesets: bool, - pub webcaptures: bool, - pub container: bool, - pub releases: bool, - pub creators: bool, -} - -impl FromStr for ExpandFlags { - type Err = Error; - fn from_str(param: &str) -> Result { - let list: Vec<&str> = param.split_terminator(",").collect(); - Ok(ExpandFlags::from_str_list(&list)) - } -} - -impl ExpandFlags { - pub fn from_str_list(list: &[&str]) -> ExpandFlags { - ExpandFlags { - files: list.contains(&"files"), - filesets: list.contains(&"filesets"), - webcaptures: list.contains(&"webcaptures"), - container: list.contains(&"container"), - releases: list.contains(&"releases"), - creators: list.contains(&"creators"), - } - } - pub fn none() -> ExpandFlags { - ExpandFlags { - files: false, - filesets: false, - webcaptures: false, - container: false, - releases: false, - creators: false, - } - } -} - -#[test] -fn test_expand_flags() { - assert!(ExpandFlags::from_str_list(&vec![]).files == false); - assert!(ExpandFlags::from_str_list(&vec!["files"]).files == true); - assert!(ExpandFlags::from_str_list(&vec!["file"]).files == false); - let all = ExpandFlags::from_str_list(&vec![ - "files", - "filesets", - "webcaptures", - "container", - "other_thing", - "releases", - "creators", - ]); - assert!( - all == ExpandFlags { - files: true, - filesets: true, - webcaptures: true, - container: true, - releases: true, - creators: true - } - ); - assert!(ExpandFlags::from_str("").unwrap().files == false); - assert!(ExpandFlags::from_str("files").unwrap().files == true); - assert!(ExpandFlags::from_str("something,,files").unwrap().files == true); - assert!(ExpandFlags::from_str("file").unwrap().files == false); - let all = - ExpandFlags::from_str("files,container,other_thing,releases,creators,filesets,webcaptures") - .unwrap(); - assert!( - all == ExpandFlags { - files: true, - filesets: true, - webcaptures: true, - container: true, - releases: true, - creators: true - } - ); -} - -#[derive(Clone, Copy, PartialEq)] -pub struct HideFlags { - // release - pub abstracts: bool, - pub refs: bool, - pub contribs: bool, - // fileset - pub manifest: bool, - // webcapture - pub cdx: bool, -} - -impl FromStr for HideFlags { - type Err = Error; - fn from_str(param: &str) -> Result { - let list: Vec<&str> = param.split_terminator(",").collect(); - Ok(HideFlags::from_str_list(&list)) - } -} - -impl HideFlags { - pub fn from_str_list(list: &[&str]) -> HideFlags { - HideFlags { - abstracts: list.contains(&"abstracts"), - refs: list.contains(&"refs"), - contribs: list.contains(&"contribs"), - manifest: list.contains(&"contribs"), - cdx: list.contains(&"contribs"), - } - } - pub fn none() -> HideFlags { - HideFlags { - abstracts: false, - refs: false, - contribs: false, - manifest: false, - cdx: false, - } - } -} - -#[test] -fn test_hide_flags() { - assert!(HideFlags::from_str_list(&vec![]).abstracts == false); - assert!(HideFlags::from_str_list(&vec!["abstracts"]).abstracts == true); - assert!(HideFlags::from_str_list(&vec!["abstract"]).abstracts == false); - let all = HideFlags::from_str_list(&vec![ - "abstracts", - "refs", - "other_thing", - "contribs", - "manifest", - "cdx", - ]); - assert!( - all == HideFlags { - abstracts: true, - refs: true, - contribs: true, - manifest: true, - cdx: true, - } - ); - assert!(HideFlags::from_str("").unwrap().abstracts == false); - assert!(HideFlags::from_str("abstracts").unwrap().abstracts == true); - assert!( - HideFlags::from_str("something,,abstracts") - .unwrap() - .abstracts - == true - ); - assert!(HideFlags::from_str("file").unwrap().abstracts == false); - let all = HideFlags::from_str("abstracts,cdx,refs,manifest,other_thing,contribs").unwrap(); - assert!( - all == HideFlags { - abstracts: true, - refs: true, - contribs: true, - manifest: true, - cdx: true, - } - ); -} - -pub fn make_edit_context( - conn: &DbConn, - editor_id: FatCatId, - editgroup_id: Option, - autoaccept: bool, -) -> Result { - let editgroup_id: FatCatId = match (editgroup_id, autoaccept) { - (Some(eg), _) => eg, - // If autoaccept and no editgroup_id passed, always create a new one for this transaction - (None, true) => { - let eg_row: EditgroupRow = diesel::insert_into(editgroup::table) - .values((editgroup::editor_id.eq(editor_id.to_uuid()),)) - .get_result(conn)?; - FatCatId::from_uuid(&eg_row.id) - } - (None, false) => FatCatId::from_uuid(&get_or_create_editgroup(editor_id.to_uuid(), conn)?), - }; - Ok(EditContext { - editor_id: editor_id, - editgroup_id: editgroup_id, - extra_json: None, - autoaccept: autoaccept, - }) -} - -pub fn create_editor( - conn: &DbConn, - username: String, - is_admin: bool, - is_bot: bool, -) -> Result { - check_username(&username)?; - let ed: EditorRow = diesel::insert_into(editor::table) - .values(( - editor::username.eq(username), - editor::is_admin.eq(is_admin), - editor::is_bot.eq(is_bot), - )) - .get_result(conn)?; - Ok(ed) -} - -pub fn update_editor_username( - conn: &DbConn, - editor_id: FatCatId, - username: String, -) -> Result { - check_username(&username)?; - diesel::update(editor::table.find(editor_id.to_uuid())) - .set(editor::username.eq(username)) - .execute(conn)?; - let editor: EditorRow = editor::table.find(editor_id.to_uuid()).get_result(conn)?; - Ok(editor) -} - -/// This function should always be run within a transaction -pub fn get_or_create_editgroup(editor_id: Uuid, conn: &DbConn) -> Result { - // check for current active - let ed_row: EditorRow = editor::table.find(editor_id).first(conn)?; - if let Some(current) = ed_row.active_editgroup_id { - return Ok(current); - } - - // need to insert and update - let eg_row: EditgroupRow = diesel::insert_into(editgroup::table) - .values((editgroup::editor_id.eq(ed_row.id),)) - .get_result(conn)?; - diesel::update(editor::table.find(ed_row.id)) - .set(editor::active_editgroup_id.eq(eg_row.id)) - .execute(conn)?; - Ok(eg_row.id) -} - -/// This function should always be run within a transaction -pub fn accept_editgroup(editgroup_id: FatCatId, conn: &DbConn) -> Result { - // check that we haven't accepted already (in changelog) - // NB: could leave this to a UNIQUE constraint - // TODO: redundant with check_edit_context - let count: i64 = changelog::table - .filter(changelog::editgroup_id.eq(editgroup_id.to_uuid())) - .count() - .get_result(conn)?; - if count > 0 { - return Err(ErrorKind::EditgroupAlreadyAccepted(editgroup_id.to_string()).into()); - } - - // copy edit columns to ident table - ContainerEntity::db_accept_edits(conn, editgroup_id)?; - CreatorEntity::db_accept_edits(conn, editgroup_id)?; - FileEntity::db_accept_edits(conn, editgroup_id)?; - FilesetEntity::db_accept_edits(conn, editgroup_id)?; - WebcaptureEntity::db_accept_edits(conn, editgroup_id)?; - ReleaseEntity::db_accept_edits(conn, editgroup_id)?; - WorkEntity::db_accept_edits(conn, editgroup_id)?; - - // append log/changelog row - let entry: ChangelogRow = diesel::insert_into(changelog::table) - .values((changelog::editgroup_id.eq(editgroup_id.to_uuid()),)) - .get_result(conn)?; - - // update any editor's active editgroup - let no_active: Option = None; - diesel::update(editor::table) - .filter(editor::active_editgroup_id.eq(editgroup_id.to_uuid())) - .set(editor::active_editgroup_id.eq(no_active)) - .execute(conn)?; - Ok(entry) -} - -#[derive(Clone, Copy, PartialEq, Debug)] -pub struct FatCatId(Uuid); - -impl ToString for FatCatId { - fn to_string(&self) -> String { - uuid2fcid(&self.to_uuid()) - } -} - -impl FromStr for FatCatId { - type Err = Error; - fn from_str(s: &str) -> Result { - fcid2uuid(s).map(|u| FatCatId(u)) - } -} - -impl FatCatId { - pub fn to_uuid(&self) -> Uuid { - self.0 - } - // TODO: just make it u: Uuid and clone (not by ref) - pub fn from_uuid(u: &Uuid) -> FatCatId { - FatCatId(*u) - } -} - -/// Convert fatcat IDs (base32 strings) to UUID -pub fn fcid2uuid(fcid: &str) -> Result { - if fcid.len() != 26 { - return Err(ErrorKind::InvalidFatcatId(fcid.to_string()).into()); - } - let mut raw = vec![0; 16]; - BASE32_NOPAD - .decode_mut(fcid.to_uppercase().as_bytes(), &mut raw) - .map_err(|_dp| ErrorKind::InvalidFatcatId(fcid.to_string()))?; - // unwrap() is safe here, because we know raw is always 16 bytes - Ok(Uuid::from_bytes(&raw).unwrap()) -} - -/// Convert UUID to fatcat ID string (base32 encoded) -pub fn uuid2fcid(id: &Uuid) -> String { - let raw = id.as_bytes(); - BASE32_NOPAD.encode(raw).to_lowercase() -} - -pub fn check_username(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^[A-Za-z][A-Za-z0-9._-]{2,24}$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedExternalId(format!( - "not a valid username: '{}' (expected, eg, 'AcidBurn')", - raw - )) - .into()) - } -} - -#[test] -fn test_check_username() { - assert!(check_username("bnewbold").is_ok()); - assert!(check_username("BNEWBOLD").is_ok()); - assert!(check_username("admin").is_ok()); - assert!(check_username("friend-bot").is_ok()); - assert!(check_username("dog").is_ok()); - assert!(check_username("g_____").is_ok()); - assert!(check_username("bnewbold2-archive").is_ok()); - assert!(check_username("bnewbold2-internetarchive").is_ok()); - - assert!(check_username("").is_err()); - assert!(check_username("_").is_err()); - assert!(check_username("gg").is_err()); - assert!(check_username("adminadminadminadminadminadminadmin").is_err()); - assert!(check_username("bryan newbold").is_err()); - assert!(check_username("01234567-3456-6780").is_err()); - assert!(check_username(".admin").is_err()); - assert!(check_username("-bot").is_err()); -} - -pub fn check_pmcid(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^PMC\d+$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedExternalId(format!( - "not a valid PubMed Central ID (PMCID): '{}' (expected, eg, 'PMC12345')", - raw - )) - .into()) - } -} - -pub fn check_pmid(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^\d+$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedExternalId(format!( - "not a valid PubMed ID (PMID): '{}' (expected, eg, '1234')", - raw - )) - .into()) - } -} - -pub fn check_wikidata_qid(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^Q\d+$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedExternalId(format!( - "not a valid Wikidata QID: '{}' (expected, eg, 'Q1234')", - raw - )) - .into()) - } -} - -pub fn check_doi(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^10.\d{3,6}/.+$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedExternalId(format!( - "not a valid DOI: '{}' (expected, eg, '10.1234/aksjdfh')", - raw - )) - .into()) - } -} - -pub fn check_issn(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedExternalId(format!( - "not a valid ISSN: '{}' (expected, eg, '1234-5678')", - raw - )) - .into()) - } -} - -pub fn check_orcid(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedExternalId(format!( - "not a valid ORCID: '{}' (expected, eg, '0123-4567-3456-6789')", - raw - )) - .into()) - } -} - -#[test] -fn test_check_orcid() { - assert!(check_orcid("0123-4567-3456-6789").is_ok()); - assert!(check_orcid("0123-4567-3456-678X").is_ok()); - assert!(check_orcid("01234567-3456-6780").is_err()); - assert!(check_orcid("0x23-4567-3456-6780").is_err()); -} - -pub fn check_md5(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^[a-f0-9]{32}$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedChecksum(format!( - "not a valid MD5: '{}' (expected lower-case hex, eg, '1b39813549077b2347c0f370c3864b40')", - raw - )) - .into()) - } -} - -#[test] -fn test_check_md5() { - assert!(check_md5("1b39813549077b2347c0f370c3864b40").is_ok()); - assert!(check_md5("1g39813549077b2347c0f370c3864b40").is_err()); - assert!(check_md5("1B39813549077B2347C0F370c3864b40").is_err()); - assert!(check_md5("1b39813549077b2347c0f370c3864b4").is_err()); - assert!(check_md5("1b39813549077b2347c0f370c3864b411").is_err()); -} - -pub fn check_sha1(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^[a-f0-9]{40}$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedChecksum(format!( - "not a valid SHA-1: '{}' (expected lower-case hex, eg, 'e9dd75237c94b209dc3ccd52722de6931a310ba3')", - raw - )) - .into()) - } -} - -#[test] -fn test_check_sha1() { - assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba3").is_ok()); - assert!(check_sha1("g9dd75237c94b209dc3ccd52722de6931a310ba3").is_err()); - assert!(check_sha1("e9DD75237C94B209DC3CCD52722de6931a310ba3").is_err()); - assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba").is_err()); - assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba33").is_err()); -} - -pub fn check_sha256(raw: &str) -> Result<()> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^[a-f0-9]{64}$").unwrap(); - } - if RE.is_match(raw) { - Ok(()) - } else { - Err(ErrorKind::MalformedChecksum(format!( - "not a valid SHA-256: '{}' (expected lower-case hex, eg, 'cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452')", - raw - )) - .into()) - } -} - -#[test] -fn test_check_sha256() { - assert!( - check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok() - ); - assert!( - check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err() - ); - assert!( - check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err() - ); - assert!( - check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err() - ); - assert!( - check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err() - ); -} - -pub fn check_release_type(raw: &str) -> Result<()> { - let valid_types = vec![ - // Citation Style Language official types - "article", - "article-magazine", - "article-newspaper", - "article-journal", - "bill", - "book", - "broadcast", - "chapter", - "dataset", - "entry", - "entry-dictionary", - "entry-encyclopedia", - "figure", - "graphic", - "interview", - "legislation", - "legal_case", - "manuscript", - "map", - "motion_picture", - "musical_score", - "pamphlet", - "paper-conference", - "patent", - "post", - "post-weblog", - "personal_communication", - "report", - "review", - "review-book", - "song", - "speech", - "thesis", - "treaty", - "webpage", - // fatcat-specific extensions - "peer_review", - "software", - "standard", - ]; - for good in valid_types { - if raw == good { - return Ok(()); - } - } - Err(ErrorKind::NotInControlledVocabulary(format!( - "not a valid release_type: '{}' (expected a CSL type, eg, 'article-journal', 'book')", - raw - )) - .into()) -} - -#[test] -fn test_check_release_type() { - assert!(check_release_type("book").is_ok()); - assert!(check_release_type("article-journal").is_ok()); - assert!(check_release_type("standard").is_ok()); - assert!(check_release_type("journal-article").is_err()); - assert!(check_release_type("BOOK").is_err()); - assert!(check_release_type("book ").is_err()); -} - -pub fn check_contrib_role(raw: &str) -> Result<()> { - let valid_types = vec![ - // Citation Style Language official role types - "author", - "collection-editor", - "composer", - "container-author", - "director", - "editor", - "editorial-director", - "editortranslator", - "illustrator", - "interviewer", - "original-author", - "recipient", - "reviewed-author", - "translator", - // common extension (for conference proceeding chair) - //"chair", - ]; - for good in valid_types { - if raw == good { - return Ok(()); - } - } - Err(ErrorKind::NotInControlledVocabulary(format!( - "not a valid contrib.role: '{}' (expected a CSL type, eg, 'author', 'editor')", - raw - )) - .into()) -} - -#[test] -fn test_check_contrib_role() { - assert!(check_contrib_role("author").is_ok()); - assert!(check_contrib_role("editor").is_ok()); - assert!(check_contrib_role("chair").is_err()); - assert!(check_contrib_role("EDITOR").is_err()); - assert!(check_contrib_role("editor ").is_err()); -} - -// TODO: make the above checks "more correct" -// TODO: check ISBN-13 -- cgit v1.2.3