From 0fb0c3759a04c025800e3175fb4cbd8d595f8c4b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 17 Nov 2020 15:48:33 -0800 Subject: rust: fatcatd changes for DOAJ+dblp identifiers --- rust/src/endpoint_handlers.rs | 63 ++++++++++++++++++++++++++++++++++++------- rust/src/endpoints.rs | 4 +++ rust/src/entity_crud.rs | 20 ++++++++++++++ rust/src/identifiers.rs | 49 +++++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 10 deletions(-) (limited to 'rust/src') diff --git a/rust/src/endpoint_handlers.rs b/rust/src/endpoint_handlers.rs index 64b6ed62..cc717344 100644 --- a/rust/src/endpoint_handlers.rs +++ b/rust/src/endpoint_handlers.rs @@ -262,6 +262,8 @@ impl Server { jstor: &Option, ark: &Option, mag: &Option, + doaj: &Option, + dblp: &Option, expand_flags: ExpandFlags, hide_flags: HideFlags, ) -> Result { @@ -276,8 +278,10 @@ impl Server { jstor, ark, mag, + doaj, + dblp, ) { - (Some(doi), None, None, None, None, None, None, None, None, None) => { + (Some(doi), None, None, None, None, None, None, None, None, None, None, None) => { // DOIs always stored lower-case; lookups are case-insensitive let doi = doi.to_lowercase(); check_doi(&doi)?; @@ -288,7 +292,20 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, Some(wikidata_qid), None, None, None, None, None, None, None, None) => { + ( + None, + Some(wikidata_qid), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ) => { check_wikidata_qid(wikidata_qid)?; release_ident::table .inner_join(release_rev::table) @@ -297,7 +314,7 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? 
} - (None, None, Some(isbn13), None, None, None, None, None, None, None) => { + (None, None, Some(isbn13), None, None, None, None, None, None, None, None, None) => { check_isbn13(isbn13)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -310,7 +327,7 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, Some(pmid), None, None, None, None, None, None) => { + (None, None, None, Some(pmid), None, None, None, None, None, None, None, None) => { check_pmid(pmid)?; release_ident::table .inner_join(release_rev::table) @@ -319,7 +336,7 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, Some(pmcid), None, None, None, None, None) => { + (None, None, None, None, Some(pmcid), None, None, None, None, None, None, None) => { check_pmcid(pmcid)?; release_ident::table .inner_join(release_rev::table) @@ -328,7 +345,7 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, Some(core), None, None, None, None) => { + (None, None, None, None, None, Some(core), None, None, None, None, None, None) => { check_core_id(core)?; release_ident::table .inner_join(release_rev::table) @@ -337,7 +354,7 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, None, Some(arxiv), None, None, None) => { + (None, None, None, None, None, None, Some(arxiv), None, None, None, None, None) => { // TODO: this allows only lookup by full, versioned arxiv identifier. Probably also // want to allow lookup by "work" style identifier? 
check_arxiv_id(arxiv)?; @@ -352,7 +369,7 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, Some(jstor), None, None) => { + (None, None, None, None, None, None, None, Some(jstor), None, None, None, None) => { check_jstor_id(jstor)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -365,7 +382,7 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, Some(ark), None) => { + (None, None, None, None, None, None, None, None, Some(ark), None, None, None) => { check_ark_id(ark)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -378,7 +395,7 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, None, Some(mag)) => { + (None, None, None, None, None, None, None, None, None, Some(mag), None, None) => { check_mag_id(mag)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -391,6 +408,32 @@ impl Server { .first(conn)?; (ident, rev) } + (None, None, None, None, None, None, None, None, None, None, Some(doaj), None) => { + check_doaj_id(doaj)?; + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("doaj".to_string())) + .filter(release_rev_extid::value.eq(doaj)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) + } + (None, None, None, None, None, None, None, None, None, None, None, Some(dblp)) => { + check_dblp_id(dblp)?; + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("dblp".to_string())) + 
.filter(release_rev_extid::value.eq(dblp)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) + } _ => { return Err( FatcatError::MissingOrMultipleExternalId("in lookup".to_string()).into(), diff --git a/rust/src/endpoints.rs b/rust/src/endpoints.rs index 0dd69efd..fda4688c 100644 --- a/rust/src/endpoints.rs +++ b/rust/src/endpoints.rs @@ -689,6 +689,8 @@ impl Api for Server { jstor: Option, ark: Option, mag: Option, + doaj: Option, + dblp: Option, expand: Option, hide: Option, _context: &Context, @@ -716,6 +718,8 @@ impl Api for Server { &jstor, &ark, &mag, + &doaj, + &dblp, expand_flags, hide_flags, ) diff --git a/rust/src/entity_crud.rs b/rust/src/entity_crud.rs index 83dd26c9..89ee83bf 100644 --- a/rust/src/entity_crud.rs +++ b/rust/src/entity_crud.rs @@ -1742,6 +1742,8 @@ impl EntityCrud for ReleaseEntity { jstor: None, ark: None, mag: None, + doaj: None, + dblp: None, }, refs: None, contribs: None, @@ -2018,6 +2020,8 @@ impl EntityCrud for ReleaseEntity { jstor: None, ark: None, mag: None, + doaj: None, + dblp: None, }; let extid_rows: Vec = release_rev_extid::table @@ -2030,6 +2034,8 @@ impl EntityCrud for ReleaseEntity { "jstor" => ext_ids.jstor = Some(extid_row.value), "ark" => ext_ids.ark = Some(extid_row.value), "mag" => ext_ids.mag = Some(extid_row.value), + "doaj" => ext_ids.doaj = Some(extid_row.value), + "dblp" => ext_ids.dblp = Some(extid_row.value), _ => (), } } @@ -2290,6 +2296,20 @@ impl EntityCrud for ReleaseEntity { value: extid.clone(), }); }; + if let Some(extid) = &model.ext_ids.doaj { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "doaj".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.dblp { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "dblp".to_string(), + value: extid.clone(), + }); + }; } for (model, rev_id) in models.iter().zip(rev_ids.iter()) { diff --git 
a/rust/src/identifiers.rs b/rust/src/identifiers.rs
index 180dc43b..22ffcc79 100644
--- a/rust/src/identifiers.rs
+++ b/rust/src/identifiers.rs
@@ -362,6 +362,86 @@ fn test_check_isbn13() {
     assert!(check_isbn13("9781566199094").is_err());
 }
 
+/// Validates a DOAJ article identifier.
+///
+/// The identifier must be exactly 32 lower-case hexadecimal characters
+/// (`0-9`, `a-f`). Upper-case hex digits, surrounding whitespace, a wrong
+/// length, and non-ASCII input are all rejected.
+///
+/// # Errors
+///
+/// Fails with a `FatcatError::MalformedChecksum` holding a description of the
+/// expected format and the rejected input.
+pub fn check_doaj_id(raw: &str) -> Result<()> {
+    lazy_static! {
+        static ref RE: Regex = Regex::new(r"^[a-f0-9]{32}$").unwrap();
+    }
+    if raw.is_ascii() && RE.is_match(raw) {
+        Ok(())
+    } else {
+        // NOTE(review): a DOAJ id is an identifier, not a checksum -- confirm that
+        // `MalformedChecksum` is the intended error variant here.
+        Err(FatcatError::MalformedChecksum(
+            "DOAJ Article Identifier (expected, eg, 'e58f08a11ecb495ead55a44ad4f89808')"
+                .to_string(),
+            raw.to_string(),
+        ))?
+    }
+}
+
+#[test]
+fn test_check_doaj_id() {
+    assert!(check_doaj_id("e58f08a11ecb495ead55a44ad4f89808").is_ok());
+    assert!(check_doaj_id("1b39813549077b2347c0f370c3864b40").is_ok());
+    assert!(check_doaj_id("1b39813549077b2347c0f370c3864b40 ").is_err());
+    assert!(check_doaj_id("1g39813549077b2347c0f370c3864b40").is_err());
+    assert!(check_doaj_id("1B39813549077B2347C0F370c3864b40").is_err());
+    assert!(check_doaj_id("1b39813549077b2347c0f370c3864b4").is_err());
+    assert!(check_doaj_id("1b39813549077b2347c0f370c3864b411").is_err());
+    assert!(check_doaj_id("").is_err());
+}
+
+/// Validates a dblp article key such as `journals/entcs/GoubaultM12`.
+///
+/// The key must be a lower-case alphabetic first part, `/`, an alphanumeric
+/// second part, `/`, and a non-empty remainder made up of alphanumerics and
+/// `/` characters. Non-ASCII input is rejected.
+///
+/// # Errors
+///
+/// Fails with a `FatcatError::MalformedChecksum` holding a description of the
+/// expected format and the rejected input.
+pub fn check_dblp_id(raw: &str) -> Result<()> {
+    lazy_static! {
+        // TODO: what should this actually be? more or less restrictive?
+        // NOTE(review): no part may contain '-', so a hyphenated key would be
+        // rejected -- confirm whether dblp issues such keys.
+        static ref RE: Regex = Regex::new(r"^[a-z]+/[a-zA-Z0-9]+/[a-zA-Z0-9/]+$").unwrap();
+    }
+    if raw.is_ascii() && RE.is_match(raw) {
+        Ok(())
+    } else {
+        // NOTE(review): a dblp key is an identifier, not a checksum -- confirm that
+        // `MalformedChecksum` is the intended error variant here.
+        Err(FatcatError::MalformedChecksum(
+            "dblp Article Key (expected, eg, 'journals/entcs/GoubaultM12')".to_string(),
+            raw.to_string(),
+        ))?
+    }
+}
+
+#[test]
+fn test_check_dblp_id() {
+    assert!(check_dblp_id("journals/entcs/GoubaultM12").is_ok());
+    // Synthetic key: the trailing part may itself contain '/'.
+    assert!(check_dblp_id("conf/example/2020/Author20").is_ok());
+    assert!(check_dblp_id("journals/entcs").is_err());
+    assert!(check_dblp_id("Journals/entcs/GoubaultM12").is_err());
+    assert!(check_dblp_id("journals/entcs/GoubaultM12 ").is_err());
+    assert!(check_dblp_id("10.123*").is_err());
+    assert!(check_dblp_id("").is_err());
+}
+
 pub fn check_issn(raw: &str) -> Result<()> {
     lazy_static! {
         static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap();
-- 
cgit v1.2.3