From 0fb0c3759a04c025800e3175fb4cbd8d595f8c4b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 17 Nov 2020 15:48:33 -0800 Subject: rust: fatcatd changes for DOAJ+dblp identifiers --- rust/src/identifiers.rs | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'rust/src/identifiers.rs') diff --git a/rust/src/identifiers.rs b/rust/src/identifiers.rs index 180dc43b..22ffcc79 100644 --- a/rust/src/identifiers.rs +++ b/rust/src/identifiers.rs @@ -362,6 +362,55 @@ fn test_check_isbn13() { assert!(check_isbn13("9781566199094").is_err()); } +pub fn check_doaj_id(raw: &str) -> Result<()> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[a-f0-9]{32}$").unwrap(); + } + if raw.is_ascii() && RE.is_match(raw) { + Ok(()) + } else { + Err(FatcatError::MalformedChecksum( + "DOAJ Article Identifier (expected, eg, 'e58f08a11ecb495ead55a44ad4f89808')" + .to_string(), + raw.to_string(), + ))? + } +} + +#[test] +fn test_check_doaj_id() { + assert!(check_doaj_id("e58f08a11ecb495ead55a44ad4f89808").is_ok()); + assert!(check_doaj_id("1b39813549077b2347c0f370c3864b40").is_ok()); + assert!(check_doaj_id("1b39813549077b2347c0f370c3864b40 ").is_err()); + assert!(check_doaj_id("1g39813549077b2347c0f370c3864b40").is_err()); + assert!(check_doaj_id("1B39813549077B2347C0F370c3864b40").is_err()); + assert!(check_doaj_id("1b39813549077b2347c0f370c3864b4").is_err()); + assert!(check_doaj_id("1b39813549077b2347c0f370c3864b411").is_err()); +} + +pub fn check_dblp_id(raw: &str) -> Result<()> { + lazy_static! { + // TODO: what should this actually be? more or less restrictive? + static ref RE: Regex = Regex::new(r"^[a-z]+/[a-zA-Z0-9]+/[a-zA-Z0-9/]+$").unwrap(); + } + if raw.is_ascii() && RE.is_match(raw) { + Ok(()) + } else { + Err(FatcatError::MalformedChecksum( + "dblp Article Key (expected, eg, 'journals/entcs/GoubaultM12')".to_string(), + raw.to_string(), + ))? + } +} + +#[test] +fn test_check_dblp_id() { + assert!(check_dblp_id("journals/entcs/GoubaultM12").is_ok()); + assert!(check_dblp_id("journals/entcs/GoubaultM12").is_ok()); + assert!(check_dblp_id("10.123*").is_err()); + assert!(check_dblp_id("").is_err()); +} + pub fn check_issn(raw: &str) -> Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap(); -- cgit v1.2.3 From 7fa96bab1bb4d1a99048cf9398fd3e1c8a4bf78a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 19 Nov 2020 13:15:24 -0800 Subject: update fatcatd rust code for 'oai' external identifier --- rust/src/endpoint_handlers.rs | 150 ++++++++++++++++++++++++++++++++++++++---- rust/src/endpoints.rs | 2 + rust/src/entity_crud.rs | 10 +++ rust/src/identifiers.rs | 38 +++++++++++ 4 files changed, 189 insertions(+), 11 deletions(-) (limited to 'rust/src/identifiers.rs') diff --git a/rust/src/endpoint_handlers.rs b/rust/src/endpoint_handlers.rs index 1b7bd0b6..91ea2393 100644 --- a/rust/src/endpoint_handlers.rs +++ b/rust/src/endpoint_handlers.rs @@ -263,6 +263,7 @@ impl Server { mag: &Option, doaj: &Option, dblp: &Option, + oai: &Option, expand_flags: ExpandFlags, hide_flags: HideFlags, ) -> Result { @@ -279,8 +280,9 @@ impl Server { mag, doaj, dblp, + oai, ) { - (Some(doi), None, None, None, None, None, None, None, None, None, None, None) => { + (Some(doi), None, None, None, None, None, None, None, None, None, None, None, None) => { // DOIs always stored lower-case; lookups are case-insensitive let doi = doi.to_lowercase(); check_doi(&doi)?; @@ -304,6 +306,7 @@ impl Server { None, None, None, + None, ) => { check_wikidata_qid(wikidata_qid)?; release_ident::table @@ -313,7 +316,21 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, Some(isbn13), None, None, None, None, None, None, None, None, None) => { + ( + None, + None, + Some(isbn13), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ) => { check_isbn13(isbn13)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -326,7 +343,21 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, Some(pmid), None, None, None, None, None, None, None, None) => { + ( + None, + None, + None, + Some(pmid), + None, + None, + None, + None, + None, + None, + None, + None, + None, + ) => { check_pmid(pmid)?; release_ident::table .inner_join(release_rev::table) @@ -335,7 +366,21 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, Some(pmcid), None, None, None, None, None, None, None) => { + ( + None, + None, + None, + None, + Some(pmcid), + None, + None, + None, + None, + None, + None, + None, + None, + ) => { check_pmcid(pmcid)?; release_ident::table .inner_join(release_rev::table) @@ -344,7 +389,21 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, Some(core), None, None, None, None, None, None) => { + ( + None, + None, + None, + None, + None, + Some(core), + None, + None, + None, + None, + None, + None, + None, + ) => { check_core_id(core)?; release_ident::table .inner_join(release_rev::table) @@ -353,7 +412,21 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, None, Some(arxiv), None, None, None, None, None) => { + ( + None, + None, + None, + None, + None, + None, + Some(arxiv), + None, + None, + None, + None, + None, + None, + ) => { // TODO: this allows only lookup by full, versioned arxiv identifier. Probably also // want to allow lookup by "work" style identifier? check_arxiv_id(arxiv)?; @@ -368,7 +441,21 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, Some(jstor), None, None, None, None) => { + ( + None, + None, + None, + None, + None, + None, + None, + Some(jstor), + None, + None, + None, + None, + None, + ) => { check_jstor_id(jstor)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -381,7 +468,7 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, Some(ark), None, None, None) => { + (None, None, None, None, None, None, None, None, Some(ark), None, None, None, None) => { check_ark_id(ark)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -394,7 +481,7 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, None, Some(mag), None, None) => { + (None, None, None, None, None, None, None, None, None, Some(mag), None, None, None) => { check_mag_id(mag)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -407,7 +494,21 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, None, None, Some(doaj), None) => { + ( + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(doaj), + None, + None, + ) => { check_doaj_id(doaj)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -420,7 +521,21 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, None, None, None, Some(dblp)) => { + ( + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(dblp), + None, + ) => { check_dblp_id(dblp)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -433,6 +548,19 @@ impl Server { .first(conn)?; (ident, rev) } + (None, None, None, None, None, None, None, None, None, None, None, None, Some(oai)) => { + check_oai_id(oai)?; + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("oai".to_string())) + .filter(release_rev_extid::value.eq(oai)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) + } _ => { return Err( FatcatError::MissingOrMultipleExternalId("in lookup".to_string()).into(), diff --git a/rust/src/endpoints.rs b/rust/src/endpoints.rs index 7ac0a068..0dd232c6 100644 --- a/rust/src/endpoints.rs +++ b/rust/src/endpoints.rs @@ -739,6 +739,7 @@ impl Api for Server { mag: Option, doaj: Option, dblp: Option, + oai: Option, expand: Option, hide: Option, _context: &Context, @@ -768,6 +769,7 @@ impl Api for Server { &mag, &doaj, &dblp, + &oai, expand_flags, hide_flags, ) diff --git a/rust/src/entity_crud.rs b/rust/src/entity_crud.rs index bac8c0fc..0d72788d 100644 --- a/rust/src/entity_crud.rs +++ b/rust/src/entity_crud.rs @@ -1746,6 +1746,7 @@ impl EntityCrud for ReleaseEntity { mag: None, doaj: None, dblp: None, + oai: None, }, refs: None, contribs: None, @@ -2024,6 +2025,7 @@ impl EntityCrud for ReleaseEntity { mag: None, doaj: None, dblp: None, + oai: None, }; let extid_rows: Vec = release_rev_extid::table @@ -2038,6 +2040,7 @@ impl EntityCrud for ReleaseEntity { "mag" => ext_ids.mag = Some(extid_row.value), "doaj" => ext_ids.doaj = Some(extid_row.value), "dblp" => ext_ids.dblp = Some(extid_row.value), + "oai" => ext_ids.oai = Some(extid_row.value), _ => (), } } @@ -2312,6 +2315,13 @@ impl EntityCrud for ReleaseEntity { value: extid.clone(), }); }; + if let Some(extid) = &model.ext_ids.oai { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "oai".to_string(), + value: extid.clone(), + }); + }; } for (model, rev_id) in models.iter().zip(rev_ids.iter()) { diff --git a/rust/src/identifiers.rs b/rust/src/identifiers.rs index 22ffcc79..76f978f9 100644 --- a/rust/src/identifiers.rs +++ b/rust/src/identifiers.rs @@ -411,6 +411,44 @@ fn test_check_dblp_id() { assert!(check_dblp_id("").is_err()); } +pub fn check_oai_id(raw: &str) -> Result<()> { + lazy_static! { + // http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm + static ref RE: Regex = Regex::new(r"^oai:[a-zA-Z][a-zA-Z0-9\-]*(\.[a-zA-Z][a-zA-Z0-9\-]*)+:[a-zA-Z0-9\-_\.!~\*'\(\);/\?:@&=\+$,%]+$").unwrap(); + } + if raw.is_ascii() && RE.is_match(raw) { + Ok(()) + } else { + Err(FatcatError::MalformedChecksum( + "OAI-PMH identifier (expected, eg, 'oai:foo.org:some-local-id-54')".to_string(), + raw.to_string(), + ))? + } +} + +#[test] +fn test_check_oai_id() { + assert!(check_oai_id("journals/entcs/GoubaultM12").is_err()); + assert!(check_oai_id("10.123*").is_err()); + assert!(check_oai_id("").is_err()); + assert!(check_oai_id("something:arXiv.org:hep-th/9901001").is_err()); // bad schema + assert!(check_oai_id("oai:999:abc123").is_err()); // namespace-identifier must not start with digit + assert!(check_oai_id("oai:wibble:abc123").is_err()); // namespace-identifier must be domain name + assert!(check_oai_id("oai:wibble.org:ab cd").is_err()); // space not permitted (must be escaped as %20) + assert!(check_oai_id("oai:wibble.org:ab#cd").is_err()); // # not permitted + assert!(check_oai_id("oai:wibble.org:ab Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap(); -- cgit v1.2.3