diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-11-19 13:15:24 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-11-19 14:55:15 -0800 |
commit | 7fa96bab1bb4d1a99048cf9398fd3e1c8a4bf78a (patch) | |
tree | af898a5374fcecb03d9c4783c3cd025c1237a37c /rust | |
parent | 40b5d40bed9e20e5c7a40d1741601a8317e078d8 (diff) | |
download | fatcat-7fa96bab1bb4d1a99048cf9398fd3e1c8a4bf78a.tar.gz fatcat-7fa96bab1bb4d1a99048cf9398fd3e1c8a4bf78a.zip |
update fatcatd rust code for 'oai' external identifier
Diffstat (limited to 'rust')
-rw-r--r-- | rust/src/endpoint_handlers.rs | 150 | ||||
-rw-r--r-- | rust/src/endpoints.rs | 2 | ||||
-rw-r--r-- | rust/src/entity_crud.rs | 10 | ||||
-rw-r--r-- | rust/src/identifiers.rs | 38 |
4 files changed, 189 insertions, 11 deletions
diff --git a/rust/src/endpoint_handlers.rs b/rust/src/endpoint_handlers.rs index 1b7bd0b6..91ea2393 100644 --- a/rust/src/endpoint_handlers.rs +++ b/rust/src/endpoint_handlers.rs @@ -263,6 +263,7 @@ impl Server { mag: &Option<String>, doaj: &Option<String>, dblp: &Option<String>, + oai: &Option<String>, expand_flags: ExpandFlags, hide_flags: HideFlags, ) -> Result<ReleaseEntity> { @@ -279,8 +280,9 @@ impl Server { mag, doaj, dblp, + oai, ) { - (Some(doi), None, None, None, None, None, None, None, None, None, None, None) => { + (Some(doi), None, None, None, None, None, None, None, None, None, None, None, None) => { // DOIs always stored lower-case; lookups are case-insensitive let doi = doi.to_lowercase(); check_doi(&doi)?; @@ -304,6 +306,7 @@ impl Server { None, None, None, + None, ) => { check_wikidata_qid(wikidata_qid)?; release_ident::table @@ -313,7 +316,21 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, Some(isbn13), None, None, None, None, None, None, None, None, None) => { + ( + None, + None, + Some(isbn13), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ) => { check_isbn13(isbn13)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -326,7 +343,21 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, Some(pmid), None, None, None, None, None, None, None, None) => { + ( + None, + None, + None, + Some(pmid), + None, + None, + None, + None, + None, + None, + None, + None, + None, + ) => { check_pmid(pmid)?; release_ident::table .inner_join(release_rev::table) @@ -335,7 +366,21 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, Some(pmcid), None, None, None, None, None, None, None) => { + ( + None, + None, + None, + None, + Some(pmcid), + None, + None, + None, + None, + None, + None, + None, + None, + ) => { check_pmcid(pmcid)?; release_ident::table .inner_join(release_rev::table) @@ -344,7 +389,21 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, Some(core), None, None, None, None, None, None) => { + ( + None, + None, + None, + None, + None, + Some(core), + None, + None, + None, + None, + None, + None, + None, + ) => { check_core_id(core)?; release_ident::table .inner_join(release_rev::table) @@ -353,7 +412,21 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, None, Some(arxiv), None, None, None, None, None) => { + ( + None, + None, + None, + None, + None, + None, + Some(arxiv), + None, + None, + None, + None, + None, + None, + ) => { // TODO: this allows only lookup by full, versioned arxiv identifier. Probably also // want to allow lookup by "work" style identifier? check_arxiv_id(arxiv)?; @@ -368,7 +441,21 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, Some(jstor), None, None, None, None) => { + ( + None, + None, + None, + None, + None, + None, + None, + Some(jstor), + None, + None, + None, + None, + None, + ) => { check_jstor_id(jstor)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -381,7 +468,7 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, Some(ark), None, None, None) => { + (None, None, None, None, None, None, None, None, Some(ark), None, None, None, None) => { check_ark_id(ark)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -394,7 +481,7 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, None, Some(mag), None, None) => { + (None, None, None, None, None, None, None, None, None, Some(mag), None, None, None) => { check_mag_id(mag)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -407,7 +494,21 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, None, None, Some(doaj), None) => { + ( + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(doaj), + None, + None, + ) => { check_doaj_id(doaj)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -420,7 +521,21 @@ impl Server { .first(conn)?; (ident, rev) } - (None, None, None, None, None, None, None, None, None, None, None, Some(dblp)) => { + ( + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(dblp), + None, + ) => { check_dblp_id(dblp)?; let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = release_rev::table @@ -433,6 +548,19 @@ impl Server { .first(conn)?; (ident, rev) } + (None, None, None, None, None, None, None, None, None, None, None, None, Some(oai)) => { + check_oai_id(oai)?; + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("oai".to_string())) + .filter(release_rev_extid::value.eq(oai)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) + } _ => { return Err( FatcatError::MissingOrMultipleExternalId("in lookup".to_string()).into(), diff --git a/rust/src/endpoints.rs b/rust/src/endpoints.rs index 7ac0a068..0dd232c6 100644 --- a/rust/src/endpoints.rs +++ b/rust/src/endpoints.rs @@ -739,6 +739,7 @@ impl Api for Server { mag: Option<String>, doaj: Option<String>, dblp: Option<String>, + oai: Option<String>, expand: Option<String>, hide: Option<String>, _context: &Context, @@ -768,6 +769,7 @@ impl Api for Server { &mag, &doaj, &dblp, + &oai, expand_flags, hide_flags, ) diff --git a/rust/src/entity_crud.rs b/rust/src/entity_crud.rs index bac8c0fc..0d72788d 100644 --- a/rust/src/entity_crud.rs +++ b/rust/src/entity_crud.rs @@ -1746,6 +1746,7 @@ impl EntityCrud for ReleaseEntity { mag: None, doaj: None, dblp: None, + oai: None, }, refs: None, contribs: None, @@ -2024,6 +2025,7 @@ impl EntityCrud for ReleaseEntity { mag: None, doaj: None, dblp: None, + oai: None, }; let extid_rows: Vec<ReleaseExtidRow> = release_rev_extid::table @@ -2038,6 +2040,7 @@ impl EntityCrud for ReleaseEntity { "mag" => ext_ids.mag = Some(extid_row.value), "doaj" => ext_ids.doaj = Some(extid_row.value), "dblp" => ext_ids.dblp = Some(extid_row.value), + "oai" => ext_ids.oai = Some(extid_row.value), _ => (), } } @@ -2312,6 +2315,13 @@ impl EntityCrud for ReleaseEntity { value: extid.clone(), }); }; + if let Some(extid) = &model.ext_ids.oai { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "oai".to_string(), + value: extid.clone(), + }); + }; } for (model, rev_id) in models.iter().zip(rev_ids.iter()) { diff --git a/rust/src/identifiers.rs b/rust/src/identifiers.rs index 22ffcc79..76f978f9 100644 --- a/rust/src/identifiers.rs +++ b/rust/src/identifiers.rs @@ -411,6 +411,44 @@ fn test_check_dblp_id() { assert!(check_dblp_id("").is_err()); } +pub fn check_oai_id(raw: &str) -> Result<()> { + lazy_static! { + // http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm + static ref RE: Regex = Regex::new(r"^oai:[a-zA-Z][a-zA-Z0-9\-]*(\.[a-zA-Z][a-zA-Z0-9\-]*)+:[a-zA-Z0-9\-_\.!~\*'\(\);/\?:@&=\+$,%]+$").unwrap(); + } + if raw.is_ascii() && RE.is_match(raw) { + Ok(()) + } else { + Err(FatcatError::MalformedChecksum( + "OAI-PMH identifier (expected, eg, 'oai:foo.org:some-local-id-54')".to_string(), + raw.to_string(), + ))? + } +} + +#[test] +fn test_check_oai_id() { + assert!(check_oai_id("journals/entcs/GoubaultM12").is_err()); + assert!(check_oai_id("10.123*").is_err()); + assert!(check_oai_id("").is_err()); + assert!(check_oai_id("something:arXiv.org:hep-th/9901001").is_err()); // bad schema + assert!(check_oai_id("oai:999:abc123").is_err()); // namespace-identifier must not start with digit + assert!(check_oai_id("oai:wibble:abc123").is_err()); // namespace-identifier must be domain name + assert!(check_oai_id("oai:wibble.org:ab cd").is_err()); // space not permitted (must be escaped as %20) + assert!(check_oai_id("oai:wibble.org:ab#cd").is_err()); // # not permitted + assert!(check_oai_id("oai:wibble.org:ab<cd").is_err()); // < not permitted + // the "official" regex used above allows this case + //assert!(check_oai_id("oai:wibble.org:ab%3ccd").is_err()); // < must be escaped at %3C not %3c + + assert!(check_oai_id("oai:arXiv.org:hep-th/9901001").is_ok()); + assert!(check_oai_id("oai:foo.org:some-local-id-53").is_ok()); + assert!(check_oai_id("oai:FOO.ORG:some-local-id-53").is_ok()); + assert!(check_oai_id("oai:foo.org:some-local-id-54").is_ok()); + assert!(check_oai_id("oai:foo.org:Some-Local-Id-54").is_ok()); + assert!(check_oai_id("oai:wibble.org:ab%20cd").is_ok()); + assert!(check_oai_id("oai:wibble.org:ab?cd").is_ok()); +} + pub fn check_issn(raw: &str) -> Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap(); |