From 7fa96bab1bb4d1a99048cf9398fd3e1c8a4bf78a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 19 Nov 2020 13:15:24 -0800 Subject: update fatcatd rust code for 'oai' external identifier --- rust/src/identifiers.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'rust/src/identifiers.rs') diff --git a/rust/src/identifiers.rs b/rust/src/identifiers.rs index 22ffcc79..76f978f9 100644 --- a/rust/src/identifiers.rs +++ b/rust/src/identifiers.rs @@ -411,6 +411,44 @@ fn test_check_dblp_id() { assert!(check_dblp_id("").is_err()); } +pub fn check_oai_id(raw: &str) -> Result<()> { + lazy_static! { + // http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm + static ref RE: Regex = Regex::new(r"^oai:[a-zA-Z][a-zA-Z0-9\-]*(\.[a-zA-Z][a-zA-Z0-9\-]*)+:[a-zA-Z0-9\-_\.!~\*'\(\);/\?:@&=\+$,%]+$").unwrap(); + } + if raw.is_ascii() && RE.is_match(raw) { + Ok(()) + } else { + Err(FatcatError::MalformedChecksum( + "OAI-PMH identifier (expected, eg, 'oai:foo.org:some-local-id-54')".to_string(), + raw.to_string(), + ))? + } +} + +#[test] +fn test_check_oai_id() { + assert!(check_oai_id("journals/entcs/GoubaultM12").is_err()); + assert!(check_oai_id("10.123*").is_err()); + assert!(check_oai_id("").is_err()); + assert!(check_oai_id("something:arXiv.org:hep-th/9901001").is_err()); // bad schema + assert!(check_oai_id("oai:999:abc123").is_err()); // namespace-identifier must not start with digit + assert!(check_oai_id("oai:wibble:abc123").is_err()); // namespace-identifier must be domain name + assert!(check_oai_id("oai:wibble.org:ab cd").is_err()); // space not permitted (must be escaped as %20) + assert!(check_oai_id("oai:wibble.org:ab#cd").is_err()); // # not permitted + assert!(check_oai_id("oai:wibble.org:ab Result<()> { lazy_static! { static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap(); -- cgit v1.2.3