From 4b8cb5ac53beec85d6d4b620518eed8ccb891bc2 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 13 Oct 2021 15:53:46 -0700 Subject: rust: expand valid hdl identifiers Based on real examples found on dblp.org --- rust/src/identifiers.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'rust/src') diff --git a/rust/src/identifiers.rs b/rust/src/identifiers.rs index 3f5fc2cf..e9baf7b8 100644 --- a/rust/src/identifiers.rs +++ b/rust/src/identifiers.rs @@ -461,9 +461,9 @@ pub fn check_hdl(raw: &str) -> Result<()> { // currently strict about only allowing a fixed set of prefixes // should explicitly not allow DOIs, even though DOIs are themselves handles lazy_static! { - static ref RE: Regex = Regex::new(r"^(20|11|21|84).\d{1,6}(.\d{1,6})?/\S+$").unwrap(); + static ref RE: Regex = Regex::new(r"^\d+(\.\d+)*/\S+$").unwrap(); } - if raw.is_ascii() && RE.is_match(raw) { + if raw.is_ascii() && RE.is_match(raw) && !raw.starts_with("10.") { Ok(()) } else { Err(FatcatError::MalformedExternalId( @@ -483,9 +483,11 @@ fn test_check_hdl() { assert!(check_hdl("11.1234/aksjdfh").is_ok()); assert!(check_hdl("20.500.23456/ABC/trs12").is_ok()); assert!(check_hdl("20.500/ABC/trs12").is_ok()); + assert!(check_hdl("2381/12775").is_ok()); + assert!(check_hdl("2027/spo.bbp2372.1999.394").is_ok()); + assert!(check_hdl("0.1234/aksjdfh").is_ok()); assert!(check_hdl("10.1234/aksjdfh").is_err()); - assert!(check_hdl("0.1234/aksjdfh").is_err()); assert!(check_hdl("20.1234/ßs").is_err()); assert!(check_hdl("20.1234/aksjdfh ").is_err()); assert!(check_hdl("20.1234/ak sjdfh").is_err()); @@ -495,6 +497,8 @@ fn test_check_hdl() { assert!(check_hdl("20.1234/\naksjdfh").is_err()); assert!(check_hdl("20.1234").is_err()); assert!(check_hdl("20.1234/").is_err()); + assert!(check_hdl("20./asdf").is_err()); + assert!(check_hdl(".123/asdf").is_err()); } pub fn check_issn(raw: &str) -> Result<()> { -- cgit v1.2.3