diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-13 15:53:46 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-13 16:21:31 -0700 |
commit | 4b8cb5ac53beec85d6d4b620518eed8ccb891bc2 (patch) | |
tree | 595bd4c3edb9f34d92d3c16b3d55ce160f5bc7af /rust/src | |
parent | 2d755c83895271ad214dcefc234bf7da36e572e3 (diff) | |
download | fatcat-4b8cb5ac53beec85d6d4b620518eed8ccb891bc2.tar.gz fatcat-4b8cb5ac53beec85d6d4b620518eed8ccb891bc2.zip |
rust: expand valid hdl identifiers
Based on real examples found on dblp.org
Diffstat (limited to 'rust/src')
-rw-r--r-- | rust/src/identifiers.rs | 10 |
1 file changed, 7 insertions, 3 deletions
```diff
diff --git a/rust/src/identifiers.rs b/rust/src/identifiers.rs
index 3f5fc2cf..e9baf7b8 100644
--- a/rust/src/identifiers.rs
+++ b/rust/src/identifiers.rs
@@ -461,9 +461,9 @@ pub fn check_hdl(raw: &str) -> Result<()> {
     // currently strict about only allowing a fixed set of prefixes
     // should explicitly not allow DOIs, even though DOIs are themselves handles
     lazy_static! {
-        static ref RE: Regex = Regex::new(r"^(20|11|21|84).\d{1,6}(.\d{1,6})?/\S+$").unwrap();
+        static ref RE: Regex = Regex::new(r"^\d+(\.\d+)*/\S+$").unwrap();
     }
-    if raw.is_ascii() && RE.is_match(raw) {
+    if raw.is_ascii() && RE.is_match(raw) && !raw.starts_with("10.") {
         Ok(())
     } else {
         Err(FatcatError::MalformedExternalId(
@@ -483,9 +483,11 @@ fn test_check_hdl() {
     assert!(check_hdl("11.1234/aksjdfh").is_ok());
     assert!(check_hdl("20.500.23456/ABC/trs12").is_ok());
     assert!(check_hdl("20.500/ABC/trs12").is_ok());
+    assert!(check_hdl("2381/12775").is_ok());
+    assert!(check_hdl("2027/spo.bbp2372.1999.394").is_ok());
+    assert!(check_hdl("0.1234/aksjdfh").is_ok());
 
     assert!(check_hdl("10.1234/aksjdfh").is_err());
-    assert!(check_hdl("0.1234/aksjdfh").is_err());
     assert!(check_hdl("20.1234/ßs").is_err());
     assert!(check_hdl("20.1234/aksjdfh ").is_err());
     assert!(check_hdl("20.1234/ak sjdfh").is_err());
@@ -495,6 +497,8 @@ fn test_check_hdl() {
     assert!(check_hdl("20.1234/\naksjdfh").is_err());
     assert!(check_hdl("20.1234").is_err());
     assert!(check_hdl("20.1234/").is_err());
+    assert!(check_hdl("20./asdf").is_err());
+    assert!(check_hdl(".123/asdf").is_err());
 }
 
 pub fn check_issn(raw: &str) -> Result<()> {
```