diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-09 18:12:39 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-09 18:49:46 -0800 |
commit | 2fd90ad2cc561fa743a617315824b2744f737575 (patch) | |
tree | 0c8e80351d772a4d25953f0e3345e7168f5d206c /python/fatcat_tools | |
parent | ba7f9214d2038882952eb50cd4dc5eff4eb0e6ff (diff) | |
download | fatcat-2fd90ad2cc561fa743a617315824b2744f737575.tar.gz fatcat-2fd90ad2cc561fa743a617315824b2744f737575.zip |
clean_doi: stop mutating double-slash DOIs, except for 10.1037 prefix
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/normal.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index 34e5c3d1..0d2c84ce 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -47,7 +47,7 @@ def clean_doi(raw: Optional[str]) -> Optional[str]:
         raw = raw[8:]
     if raw.startswith("dx.doi.org/"):
         raw = raw[11:]
-    if raw[7:9] == "//":
+    if raw[7:9] == "//" and "10.1037//" in raw:
         raw = raw[:8] + raw[9:]
 
     # fatcatd uses same REGEX, but Rust regex rejects these characters, while
@@ -74,6 +74,7 @@ def test_clean_doi() -> None:
     assert clean_doi("10.1234/asdf ") == "10.1234/asdf"
     assert clean_doi("10.1037//0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
     assert clean_doi("10.1037/0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
+    assert clean_doi("10.1026//1616-1041.3.2.86") == "10.1026//1616-1041.3.2.86"
     assert clean_doi("10.23750/abm.v88i2 -s.6506") is None
     assert clean_doi("10.17167/mksz.2017.2.129–155") is None
     assert clean_doi("http://doi.org/10.1234/asdf ") == "10.1234/asdf"