about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2021-11-09 18:12:39 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2021-11-09 18:49:46 -0800
commit 2fd90ad2cc561fa743a617315824b2744f737575 (patch)
tree 0c8e80351d772a4d25953f0e3345e7168f5d206c
parent ba7f9214d2038882952eb50cd4dc5eff4eb0e6ff (diff)
download fatcat-2fd90ad2cc561fa743a617315824b2744f737575.tar.gz
fatcat-2fd90ad2cc561fa743a617315824b2744f737575.zip
clean_doi: stop mutating double-slash DOIs, except for 10.1037 prefix
-rw-r--r-- python/fatcat_tools/normal.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index 34e5c3d1..0d2c84ce 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -47,7 +47,7 @@ def clean_doi(raw: Optional[str]) -> Optional[str]:
raw = raw[8:]
if raw.startswith("dx.doi.org/"):
raw = raw[11:]
- if raw[7:9] == "//":
+ if raw[7:9] == "//" and "10.1037//" in raw:
raw = raw[:8] + raw[9:]
# fatcatd uses same REGEX, but Rust regex rejects these characters, while
@@ -74,6 +74,7 @@ def test_clean_doi() -> None:
assert clean_doi("10.1234/asdf ") == "10.1234/asdf"
assert clean_doi("10.1037//0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
assert clean_doi("10.1037/0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
+ assert clean_doi("10.1026//1616-1041.3.2.86") == "10.1026//1616-1041.3.2.86"
assert clean_doi("10.23750/abm.v88i2 -s.6506") is None
assert clean_doi("10.17167/mksz.2017.2.129–155") is None
assert clean_doi("http://doi.org/10.1234/asdf ") == "10.1234/asdf"