figshare fix

author: Martin Czygan <martin.czygan@gmail.com> 2020-11-26 11:20:35 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-11-26 11:20:35 +0100
commit: 47761357e341b7355c98778ec1665ae73e5d6fe3 (patch)
tree: 14c0af2f9a6cc52bf8fd784d1ad8ab477c9eaef3
parent: 67589a0bc9217f3259f0f093b5283b9e92828d0f (diff)
download: fuzzycat-47761357e341b7355c98778ec1665ae73e5d6fe3.tar.gz
fuzzycat-47761357e341b7355c98778ec1665ae73e5d6fe3.zip
3 files changed, 12 insertions, 10 deletions
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
index 1cac668..4d1325d 100644
--- a/fuzzycat/utils.py
+++ b/fuzzycat/utils.py
@@ -1,13 +1,14 @@
 import io
 import itertools
-import string
 import re
+import string
 
 printable_no_punct = string.digits + string.ascii_letters + string.whitespace
 
 # More correct: https://www.johndcook.com/blog/2016/02/04/regular-expression-to-match-a-chemical-element/
 CHEM_FORMULA = re.compile(r"([A-Z]{1,2}[0-9]{1,2})+")
 
+
 def slugify_string(s: str) -> str:
     """
     Keeps ascii chars and single whitespace only.
@@ -89,4 +90,3 @@ def contains_chemical_formula(s):
     for token in s.split():
         if CHEM_FORMULA.search(token):
             return True
-
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index d111871..d7b2d62 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -138,7 +138,8 @@ def compare(a, b):
     """
     Compare two entities, return match status and reason.
     """
-    if a.get("doi") and b.get("doi") and a.get("doi") == b.get("doi"):
+    if a.get("ext_ids", {}).get("doi") and b.get("ext_ids", {}).get("doi") and a.get(
+            "ext_ids", {}).get("doi") == b.get("ext_ids", {}).get("doi"):
         return (Status.EXACT, OK.DOI)
     if len(a.get("title", "")) < 5:
         return (Status.AMBIGUOUS, Miss.SHORT_TITLE)
@@ -157,10 +158,11 @@ def compare(a, b):
 
     # TODO: figshare versions, "xxx.v1"
     FIGSHARE_PREFIX = "10.6084"
-    if a.get("doi") and b.get("doi") and a.get("doi").startswith(FIGSHARE_PREFIX + "/") and b.get(
-            "doi").startswith(FIGSHARE_PREFIX + "/"):
-        a_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", a.get("doi"))
-        b_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", a.get("doi"))
+    if a.get("ext_ids", {}).get("doi") and b.get("ext_ids", {}).get("doi") and a.get(
+            "ext_ids", {}).get("doi").startswith(FIGSHARE_PREFIX + "/") and b.get(
+                "ext_ids", {}).get("doi").startswith(FIGSHARE_PREFIX + "/"):
+        a_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", a.get("ext_ids", {}).get("doi", ""))
+        b_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", b.get("ext_ids", {}).get("doi", ""))
         if a_doi_v_stripped == b_doi_v_stripped:
             return (Status.STRONG, OK.FIGSHARE_VERSION)
 
@@ -3542,4 +3544,3 @@ TITLE_BLACKLIST = set([
     "週刊ダイヤモンド = diamond weekly 69(1)",
     "週刊ダイヤモンド = diamond weekly 別冊",
 ])
-
diff --git a/tests/test_verify.py b/tests/test_verify.py
index ebbb490..0c04d53 100644
--- a/tests/test_verify.py
+++ b/tests/test_verify.py
@@ -41,11 +41,12 @@ def test_compare():
                 pytest.fail("invalid test file, maybe missing a comma? {}".format(exc))
             status, reason = compare(load_release_ident(a), load_release_ident(b))
             if not expected_status or expected_status.lower() == "todo":
-                logger.warn(
+                logger.warning(
                     "skipping test {base}/release/{a} {base}/release/{b} -- no result defined (we think {status}, {reason})"
                     .format(a=a, b=b, base=FATCAT_BASE_URL, status=status, reason=reason))
             assert status == status, "status: want {}, got {} for {} {}".format(
                 expected_status, status, a, b)
             if expected_reason:
-                assert expected_reason.lower() == reason.lower(), "reason [{} {}]: want {}, got {}".format(a, b, expected_reason, reason)
+                assert expected_reason.lower() == reason.lower(
+                ), "reason [{} {}]: want {}, got {}".format(a, b, expected_reason, reason)
         logger.info("ran verification over {} cases (https://git.io/JkDgS)".format(i))
author	Martin Czygan <martin.czygan@gmail.com>	2020-11-26 11:20:35 +0100
committer	Martin Czygan <martin.czygan@gmail.com>	2020-11-26 11:20:35 +0100
commit	47761357e341b7355c98778ec1665ae73e5d6fe3 (patch)
tree	14c0af2f9a6cc52bf8fd784d1ad8ab477c9eaef3
parent	67589a0bc9217f3259f0f093b5283b9e92828d0f (diff)
download	fuzzycat-47761357e341b7355c98778ec1665ae73e5d6fe3.tar.gz fuzzycat-47761357e341b7355c98778ec1665ae73e5d6fe3.zip