figshare fix

author: Martin Czygan <martin.czygan@gmail.com> 2020-11-26 11:20:35 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-11-26 11:20:35 +0100
commit: 47761357e341b7355c98778ec1665ae73e5d6fe3 (patch)
tree: 14c0af2f9a6cc52bf8fd784d1ad8ab477c9eaef3 /fuzzycat
parent: 67589a0bc9217f3259f0f093b5283b9e92828d0f (diff)
download: fuzzycat-47761357e341b7355c98778ec1665ae73e5d6fe3.tar.gz
fuzzycat-47761357e341b7355c98778ec1665ae73e5d6fe3.zip
2 files changed, 9 insertions, 8 deletions
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
index 1cac668..4d1325d 100644
--- a/fuzzycat/utils.py
+++ b/fuzzycat/utils.py
@@ -1,13 +1,14 @@
 import io
 import itertools
-import string
 import re
+import string
 
 printable_no_punct = string.digits + string.ascii_letters + string.whitespace
 
 # More correct: https://www.johndcook.com/blog/2016/02/04/regular-expression-to-match-a-chemical-element/
 CHEM_FORMULA = re.compile(r"([A-Z]{1,2}[0-9]{1,2})+")
 
+
 def slugify_string(s: str) -> str:
     """
     Keeps ascii chars and single whitespace only.
@@ -89,4 +90,3 @@ def contains_chemical_formula(s):
     for token in s.split():
         if CHEM_FORMULA.search(token):
             return True
-
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index d111871..d7b2d62 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -138,7 +138,8 @@ def compare(a, b):
     """
     Compare two entities, return match status and reason.
     """
-    if a.get("doi") and b.get("doi") and a.get("doi") == b.get("doi"):
+    if a.get("ext_ids", {}).get("doi") and b.get("ext_ids", {}).get("doi") and a.get(
+            "ext_ids", {}).get("doi") == b.get("ext_ids", {}).get("doi"):
         return (Status.EXACT, OK.DOI)
     if len(a.get("title", "")) < 5:
         return (Status.AMBIGUOUS, Miss.SHORT_TITLE)
@@ -157,10 +158,11 @@ def compare(a, b):
 
     # TODO: figshare versions, "xxx.v1"
     FIGSHARE_PREFIX = "10.6084"
-    if a.get("doi") and b.get("doi") and a.get("doi").startswith(FIGSHARE_PREFIX + "/") and b.get(
-            "doi").startswith(FIGSHARE_PREFIX + "/"):
-        a_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", a.get("doi"))
-        b_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", a.get("doi"))
+    if a.get("ext_ids", {}).get("doi") and b.get("ext_ids", {}).get("doi") and a.get(
+            "ext_ids", {}).get("doi").startswith(FIGSHARE_PREFIX + "/") and b.get(
+                "ext_ids", {}).get("doi").startswith(FIGSHARE_PREFIX + "/"):
+        a_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", a.get("ext_ids", {}).get("doi", ""))
+        b_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", b.get("ext_ids", {}).get("doi", ""))
         if a_doi_v_stripped == b_doi_v_stripped:
             return (Status.STRONG, OK.FIGSHARE_VERSION)
 
@@ -3542,4 +3544,3 @@ TITLE_BLACKLIST = set([
     "週刊ダイヤモンド = diamond weekly 69(1)",
     "週刊ダイヤモンド = diamond weekly 別冊",
 ])
-
author	Martin Czygan <martin.czygan@gmail.com>	2020-11-26 11:20:35 +0100
committer	Martin Czygan <martin.czygan@gmail.com>	2020-11-26 11:20:35 +0100
commit	47761357e341b7355c98778ec1665ae73e5d6fe3 (patch)
tree	14c0af2f9a6cc52bf8fd784d1ad8ab477c9eaef3 /fuzzycat
parent	67589a0bc9217f3259f0f093b5283b9e92828d0f (diff)
download	fuzzycat-47761357e341b7355c98778ec1665ae73e5d6fe3.tar.gz fuzzycat-47761357e341b7355c98778ec1665ae73e5d6fe3.zip