about summary refs log tree commit diff stats
path: root/fuzzycat
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-11-21 00:34:56 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-11-21 00:34:56 +0100
commit: e3a8b7e17b7a34d148aca4e3296afca81b845339 (patch)
tree: 6c1c271a1fd49268da9cd69c6595b2d179edba27 /fuzzycat
parent: a5b9e0fee9f97f0205dfc96d35fc4b2cc823554b (diff)
download: fuzzycat-e3a8b7e17b7a34d148aca4e3296afca81b845339.tar.gz
fuzzycat-e3a8b7e17b7a34d148aca4e3296afca81b845339.zip
wip: handle empty lists
Diffstat (limited to 'fuzzycat')
-rw-r--r-- fuzzycat/verify.py 15
1 files changed, 9 insertions, 6 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 609d617..5e41e1c 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -190,7 +190,7 @@ def compare(a, b):
# TODO: figshare versions, "xxx.v1"
FIGSHARE_PREFIX = "10.6084"
- if a.get("doi").startswith(FIGSHARE_PREFIX + "/") and b.get("doi").startswith(FIGSHARE_PREFIX +
+ if a.get("doi") and b.get("doi") and a.get("doi").startswith(FIGSHARE_PREFIX + "/") and b.get("doi").startswith(FIGSHARE_PREFIX +
"/"):
a_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", a.get("doi"))
b_doi_v_stripped = re.sub(r"[.]v[0-9]+$", "", a.get("doi"))
@@ -217,7 +217,7 @@ def compare(a, b):
continue
if rel.get("relatedIdentifierType") != "DOI":
continue
- doi = reg.get("relatedIdentifier")
+ doi = rel.get("relatedIdentifier")
if not doi:
continue
dois.add(doi)
@@ -345,10 +345,11 @@ def compare(a, b):
for _, g in itertools.groupby(scores, key=lambda s: s.a):
sorted_scores = sorted(g, key=lambda s: s.value, reverse=True)
if len(sorted_scores) > 0:
- top_scores.append(sorted_scores[0])
- avg_score = sum(top_scores) / len(top_scores)
- if avg_score > 0.5:
- return (Status.STRONG, OK.TOKENIZED_AUTHORS)
+ top_scores.append(sorted_scores[0].value)
+ if len(top_scores) > 0:
+ avg_score = sum(top_scores) / len(top_scores)
+ if avg_score > 0.5:
+ return (Status.STRONG, OK.TOKENIZED_AUTHORS)
# TODO: This misses spelling differences, e.g.
# https://fatcat.wiki/release/7nbcgsohrrak5cuyk6dnit6ega and
@@ -370,6 +371,8 @@ def jaccard(a, b):
"""
Jaccard of sets a and b.
"""
+ if len(a | b) == 0:
+ return 0
return len(a & b) / len(a | b)