aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-12-01 23:09:00 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-12-01 23:09:00 +0100
commit 8183e792ae122ae66b66299da1948697ae296ac7 (patch)
tree 53d24af28488737e639ef3f05efa55f7dc07c1d7 /fuzzycat
parent 27e6e0b07b091cd0dd3e66a01ebed846dc9e9f50 (diff)
download fuzzycat-8183e792ae122ae66b66299da1948697ae296ac7.tar.gz
fuzzycat-8183e792ae122ae66b66299da1948697ae296ac7.zip
add another case
Diffstat (limited to 'fuzzycat')
-rw-r--r-- fuzzycat/common.py 1
-rw-r--r-- fuzzycat/utils.py 2
-rw-r--r-- fuzzycat/verify.py 33
3 files changed, 34 insertions, 2 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 3973b1e..2298185 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -25,6 +25,7 @@ class OK(str, Enum):
SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match'
TITLE_AUTHOR_MATCH = 'ok.title_author_match'
TOKENIZED_AUTHORS = 'ok.tokenized_authors'
+ CUSTOM_IEEE_ARXIV = 'ok.custom_ieee_arxiv'
class Miss(str, Enum):
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
index 4d1325d..d6beb03 100644
--- a/fuzzycat/utils.py
+++ b/fuzzycat/utils.py
@@ -13,7 +13,7 @@ def slugify_string(s: str) -> str:
"""
Keeps ascii chars and single whitespace only.
"""
- return ''.join((c for c in s.lower() if c in printable_no_punct))
+ return ' '.join(''.join((c for c in s.lower() if c in printable_no_punct)).split())
def cut(f: int = 0, sep: str = '\t', ignore_missing_column: bool = True):
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 81c97ff..84e17d8 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -240,7 +240,12 @@ def compare(a, b):
# Added "entry" via
# https://fatcat.wiki/release/xp3oxb7tqbgaxdzkzbchfkcjn4,
# https://fatcat.wiki/release/73pcaauzwbalvi7aqhv6vopxl4
- ignore_release_types = set(["article", "article-journal", "report", "paper-conference", "entry", "book"])
+ ignore_release_types = set([
+ "article",
+ "article-journal",
+ "report",
+ "paper-conference",
+ ])
if len(types & ignore_release_types) == 0:
return (Status.DIFFERENT, Miss.RELEASE_TYPE)
except PathAccessError:
@@ -270,6 +275,32 @@ def compare(a, b):
a_slug_title = slugify_string(a.get("title", "")).replace("\n", " ")
b_slug_title = slugify_string(b.get("title", "")).replace("\n", " ")
+ try:
+ if glom(a, "ext_ids.doi") == "10.1109/nssmic.2013.6829591":
+ print(a_slug_title)
+ print(b_slug_title)
+ except PathAccessError:
+ pass
+
+ if a_slug_title == b_slug_title:
+ # via: https://fatcat.wiki/release/ij3yuoh6lrh3tkrv5o7gfk6yyi
+ # https://fatcat.wiki/release/tur236mqljdfdnlzbbnks2sily
+ def ieee_arxiv_pair_check(a, b):
+ try:
+ print(a_slug_title, glom(a, "ext_ids.doi"))
+ if (glom(a, "ext_ids.doi").split("/")[0] == "10.1109"
+ and glom(b, "ext_ids.arxiv") != ""):
+ return (Status.STRONG, OK.CUSTOM_IEEE_ARXIV)
+ except PathAccessError:
+ pass
+
+ result = ieee_arxiv_pair_check(a, b)
+ if result:
+ return result
+ result = ieee_arxiv_pair_check(b, a)
+ if result:
+ return result
+
if a_slug_title == b_slug_title:
try:
a_subtitles = glom(a, "extra.subtitle") or []