about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  fuzzycat/common.py  1
-rw-r--r--  fuzzycat/utils.py  2
-rw-r--r--  fuzzycat/verify.py  33
-rw-r--r--  tests/data/release/ij3yuoh6lrh3tkrv5o7gfk6yyi  22
-rw-r--r--  tests/data/release/tur236mqljdfdnlzbbnks2sily  108
-rw-r--r--  tests/data/verify.csv  5
-rw-r--r--  tests/test_utils.py  6
-rw-r--r--  tests/test_verify.py  3
8 files changed, 172 insertions, 8 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 3973b1e..2298185 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -25,6 +25,7 @@ class OK(str, Enum):
SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match'
TITLE_AUTHOR_MATCH = 'ok.title_author_match'
TOKENIZED_AUTHORS = 'ok.tokenized_authors'
+ CUSTOM_IEEE_ARXIV = 'ok.custom_ieee_arxiv'
class Miss(str, Enum):
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
index 4d1325d..d6beb03 100644
--- a/fuzzycat/utils.py
+++ b/fuzzycat/utils.py
@@ -13,7 +13,7 @@ def slugify_string(s: str) -> str:
"""
Keeps ascii chars and single whitespace only.
"""
- return ''.join((c for c in s.lower() if c in printable_no_punct))
+ return ' '.join(''.join((c for c in s.lower() if c in printable_no_punct)).split())
def cut(f: int = 0, sep: str = '\t', ignore_missing_column: bool = True):
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 81c97ff..84e17d8 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -240,7 +240,12 @@ def compare(a, b):
# Added "entry" via
# https://fatcat.wiki/release/xp3oxb7tqbgaxdzkzbchfkcjn4,
# https://fatcat.wiki/release/73pcaauzwbalvi7aqhv6vopxl4
- ignore_release_types = set(["article", "article-journal", "report", "paper-conference", "entry", "book"])
+ ignore_release_types = set([
+ "article",
+ "article-journal",
+ "report",
+ "paper-conference",
+ ])
if len(types & ignore_release_types) == 0:
return (Status.DIFFERENT, Miss.RELEASE_TYPE)
except PathAccessError:
@@ -270,6 +275,32 @@ def compare(a, b):
a_slug_title = slugify_string(a.get("title", "")).replace("\n", " ")
b_slug_title = slugify_string(b.get("title", "")).replace("\n", " ")
+ try:
+ if glom(a, "ext_ids.doi") == "10.1109/nssmic.2013.6829591":
+ print(a_slug_title)
+ print(b_slug_title)
+ except PathAccessError:
+ pass
+
+ if a_slug_title == b_slug_title:
+ # via: https://fatcat.wiki/release/ij3yuoh6lrh3tkrv5o7gfk6yyi
+ # https://fatcat.wiki/release/tur236mqljdfdnlzbbnks2sily
+ def ieee_arxiv_pair_check(a, b):
+ try:
+ print(a_slug_title, glom(a, "ext_ids.doi"))
+ if (glom(a, "ext_ids.doi").split("/")[0] == "10.1109"
+ and glom(b, "ext_ids.arxiv") != ""):
+ return (Status.STRONG, OK.CUSTOM_IEEE_ARXIV)
+ except PathAccessError:
+ pass
+
+ result = ieee_arxiv_pair_check(a, b)
+ if result:
+ return result
+ result = ieee_arxiv_pair_check(b, a)
+ if result:
+ return result
+
if a_slug_title == b_slug_title:
try:
a_subtitles = glom(a, "extra.subtitle") or []
diff --git a/tests/data/release/ij3yuoh6lrh3tkrv5o7gfk6yyi b/tests/data/release/ij3yuoh6lrh3tkrv5o7gfk6yyi
new file mode 100644
index 0000000..bfb5c44
--- /dev/null
+++ b/tests/data/release/ij3yuoh6lrh3tkrv5o7gfk6yyi
@@ -0,0 +1,22 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.1109/nssmic.2013.6829591"
+ },
+ "extra": {
+ "container_name": "2013 IEEE Nuclear Science Symposium and Medical Imaging Conference (2013 NSS/MIC)",
+ "crossref": {
+ "type": "proceedings-article"
+ }
+ },
+ "ident": "ij3yuoh6lrh3tkrv5o7gfk6yyi",
+ "publisher": "IEEE",
+ "refs": [],
+ "release_type": "paper-conference",
+ "release_year": 2013,
+ "revision": "86796ef0-33f9-46d2-bd17-7ffc6350077a",
+ "state": "active",
+ "title": "A balloon-borne measurement of high latitude atmospheric neutrons using a licaf neutron detector",
+ "work_id": "kfq2747aj5c7dfoaic2rrmo3ey"
+}
diff --git a/tests/data/release/tur236mqljdfdnlzbbnks2sily b/tests/data/release/tur236mqljdfdnlzbbnks2sily
new file mode 100644
index 0000000..c7b2a76
--- /dev/null
+++ b/tests/data/release/tur236mqljdfdnlzbbnks2sily
@@ -0,0 +1,108 @@
+{
+ "abstracts": [
+ {
+ "content": "PoGOLino is a scintillator-based neutron detector. Its main purpose is to\nprovide data on the neutron flux in the upper stratosphere at high latitudes at\nthermal and nonthermal energies for the PoGOLite instrument. PoGOLite is a\nballoon borne hard X-ray polarimeter for which the main source of background\nstems from high energy neutrons. No measurements of the neutron environment for\nthe planned flight latitude and altitude exist. Furthermore this neutron\nenvironment changes with altitude, latitude and solar activity, three variables\nthat will vary throughout the PoGOLite flight. PoGOLino was developed to study\nthe neutron environment and the influences from these three variables upon it.\nPoGOLino consists of two Europium doped Lithium Calcium Aluminium Fluoride\n(Eu:LiCAF) scintillators, each of which is sandwiched between 2 Bismuth\nGermanium Oxide (BGO) scintillating crystals, which serve to veto signals\nproduced by gamma-rays and charged particles. This allows the neutron flux to\nbe measured even in high radiation environments. Measurements of neutrons in\ntwo separate energy bands are achieved by placing one LiCAF detector inside a\nmoderating polyethylene shield while the second detector remains unshielded.\nThe PoGOLino instrument was launched on March 20th 2013 from the Esrange Space\nCenter in Northern Sweden to an altitude of 30.9 km. A description of the\ndetector design and read-out system is presented. A detailed set of simulations\nof the atmospheric neutron environment performed using both PLANETOCOSMICS and\nGeant4 will also be described. The comparison of the neutron flux measured\nduring flight to predictions based on these simulations will be presented and\nthe consequences for the PoGOLite background will be discussed.",
+ "lang": "en",
+ "mimetype": "text/plain",
+ "sha1": "555bc72b30fde1b0ae40c2846987dbe6c4f71eab"
+ }
+ ],
+ "contribs": [
+ {
+ "index": 0,
+ "raw_name": "Merlin Kole",
+ "role": "author"
+ },
+ {
+ "index": 1,
+ "raw_name": "Yasushi Fukazawa",
+ "role": "author"
+ },
+ {
+ "index": 2,
+ "raw_name": "Kentaro Fukuda",
+ "role": "author"
+ },
+ {
+ "index": 3,
+ "raw_name": "Sumito Ishizu",
+ "role": "author"
+ },
+ {
+ "index": 4,
+ "raw_name": "Miranda\n Jackson",
+ "role": "author"
+ },
+ {
+ "index": 5,
+ "raw_name": "Tune Kamae",
+ "role": "author"
+ },
+ {
+ "index": 6,
+ "raw_name": "Noriaki Kawaguchi",
+ "role": "author"
+ },
+ {
+ "index": 7,
+ "raw_name": "Takafumi Kawano",
+ "role": "author"
+ },
+ {
+ "index": 8,
+ "raw_name": "Mózsi Kiss",
+ "role": "author"
+ },
+ {
+ "index": 9,
+ "raw_name": "Elena\n Moretti",
+ "role": "author"
+ },
+ {
+ "index": 10,
+ "raw_name": "Maria Fernanda Muñoz Salinas",
+ "role": "author"
+ },
+ {
+ "index": 11,
+ "raw_name": "Mark Pearce",
+ "role": "author"
+ },
+ {
+ "index": 12,
+ "raw_name": "Stefan Rydström,\n Hiromitsu Takahashi",
+ "role": "author"
+ },
+ {
+ "index": 13,
+ "raw_name": "Takayuki Yanagida",
+ "role": "author"
+ }
+ ],
+ "ext_ids": {
+ "arxiv": "1311.5531v1"
+ },
+ "extra": {
+ "arxiv": {
+ "base_id": "1311.5531",
+ "categories": [
+ "astro-ph.IM",
+ "physics.ins-det"
+ ],
+ "comments": "Presented at 2013 IEEE Nuclear Science Symposium, Seoul, Korea.\n October 27 - November 2, 2013"
+ }
+ },
+ "ident": "tur236mqljdfdnlzbbnks2sily",
+ "language": "en",
+ "license_slug": "ARXIV-1.0",
+ "refs": [],
+ "release_date": "2013-11-21",
+ "release_stage": "submitted",
+ "release_type": "article",
+ "release_year": 2013,
+ "revision": "f1d2d5b4-2dbe-476e-bb3b-65397d2d062d",
+ "state": "active",
+ "title": "A Balloon-borne Measurement of High Latitude Atmospheric Neutrons Using\n a LiCAF Neutron Detector",
+ "version": "v1",
+ "work_id": "phxon3cn6rbshkf3id6cfmdsza"
+}
diff --git a/tests/data/verify.csv b/tests/data/verify.csv
index 2f2bf9b..0c3dda2 100644
--- a/tests/data/verify.csv
+++ b/tests/data/verify.csv
@@ -21,7 +21,7 @@ cwfhdsdr6nbtngqwsqpafqj72u,icrvubkwprh6fl2irtrxziqqai,Status.STRONG,OK.ARXIV_VER
qlkjwemcrzcpjeeecduiunghui,chejpgnhebcx7of4d4dkuqhkne,Status.DIFFERENT,Miss.YEAR
no7a4vrfwnfp7jqrliq6n2hpxi,rscsor4cl5fydedr2jb6o7k4zi,Status.DIFFERENT,Miss.CONTRIB_INTERSECTION_EMPTY
mxfrtcc3njeh5dscwgzhrugzsq,x7lbkuc5afb75nz5l5kyrzy2ia,Status.DIFFERENT,Miss.YEAR
-cqkm3hyn3rgcng3d3alwtciwpq,unwrwze6znf5xouud35i3jlneq,Status.STRONG,OK.SLUG_TITLE_AUTHOR_MATCH
+cqkm3hyn3rgcng3d3alwtciwpq,unwrwze6znf5xouud35i3jlneq,Status.STRONG,OK.PREPRINT_PUBLISHED
fzs6y277zbgxnbcsmmfnftyqgy,b2ggrb2mpvh4namvf6mht5nnaq,Status.DIFFERENT,Miss.YEAR
qgvu7i5eqrakpcnantqikaxpbu,kafrljfrv5favpvbgxavobh46y,Status.AMBIGUOUS,Miss.SHORT_TITLE
qbfao6tzh5gkxaqaqwmidpme3q,whyzodcvtzehjdvj5ezvbkda34,Status.DIFFERENT,Miss.SUBTITLE
@@ -62,7 +62,7 @@ iywyis7npngxxbco6fgjrclrzy,anhsfjxg3few5nkfsvheehiebq,Status.DIFFERENT,Miss.BOOK
rk7mn5uaqjaslgcxc2nl6ijpaq,td3rnxzbxzeslj6ijoce3mtxcq,Status.STRONG,OK.ARXIV_VERSION
ohkfrjjcxfcavoqoqt52wi6eke,egufgu3yubgthex3y7fdt7uupa,Status.DIFFERENT,Miss.DATASET_DOI
dklwsz4w3rdlfddif4pcxb6ngm,wsbinmv7lragjnaedbgws6bztm,Status.AMBIGUOUS,OK.DUMMY
-jizydliu2vclvpdtcrajlvuq2m,3g6mdd3tvjabdaez6mwcycso3q,Status.STRONG,OK.SLUG_TITLE_AUTHOR_MATCH
+jizydliu2vclvpdtcrajlvuq2m,3g6mdd3tvjabdaez6mwcycso3q,Status.STRONG,OK.PREPRINT_PUBLISHED
fvrscdvsznb4zlhuadd6ar7ot4,57la45yryjd73gav22bnl4lyni,Status.STRONG,OK.FIGSHARE_VERSION
6fedywjyynbxhdqv3etxjuqhba,gls2x7ca4nhzrkf437gdnj6ekq,Status.DIFFERENT,Miss.YEAR
7lepq6lyyfepdjat6ohpeqycdu,cfm6qhhxovferl2fahf6jmcsiu,Status.DIFFERENT,Miss.YEAR
@@ -102,3 +102,4 @@ r63fa4mqkfb3leafypdwnnj7jq,usifdrmhdbfhlodgaqgzwzi4da,,
bg4gzikycnfvtkfwl5qnxeywwa,fbdg4rdgw5halgkhr4qcsex25y,Status.EXACT,
omjj75lv4rayvcqmgmicnzf5ye,xxfujnvafrazbjw7kvh7bhmuvy,,
63g4ukdxajcqhdytqla6du3t3u,rz72bzfevzeofdeb342c6z45qu,Status.DIFFERENT,Miss.CUSTOM_PREFIX_10_14288
+ij3yuoh6lrh3tkrv5o7gfk6yyi,tur236mqljdfdnlzbbnks2sily,Status.STRONG,
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9357fe8..a2033ac 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -9,9 +9,9 @@ def test_slugify_string():
assert slugify_string("X") == "x"
assert slugify_string("Xx") == "xx"
assert slugify_string("Xx x") == "xx x"
- assert slugify_string("Xx x x") == "xx x x"
- assert slugify_string("Xx?x x") == "xxx x"
- assert slugify_string("Xx? ?x x") == "xx x x"
+ assert slugify_string("Xx x x") == "xx x x"
+ assert slugify_string("Xx?x x") == "xxx x"
+ assert slugify_string("Xx? ?x x") == "xx x x"
assert slugify_string("Xx?_?x--x") == "xxxx"
assert slugify_string("=?++*") == ""
diff --git a/tests/test_verify.py b/tests/test_verify.py
index d740c06..a80dcda 100644
--- a/tests/test_verify.py
+++ b/tests/test_verify.py
@@ -50,5 +50,6 @@ def test_compare():
expected_status, status, a, b)
if expected_reason:
assert expected_reason.lower() == reason.lower(
- ), "reason [{} {}]: want {}, got {}".format(a, b, expected_reason, reason)
+ ), "reason [{base}release/{a} {base}release/{b}]: want {reason}, got {expected_reason}".format(
+ base=FATCAT_BASE_URL, a=a, b=b, expected_reason=expected_reason, reason=reason)
logger.info("ran verification over {} cases (https://git.io/JkDgS)".format(i))