about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fuzzycat/common.py 7
-rw-r--r-- fuzzycat/verify.py 4
-rw-r--r-- tests/data/verify.csv 4
3 files changed, 8 insertions, 7 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 142cb69..79e2b14 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -13,12 +13,13 @@ class Status(str, Enum):
EXACT = 'exact'
STRONG = 'strong'
WEAK = 'weak'
- TODO = 'todo' # maybe UNIMPLEMENTED
+ TODO = 'todo' # maybe UNIMPLEMENTED, TODO: change this after !MR92
class Reason(str, Enum):
"""
- Reason for assuming we have a match or miss. No hard rules on naming.
+ Reason for assuming we have a match or miss. No hard rules on naming; if
+ a rule leans toward specific sources, you can use `CUSTOM_` as a prefix.
"""
APPENDIX = 'appendix'
ARXIV_VERSION = 'arxiv_version'
@@ -42,7 +43,6 @@ class Reason(str, Enum):
DATACITE_VERSION = 'datacite_version'
DATASET_DOI = 'dataset_doi'
DOI = 'doi'
- DUMMY = 'dummy'
FIGSHARE_VERSION = 'figshare_version'
JACCARD_AUTHORS = 'jaccard_authors'
JSTOR_ID = 'jstor_id'
@@ -61,6 +61,7 @@ class Reason(str, Enum):
TITLE_AUTHOR_MATCH = 'title_author_match'
TITLE_FILENAME = 'title_filename'
TOKENIZED_AUTHORS = 'tokenized_authors'
+ UNKNOWN = 'unknown'
VERSIONED_DOI = 'versioned_doi'
WORK_ID = 'work_id'
YEAR = 'year'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 260dca5..c3c7179 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -42,7 +42,7 @@ Found Status Reason
2085006 Status.DIFFERENT Reason.CONTRIB_INTERSECTION_EMPTY
1397420 Status.DIFFERENT Reason.SHARED_DOI_PREFIX
1355852 Status.DIFFERENT Reason.RELEASE_TYPE
-1290162 Status.AMBIGUOUS Reason.DUMMY
+1290162 Status.AMBIGUOUS Reason.UNKNOWN
1145511 Status.DIFFERENT Reason.BOOK_CHAPTER
1009657 Status.DIFFERENT Reason.DATASET_DOI
996503 Status.STRONG Reason.PMID_DOI_PAIR
@@ -588,4 +588,4 @@ def verify(a: Dict, b: Dict, min_title_length=5) -> Tuple[str, str]:
except (ValueError, PathAccessError):
pass
- return Verify(Status.AMBIGUOUS, Reason.DUMMY)
+ return Verify(Status.AMBIGUOUS, Reason.UNKNOWN)
diff --git a/tests/data/verify.csv b/tests/data/verify.csv
index 4a02ef6..b1a0e7c 100644
--- a/tests/data/verify.csv
+++ b/tests/data/verify.csv
@@ -12,7 +12,7 @@ knwc764q25f33ib6qnwo7pyaui,n74tqiqi5jcx5d6vl5f7lpokaa,Status.DIFFERENT,YEAR
eo4qptzoqrholjslj7nemlne2y,zisq3tsezjcejinlpf7qgk6z2i,Status.DIFFERENT,YEAR
crsd5c2fhvd7hodbd4trne3lgi,4547ybo5hvf4xhlh5triaccxai,Status.DIFFERENT,YEAR
egxon2iqljf47c4stvacnccvwy,swuxb5owx5g4hff3c7ur5x3awy,Status.DIFFERENT,YEAR
-kob434ccgbhu3ecnwnqzsb6e3a,wbw3dpl44zew3bjcwfvqtk2b2q,Status.AMBIGUOUS,DUMMY
+kob434ccgbhu3ecnwnqzsb6e3a,wbw3dpl44zew3bjcwfvqtk2b2q,Status.AMBIGUOUS,UNKNOWN
c2pranaprjhrxk7x5euws32cg4,liarb7xuizewdafcubg2z3dwou,Status.DIFFERENT,CONTRIB_INTERSECTION_EMPTY
tyokc7ccfjaw5nimkkl32dl6ta,gyyxomlfkzfannusvzoypbnel4,Status.AMBIGUOUS,BLACKLISTED
2wakwcyb2zhbla2aao3g6ajfli,dryvgf7v3jeergr3gendplglqq,Status.DIFFERENT,CONTRIB_INTERSECTION_EMPTY
@@ -133,7 +133,7 @@ wm2p5fznwffknjx56lvmr7hn4q,idpgijvcsnbqrgs2dg36vzzdzm,Status.DIFFERENT,SHARED_DO
mvvbim7kdffvtosuldtv5m3uy4,b7bbygyawzdsthai7j7rmztrxe,Status.DIFFERENT,
3f3yt32rrbdzdewtcyrcyihgju,hxmxijrd3fgodd3q3crgt7d3c4,Status.STRONG,JACCARD_AUTHORS
dauh7n5w65enhk5zwdfwqxv344,jg72qhdvmncfdfxg5l47hw3uba,Status.DIFFERENT,COMPONENT
-voruupqxhvggfex4zlczcmjxxu,jg72qhdvmncfdfxg5l47hw3uba,Status.AMBIGUOUS,DUMMY
+voruupqxhvggfex4zlczcmjxxu,jg72qhdvmncfdfxg5l47hw3uba,Status.AMBIGUOUS,UNKNOWN
6ysfa7ncx5fldmvmwvjgpf2i6e,yp3rs3xb5ra2riyx5xayrlqfum,Status.EXACT,WORK_ID
arqtphat7fashokettncepu7xe,v6p7xct6kfgwtdbh57zfjqmuua,Status.AMBIGUOUS,
zwru5ugcsfcyzeuqlygfw46vwq,b3uhit7b4vhvliocdzwxr7peyy,Status.AMBIGUOUS,CUSTOM_PREFIX_10_7916