diff options
-rw-r--r-- | fuzzycat/common.py | 7 | ||||
-rw-r--r-- | fuzzycat/verify.py | 4 | ||||
-rw-r--r-- | tests/data/verify.csv | 4 |
3 files changed, 8 insertions, 7 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py index 142cb69..79e2b14 100644 --- a/fuzzycat/common.py +++ b/fuzzycat/common.py @@ -13,12 +13,13 @@ class Status(str, Enum): EXACT = 'exact' STRONG = 'strong' WEAK = 'weak' - TODO = 'todo' # maybe UNIMPLEMENTED + TODO = 'todo' # maybe UNIMPLEMENTED, TODO: change this after !MR92 class Reason(str, Enum): """ - Reason for assuming we have a match or miss. No hard rules on naming. + Reason for assuming we have a match or miss. No hard rules on naming, maybe + if a rule leans toward specific sources, you can use `CUSTOM_` as prefix. """ APPENDIX = 'appendix' ARXIV_VERSION = 'arxiv_version' @@ -42,7 +43,6 @@ class Reason(str, Enum): DATACITE_VERSION = 'datacite_version' DATASET_DOI = 'dataset_doi' DOI = 'doi' - DUMMY = 'dummy' FIGSHARE_VERSION = 'figshare_version' JACCARD_AUTHORS = 'jaccard_authors' JSTOR_ID = 'jstor_id' @@ -61,6 +61,7 @@ class Reason(str, Enum): TITLE_AUTHOR_MATCH = 'title_author_match' TITLE_FILENAME = 'title_filename' TOKENIZED_AUTHORS = 'tokenized_authors' + UNKNOWN = 'unknown' VERSIONED_DOI = 'versioned_doi' WORK_ID = 'work_id' YEAR = 'year' diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index 260dca5..c3c7179 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -42,7 +42,7 @@ Found Status Reason 2085006 Status.DIFFERENT Reason.CONTRIB_INTERSECTION_EMPTY 1397420 Status.DIFFERENT Reason.SHARED_DOI_PREFIX 1355852 Status.DIFFERENT Reason.RELEASE_TYPE -1290162 Status.AMBIGUOUS Reason.DUMMY +1290162 Status.AMBIGUOUS Reason.UNKNOWN 1145511 Status.DIFFERENT Reason.BOOK_CHAPTER 1009657 Status.DIFFERENT Reason.DATASET_DOI 996503 Status.STRONG Reason.PMID_DOI_PAIR @@ -588,4 +588,4 @@ def verify(a: Dict, b: Dict, min_title_length=5) -> Tuple[str, str]: except (ValueError, PathAccessError): pass - return Verify(Status.AMBIGUOUS, Reason.DUMMY) + return Verify(Status.AMBIGUOUS, Reason.UNKNOWN) diff --git a/tests/data/verify.csv b/tests/data/verify.csv index 4a02ef6..b1a0e7c 100644 --- a/tests/data/verify.csv +++ b/tests/data/verify.csv @@ -12,7 +12,7 @@ knwc764q25f33ib6qnwo7pyaui,n74tqiqi5jcx5d6vl5f7lpokaa,Status.DIFFERENT,YEAR eo4qptzoqrholjslj7nemlne2y,zisq3tsezjcejinlpf7qgk6z2i,Status.DIFFERENT,YEAR crsd5c2fhvd7hodbd4trne3lgi,4547ybo5hvf4xhlh5triaccxai,Status.DIFFERENT,YEAR egxon2iqljf47c4stvacnccvwy,swuxb5owx5g4hff3c7ur5x3awy,Status.DIFFERENT,YEAR -kob434ccgbhu3ecnwnqzsb6e3a,wbw3dpl44zew3bjcwfvqtk2b2q,Status.AMBIGUOUS,DUMMY +kob434ccgbhu3ecnwnqzsb6e3a,wbw3dpl44zew3bjcwfvqtk2b2q,Status.AMBIGUOUS,UNKNOWN c2pranaprjhrxk7x5euws32cg4,liarb7xuizewdafcubg2z3dwou,Status.DIFFERENT,CONTRIB_INTERSECTION_EMPTY tyokc7ccfjaw5nimkkl32dl6ta,gyyxomlfkzfannusvzoypbnel4,Status.AMBIGUOUS,BLACKLISTED 2wakwcyb2zhbla2aao3g6ajfli,dryvgf7v3jeergr3gendplglqq,Status.DIFFERENT,CONTRIB_INTERSECTION_EMPTY @@ -133,7 +133,7 @@ wm2p5fznwffknjx56lvmr7hn4q,idpgijvcsnbqrgs2dg36vzzdzm,Status.DIFFERENT,SHARED_DO mvvbim7kdffvtosuldtv5m3uy4,b7bbygyawzdsthai7j7rmztrxe,Status.DIFFERENT, 3f3yt32rrbdzdewtcyrcyihgju,hxmxijrd3fgodd3q3crgt7d3c4,Status.STRONG,JACCARD_AUTHORS dauh7n5w65enhk5zwdfwqxv344,jg72qhdvmncfdfxg5l47hw3uba,Status.DIFFERENT,COMPONENT -voruupqxhvggfex4zlczcmjxxu,jg72qhdvmncfdfxg5l47hw3uba,Status.AMBIGUOUS,DUMMY +voruupqxhvggfex4zlczcmjxxu,jg72qhdvmncfdfxg5l47hw3uba,Status.AMBIGUOUS,UNKNOWN 6ysfa7ncx5fldmvmwvjgpf2i6e,yp3rs3xb5ra2riyx5xayrlqfum,Status.EXACT,WORK_ID arqtphat7fashokettncepu7xe,v6p7xct6kfgwtdbh57zfjqmuua,Status.AMBIGUOUS, zwru5ugcsfcyzeuqlygfw46vwq,b3uhit7b4vhvliocdzwxr7peyy,Status.AMBIGUOUS,CUSTOM_PREFIX_10_7916 |