about | summary | refs | log | tree | commit | diff | stats
path: root/fuzzycat
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-11-26 11:04:03 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-11-26 11:04:03 +0100
commit: 331e81b708328ad6bd6f19240db9fc40348a5b77 (patch)
tree: f13ad39a18edaa0ac59c87e9897cb50020bd8e1a /fuzzycat
parent: 232ab9ea69726ea22cff68323974fe038c0d5957 (diff)
download: fuzzycat-331e81b708328ad6bd6f19240db9fc40348a5b77.tar.gz
download: fuzzycat-331e81b708328ad6bd6f19240db9fc40348a5b77.zip
add doi check
Diffstat (limited to 'fuzzycat')
-rw-r--r-- fuzzycat/common.py 25
-rw-r--r-- fuzzycat/verify.py 2
2 files changed, 15 insertions, 12 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 7daec7a..2f3d1fd 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -5,11 +5,11 @@ class Status(str, Enum):
"""
Match status.
"""
- EXACT = 'exact'
+ AMBIGUOUS = 'ambigiuous'
DIFFERENT = 'different'
+ EXACT = 'exact'
STRONG = 'strong'
WEAK = 'weak'
- AMBIGUOUS = 'ambigiuous'
class OK(str, Enum):
@@ -17,32 +17,33 @@ class OK(str, Enum):
Reason for assuming we have a match.
"""
ARXIV_VERSION = 'ok.arxiv_version'
- FIGSHARE_VERSION = 'ok.figshare_version'
+ DATACITE_RELATED_ID = 'ok.datacite_related_id'
+ DOI = 'ok.doi'
DUMMY = 'ok.dummy'
- TITLE_AUTHOR_MATCH = 'ok.title_author_match'
+ FIGSHARE_VERSION = 'ok.figshare_version'
PREPRINT_PUBLISHED = 'ok.preprint_published'
SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match'
+ TITLE_AUTHOR_MATCH = 'ok.title_author_match'
TOKENIZED_AUTHORS = 'ok.tokenized_authors'
- DATACITE_RELATED_ID = 'ok.datacite_related_id'
class Miss(str, Enum):
"""
Reasons indicating mismatch.
"""
+ APPENDIX = 'miss.appendix'
ARXIV_VERSION = 'miss.arxiv_version'
BLACKLISTED = 'miss.blacklisted'
BLACKLISTED_FRAGMENT = 'miss.blacklisted_fragment'
+ BOOK_CHAPTER = 'miss.book_chapter'
+ CHEM_FORMULA = 'miss.chem_formula'
+ COMPONENT = 'miss.component'
CONTRIB_INTERSECTION_EMPTY = 'miss.contrib_intersection_empty'
- SHORT_TITLE = 'miss.short_title'
- YEAR = 'miss.year'
CUSTOM_VHS = 'miss.vhs' # https://fatcat.wiki/release/44gk5ben5vghljq6twm7lwmxla
- NUM_DIFF = 'miss.num_diff'
DATASET_DOI = 'miss.dataset_doi'
+ NUM_DIFF = 'miss.num_diff'
RELEASE_TYPE = 'miss.release_type'
- CHEM_FORMULA = 'miss.chem_formula'
+ SHORT_TITLE = 'miss.short_title'
SUBTITLE = 'miss.subtitle'
- BOOK_CHAPTER = 'miss.book_chapter'
TITLE_FILENAME = 'miss.title_filename'
- COMPONENT = 'miss.component'
- APPENDIX = 'miss.appendix'
+ YEAR = 'miss.year'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 54cabe4..d111871 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -138,6 +138,8 @@ def compare(a, b):
"""
Compare two entities, return match status and reason.
"""
+ if a.get("doi") and b.get("doi") and a.get("doi") == b.get("doi"):
+ return (Status.EXACT, OK.DOI)
if len(a.get("title", "")) < 5:
return (Status.AMBIGUOUS, Miss.SHORT_TITLE)
if a.get("title", "").lower() in TITLE_BLACKLIST: