aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2020-12-17 16:27:33 +0100
committerMartin Czygan <martin.czygan@gmail.com>2020-12-17 16:27:33 +0100
commit39b35a1274ac7ebdea88d2913bef8d0357063ff3 (patch)
tree45da530b6e6d9c6c6cf7e694041941020afe8154 /fuzzycat
parentaf3a185aefe726fe28ff923b8ccfc3bca329f242 (diff)
downloadfuzzycat-39b35a1274ac7ebdea88d2913bef8d0357063ff3.tar.gz
fuzzycat-39b35a1274ac7ebdea88d2913bef8d0357063ff3.zip
update stats
Diffstat (limited to 'fuzzycat')
-rw-r--r--fuzzycat/verify.py81
1 files changed, 40 insertions, 41 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 6b0a448..1cf8f3c 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -33,47 +33,46 @@ Stats from running over a full database dump. We need to run verification over
25586837 entity pairs, of which 1346217/25586837 (or about 5%) are too
ambiguous at this time.
-TODO: rerun to adjust formatting.
-
- 3450874 OK.TITLE_AUTHOR_MATCH
- 2619990 OK.SLUG_TITLE_AUTHOR_MATCH
- 2487633 Miss.YEAR
- 2434532 OK.WORK_ID
- 2085006 Miss.CONTRIB_INTERSECTION_EMPTY
- 1397420 Miss.SHARED_DOI_PREFIX
- 1355852 Miss.RELEASE_TYPE
- 1346217 OK.DUMMY
- 1145511 Miss.BOOK_CHAPTER
- 1009657 Miss.DATASET_DOI
- 996503 OK.PMID_DOI_PAIR
- 868951 OK.DATACITE_VERSION
- 796216 OK.DATACITE_RELATED_ID
- 704154 OK.FIGSHARE_VERSION
- 534963 OK.VERSIONED_DOI
- 343310 OK.TOKENIZED_AUTHORS
- 334974 OK.JACCARD_AUTHORS
- 293835 OK.PREPRINT_PUBLISHED
- 269366 Miss.COMPONENT
- 263626 Miss.SUBTITLE
- 224021 Miss.SHORT_TITLE
- 133811 Miss.CUSTOM_PREFIX_10_5860_CHOICE_REVIEW
- 122600 Miss.CUSTOM_PREFIX_10_7916
- 96935 Miss.PAGE_COUNT
- 79664 OK.CUSTOM_IEEE_ARXIV
- 46649 Miss.CUSTOM_PREFIX_10_14288
- 39797 Miss.JSTOR_ID
- 38598 OK.CUSTOM_BSI_UNDATED
- 18907 OK.CUSTOM_BSI_SUBDOC
- 15465 OK.DOI
- 13393 Miss.CUSTOM_IOP_MA_PATTERN
- 10378 Miss.CONTAINER
- 3081 Miss.BLACKLISTED
- 2504 Miss.BLACKLISTED_FRAGMENT
- 1273 Miss.APPENDIX
- 1063 Miss.TITLE_FILENAME
- 104 Miss.NUM_DIFF
- 4 OK.ARXIV_VERSION
-
+Found Status Reason
+--------------------------------------------------------------------------
+3450874 Status.EXACT Reason.TITLE_AUTHOR_MATCH
+2619990 Status.STRONG Reason.SLUG_TITLE_AUTHOR_MATCH
+2487633 Status.DIFFERENT Reason.YEAR
+2434532 Status.EXACT Reason.WORK_ID
+2085006 Status.DIFFERENT Reason.CONTRIB_INTERSECTION_EMPTY
+1397420 Status.DIFFERENT Reason.SHARED_DOI_PREFIX
+1355852 Status.DIFFERENT Reason.RELEASE_TYPE
+1290162 Status.AMBIGUOUS Reason.DUMMY
+1145511 Status.DIFFERENT Reason.BOOK_CHAPTER
+1009657 Status.DIFFERENT Reason.DATASET_DOI
+ 996503 Status.STRONG Reason.PMID_DOI_PAIR
+ 868951 Status.EXACT Reason.DATACITE_VERSION
+ 796216 Status.STRONG Reason.DATACITE_RELATED_ID
+ 704154 Status.STRONG Reason.FIGSHARE_VERSION
+ 534963 Status.STRONG Reason.VERSIONED_DOI
+ 343310 Status.STRONG Reason.TOKENIZED_AUTHORS
+ 334974 Status.STRONG Reason.JACCARD_AUTHORS
+ 293835 Status.STRONG Reason.PREPRINT_PUBLISHED
+ 269366 Status.DIFFERENT Reason.COMPONENT
+ 263626 Status.DIFFERENT Reason.SUBTITLE
+ 224021 Status.AMBIGUOUS Reason.SHORT_TITLE
+ 152990 Status.DIFFERENT Reason.PAGE_COUNT
+ 133811 Status.AMBIGUOUS Reason.CUSTOM_PREFIX_10_5860_CHOICE_REVIEW
+ 122600 Status.AMBIGUOUS Reason.CUSTOM_PREFIX_10_7916
+ 79664 Status.STRONG Reason.CUSTOM_IEEE_ARXIV
+ 46649 Status.DIFFERENT Reason.CUSTOM_PREFIX_10_14288
+ 39797 Status.DIFFERENT Reason.JSTOR_ID
+ 38598 Status.STRONG Reason.CUSTOM_BSI_UNDATED
+ 18907 Status.STRONG Reason.CUSTOM_BSI_SUBDOC
+ 15465 Status.EXACT Reason.DOI
+ 13393 Status.DIFFERENT Reason.CUSTOM_IOP_MA_PATTERN
+ 10378 Status.DIFFERENT Reason.CONTAINER
+ 3081 Status.AMBIGUOUS Reason.BLACKLISTED
+ 2504 Status.AMBIGUOUS Reason.BLACKLISTED_FRAGMENT
+ 1273 Status.AMBIGUOUS Reason.APPENDIX
+ 1063 Status.DIFFERENT Reason.TITLE_FILENAME
+ 104 Status.DIFFERENT Reason.NUM_DIFF
+ 4 Status.STRONG Reason.ARXIV_VERSION
"""
import collections