diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-17 16:27:33 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-17 16:27:33 +0100 |
commit | 39b35a1274ac7ebdea88d2913bef8d0357063ff3 (patch) | |
tree | 45da530b6e6d9c6c6cf7e694041941020afe8154 | |
parent | af3a185aefe726fe28ff923b8ccfc3bca329f242 (diff) | |
download | fuzzycat-39b35a1274ac7ebdea88d2913bef8d0357063ff3.tar.gz fuzzycat-39b35a1274ac7ebdea88d2913bef8d0357063ff3.zip |
update stats
-rw-r--r-- | fuzzycat/verify.py | 81 |
1 files changed, 40 insertions, 41 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index 6b0a448..1cf8f3c 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -33,47 +33,46 @@ Stats from running over a full database dump. We need to run verification over 25586837 entity pairs, of which we 1346217/25586837 (or about 5%) are too ambiguous at this time. -TODO: rerun to adjust formatting. - - 3450874 OK.TITLE_AUTHOR_MATCH - 2619990 OK.SLUG_TITLE_AUTHOR_MATCH - 2487633 Miss.YEAR - 2434532 OK.WORK_ID - 2085006 Miss.CONTRIB_INTERSECTION_EMPTY - 1397420 Miss.SHARED_DOI_PREFIX - 1355852 Miss.RELEASE_TYPE - 1346217 OK.DUMMY - 1145511 Miss.BOOK_CHAPTER - 1009657 Miss.DATASET_DOI - 996503 OK.PMID_DOI_PAIR - 868951 OK.DATACITE_VERSION - 796216 OK.DATACITE_RELATED_ID - 704154 OK.FIGSHARE_VERSION - 534963 OK.VERSIONED_DOI - 343310 OK.TOKENIZED_AUTHORS - 334974 OK.JACCARD_AUTHORS - 293835 OK.PREPRINT_PUBLISHED - 269366 Miss.COMPONENT - 263626 Miss.SUBTITLE - 224021 Miss.SHORT_TITLE - 133811 Miss.CUSTOM_PREFIX_10_5860_CHOICE_REVIEW - 122600 Miss.CUSTOM_PREFIX_10_7916 - 96935 Miss.PAGE_COUNT - 79664 OK.CUSTOM_IEEE_ARXIV - 46649 Miss.CUSTOM_PREFIX_10_14288 - 39797 Miss.JSTOR_ID - 38598 OK.CUSTOM_BSI_UNDATED - 18907 OK.CUSTOM_BSI_SUBDOC - 15465 OK.DOI - 13393 Miss.CUSTOM_IOP_MA_PATTERN - 10378 Miss.CONTAINER - 3081 Miss.BLACKLISTED - 2504 Miss.BLACKLISTED_FRAGMENT - 1273 Miss.APPENDIX - 1063 Miss.TITLE_FILENAME - 104 Miss.NUM_DIFF - 4 OK.ARXIV_VERSION - +Found Status Reason +-------------------------------------------------------------------------- +3450874 Status.EXACT Reason.TITLE_AUTHOR_MATCH +2619990 Status.STRONG Reason.SLUG_TITLE_AUTHOR_MATCH +2487633 Status.DIFFERENT Reason.YEAR +2434532 Status.EXACT Reason.WORK_ID +2085006 Status.DIFFERENT Reason.CONTRIB_INTERSECTION_EMPTY +1397420 Status.DIFFERENT Reason.SHARED_DOI_PREFIX +1355852 Status.DIFFERENT Reason.RELEASE_TYPE +1290162 Status.AMBIGUOUS Reason.DUMMY +1145511 Status.DIFFERENT Reason.BOOK_CHAPTER +1009657 Status.DIFFERENT Reason.DATASET_DOI + 996503 Status.STRONG Reason.PMID_DOI_PAIR + 868951 Status.EXACT Reason.DATACITE_VERSION + 796216 Status.STRONG Reason.DATACITE_RELATED_ID + 704154 Status.STRONG Reason.FIGSHARE_VERSION + 534963 Status.STRONG Reason.VERSIONED_DOI + 343310 Status.STRONG Reason.TOKENIZED_AUTHORS + 334974 Status.STRONG Reason.JACCARD_AUTHORS + 293835 Status.STRONG Reason.PREPRINT_PUBLISHED + 269366 Status.DIFFERENT Reason.COMPONENT + 263626 Status.DIFFERENT Reason.SUBTITLE + 224021 Status.AMBIGUOUS Reason.SHORT_TITLE + 152990 Status.DIFFERENT Reason.PAGE_COUNT + 133811 Status.AMBIGUOUS Reason.CUSTOM_PREFIX_10_5860_CHOICE_REVIEW + 122600 Status.AMBIGUOUS Reason.CUSTOM_PREFIX_10_7916 + 79664 Status.STRONG Reason.CUSTOM_IEEE_ARXIV + 46649 Status.DIFFERENT Reason.CUSTOM_PREFIX_10_14288 + 39797 Status.DIFFERENT Reason.JSTOR_ID + 38598 Status.STRONG Reason.CUSTOM_BSI_UNDATED + 18907 Status.STRONG Reason.CUSTOM_BSI_SUBDOC + 15465 Status.EXACT Reason.DOI + 13393 Status.DIFFERENT Reason.CUSTOM_IOP_MA_PATTERN + 10378 Status.DIFFERENT Reason.CONTAINER + 3081 Status.AMBIGUOUS Reason.BLACKLISTED + 2504 Status.AMBIGUOUS Reason.BLACKLISTED_FRAGMENT + 1273 Status.AMBIGUOUS Reason.APPENDIX + 1063 Status.DIFFERENT Reason.TITLE_FILENAME + 104 Status.DIFFERENT Reason.NUM_DIFF + 4 Status.STRONG Reason.ARXIV_VERSION """ import collections |