about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fuzzycat/cluster.py 4
-rw-r--r-- fuzzycat/verify.py 27
2 files changed, 16 insertions, 15 deletions
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index 28d0417..723d0fa 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -22,8 +22,8 @@ Ideas:
* merge-join
```
-$ fuzzycat.main keygen -s "algo" < ours | sort -k1,1 > a.tsv
-$ fuzzycat.main keygen -s "algo" < other | sort -k1,1 > b.tsv
+$ python -m fuzzycat keygen -s "algo" < ours | sort -k1,1 > a.tsv
+$ python -m fuzzycat keygen -s "algo" < other | sort -k1,1 > b.tsv
$ merge-join a.tsv b.tsv
```
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index c322f39..fc298c1 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -28,22 +28,23 @@ WIPv1 (10m)
{
"miss.appendix": 176,
"miss.blacklisted": 12124,
- "miss.blacklisted_fragment": 6,
+ "miss.blacklisted_fragment": 9,
"miss.book_chapter": 46733,
- "miss.component": 1567,
- "miss.contrib_intersection_empty": 58823,
+ "miss.component": 2173,
+ "miss.contrib_intersection_empty": 73592,
"miss.dataset_doi": 30806,
"miss.num_diff": 1,
- "miss.release_type": 95950,
- "miss.short_title": 16270,
- "miss.subtitle": 6013,
- "miss.title_filename": 59,
- "miss.year": 105946,
+ "miss.release_type": 19767,
+ "miss.short_title": 16737,
+ "miss.subtitle": 11975,
+ "miss.title_filename": 87,
+ "miss.year": 123288,
"ok.arxiv_version": 90726,
- "ok.dummy": 88807,
- "ok.preprint_published": 8762,
- "ok.slug_title_author_match": 41114,
- "ok.title_author_match": 61564,
+ "ok.dummy": 106196,
+ "ok.preprint_published": 10495,
+ "ok.slug_title_author_match": 47285,
+ "ok.title_author_match": 65685,
+ "ok.tokenized_authors": 7592,
"skip.container_name_blacklist": 20,
"skip.publisher_blacklist": 456,
"skip.too_large": 7430,
@@ -170,7 +171,7 @@ class GroupVerifier:
def compare(a, b):
"""
- Compare two entities, return match status.
+ Compare two entities, return match status and reason.
"""
if len(a.get("title", "")) < 5:
return (Status.AMBIGUOUS, Miss.SHORT_TITLE)