aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-12-17 22:35:32 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-12-17 23:03:08 -0800
commit: 256130846a27d55009acc9ea337a0f53f337ffdd (patch)
tree: 2c954162fcba1d3c458e0ec5ea496f99be279303 /python
parent: b0e04db9a5b54e5e80e57ad02988b2550612ed23 (diff)
download: fatcat-256130846a27d55009acc9ea337a0f53f337ffdd.tar.gz
fatcat-256130846a27d55009acc9ea337a0f53f337ffdd.zip
dblp: run fuzzy matching at try_update time (same as DOAJ)
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_tools/importers/dblp_release.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index 9170dd2f..6cf7037b 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -329,7 +329,14 @@ class DblpReleaseImporter(EntityImporter):
return False
break
- # TODO: in the future could do fuzzy match here, eg using elasticsearch
+ if not existing and self.do_fuzzy_match:
+ fuzzy_result = self.match_existing_release_fuzzy(re)
+ # TODO: in the future, could assign work_id for clustering, or for
+ # "EXACT" match, set existing and allow (optional) update code path
+ # to run
+ if fuzzy_result is not None:
+ self.counts["exists-fuzzy"] += 1
+ return False
# create entity
if not existing: