From 256130846a27d55009acc9ea337a0f53f337ffdd Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 17 Dec 2020 22:35:32 -0800
Subject: dblp: run fuzzy matching at try_update time (same as DOAJ)

---
 python/fatcat_tools/importers/dblp_release.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'python/fatcat_tools')

diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index 9170dd2f..6cf7037b 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -329,7 +329,14 @@ class DblpReleaseImporter(EntityImporter):
                     return False
                 break
 
-        # TODO: in the future could do fuzzy match here, eg using elasticsearch
+        if not existing and self.do_fuzzy_match:
+            fuzzy_result = self.match_existing_release_fuzzy(re)
+            # TODO: in the future, could assign work_id for clustering, or for
+            # "EXACT" match, set existing and allow (optional) update code path
+            # to run
+            if fuzzy_result is not None:
+                self.counts["exists-fuzzy"] += 1
+                return False
 
         # create entity
         if not existing:
-- 
cgit v1.2.3