diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-17 22:35:32 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-17 23:03:08 -0800 | 
| commit | 256130846a27d55009acc9ea337a0f53f337ffdd (patch) | |
| tree | 2c954162fcba1d3c458e0ec5ea496f99be279303 | |
| parent | b0e04db9a5b54e5e80e57ad02988b2550612ed23 (diff) | |
| download | fatcat-256130846a27d55009acc9ea337a0f53f337ffdd.tar.gz fatcat-256130846a27d55009acc9ea337a0f53f337ffdd.zip | |
dblp: run fuzzy matching at try_update time (same as DOAJ)
| -rw-r--r-- | python/fatcat_tools/importers/dblp_release.py | 9 | 
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index 9170dd2f..6cf7037b 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -329,7 +329,14 @@ class DblpReleaseImporter(EntityImporter):
                         return False
                     break
 
-        # TODO: in the future could do fuzzy match here, eg using elasticsearch
+        if not existing and self.do_fuzzy_match:
+            fuzzy_result = self.match_existing_release_fuzzy(re)
+            # TODO: in the future, could assign work_id for clustering, or for
+            # "EXACT" match, set existing and allow (optional) update code path
+            # to run
+            if fuzzy_result is not None:
+                self.counts["exists-fuzzy"] += 1
+                return False
 
         # create entity
         if not existing:
```
