summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers/doaj_article.py
diff options
context:
space:
mode:
author: bnewbold <bnewbold@archive.org> 2020-12-18 02:13:47 +0000
committer: bnewbold <bnewbold@archive.org> 2020-12-18 02:13:47 +0000
commit: 443243e8cccba3e779b7c56d0cdb6dcd992a3100 (patch)
tree: b279887d9038daa19b72e53509658f016eaec452 /python/fatcat_tools/importers/doaj_article.py
parent: 7d90a0404e3ecb44c7d0ca93b2c32a0f66b8d88a (diff)
parent: 5eeb7a9d61beb8cb40fd89bd91fcd9dd820035aa (diff)
download: fatcat-443243e8cccba3e779b7c56d0cdb6dcd992a3100.tar.gz
download: fatcat-443243e8cccba3e779b7c56d0cdb6dcd992a3100.zip
Merge branch 'bnewbold-doaj-fuzzy' into 'master'
DOAJ import fuzzy match filter. See merge request webgroup/fatcat!92.
Diffstat (limited to 'python/fatcat_tools/importers/doaj_article.py')
-rw-r--r-- python/fatcat_tools/importers/doaj_article.py | 11
1 files changed, 9 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/doaj_article.py b/python/fatcat_tools/importers/doaj_article.py
index 03752484..191a65d8 100644
--- a/python/fatcat_tools/importers/doaj_article.py
+++ b/python/fatcat_tools/importers/doaj_article.py
@@ -217,9 +217,16 @@ class DoajArticleImporter(EntityImporter):
return False
break
- # TODO: in the future could do fuzzy match here, eg using elasticsearch
+ if not existing and self.do_fuzzy_match:
+ fuzzy_result = self.match_existing_release_fuzzy(re)
+ # TODO: in the future, could assign work_id for clustering, or for
+ # "EXACT" match, set existing and allow (optional) update code path
+ # to run
+ if fuzzy_result is not None:
+ self.counts["exists-fuzzy"] += 1
+ return False
- # create entity
+ # if no fuzzy existing match, create entity
if not existing:
return True