From 50023c3a6dbbac3da8cbf444ef5b5e47850394e0 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 22 May 2020 16:02:07 -0700 Subject: ingest importer: don't use glutton matches Until reviewing I didn't realize we were even doing this currently. Hopefully has not impacted too many imports, as almost all ingests use an external identifier, so only those with identifiers not in fatcat for whatever reason. --- python/fatcat_tools/importers/ingest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'python') diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index 4772bfaa..7d5211fc 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -19,6 +19,7 @@ class IngestFileResultImporter(EntityImporter): editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs) + self.use_glutton_match = False self.default_link_rel = kwargs.get("default_link_rel", "web") assert self.default_link_rel self.require_grobid = require_grobid @@ -109,7 +110,7 @@ class IngestFileResultImporter(EntityImporter): continue release_ident = release.ident break - if not release_ident and row.get('grobid'): + if self.use_glutton_match and not release_ident and row.get('grobid'): # try biblio-glutton extracted hit if row['grobid'].get('fatcat_release'): release_ident = row['grobid']['fatcat_release'].split('_')[-1] @@ -197,8 +198,7 @@ class IngestFileResultImporter(EntityImporter): if not existing: return True - # the following checks all assume there is an existing item - + # NOTE: the following checks all assume there is an existing item if (fe.release_ids[0] in existing.release_ids) and existing.urls: # TODO: could still, in theory update with the new URL? self.counts['exists'] += 1 -- cgit v1.2.3