summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-05-22 16:02:07 -0700
committerBryan Newbold <bnewbold@robocracy.org>2020-05-22 16:03:10 -0700
commit50023c3a6dbbac3da8cbf444ef5b5e47850394e0 (patch)
treef7d94ca6db9de142b75a19baff04188f776cfd2d /python/fatcat_tools
parent0e14ec400426162f4d2d6c4bcf624ab1515fb07f (diff)
downloadfatcat-50023c3a6dbbac3da8cbf444ef5b5e47850394e0.tar.gz
fatcat-50023c3a6dbbac3da8cbf444ef5b5e47850394e0.zip
ingest importer: don't use glutton matches
Until reviewing, I didn't realize we were even doing this currently. Hopefully this has not impacted too many imports: almost all ingests use an external identifier, so only those with identifiers not in fatcat (for whatever reason) would have been affected.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--python/fatcat_tools/importers/ingest.py6
1 files changed, 3 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 4772bfaa..7d5211fc 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -19,6 +19,7 @@ class IngestFileResultImporter(EntityImporter):
editgroup_description=eg_desc,
editgroup_extra=eg_extra,
**kwargs)
+ self.use_glutton_match = False
self.default_link_rel = kwargs.get("default_link_rel", "web")
assert self.default_link_rel
self.require_grobid = require_grobid
@@ -109,7 +110,7 @@ class IngestFileResultImporter(EntityImporter):
continue
release_ident = release.ident
break
- if not release_ident and row.get('grobid'):
+ if self.use_glutton_match and not release_ident and row.get('grobid'):
# try biblio-glutton extracted hit
if row['grobid'].get('fatcat_release'):
release_ident = row['grobid']['fatcat_release'].split('_')[-1]
@@ -197,8 +198,7 @@ class IngestFileResultImporter(EntityImporter):
if not existing:
return True
- # the following checks all assume there is an existing item
-
+ # NOTE: the following checks all assume there is an existing item
if (fe.release_ids[0] in existing.release_ids) and existing.urls:
# TODO: could still, in theory update with the new URL?
self.counts['exists'] += 1