about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-11-15 16:49:21 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-11-15 16:51:55 -0800
commit: 4693394d69667570a81126ea727e9ad0ed8e1582 (patch)
tree: 9bd99a425bfc0b8cbc19617d045f26afac18402f /python/fatcat_tools
parent: 24010700fcf9717d2166cdef319d4916d87529fb (diff)
download: fatcat-4693394d69667570a81126ea727e9ad0ed8e1582.tar.gz
download: fatcat-4693394d69667570a81126ea727e9ad0ed8e1582.zip
project -> ingest_request_source
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/ingest.py | 12
-rw-r--r-- python/fatcat_tools/transforms/ingest.py | 4
-rw-r--r-- python/fatcat_tools/workers/changelog.py | 2
3 files changed, 9 insertions, 9 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index dbb8149e..12e14e52 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -28,8 +28,8 @@ class IngestFileResultImporter(EntityImporter):
print("Requiring GROBID status == 200")
else:
print("NOT checking GROBID success")
- #self.project_whitelist = ['fatcat-changelog']
- self.project_whitelist = []
+ self.ingest_request_source_whitelist = ['fatcat-changelog']
+ #self.ingest_request_source_whitelist = []
def want(self, row):
"""
@@ -49,8 +49,8 @@ class IngestFileResultImporter(EntityImporter):
if self.require_grobid and not row.get('grobid', {}).get('status_code') == 200:
self.counts['skip-grobid'] += 1
return False
- if self.project_whitelist and row.get('project') not in self.project_whitelist:
- self.counts['skip-project'] += 1
+ if self.ingest_request_source_whitelist and row.get('ingest_request_source') not in self.ingest_request_source_whitelist:
+ self.counts['skip-ingest_request_source'] += 1
return False
if row.get('hit') == True and row.get('file_meta'):
return True
@@ -122,10 +122,10 @@ class IngestFileResultImporter(EntityImporter):
)
if fatcat and fatcat.get('edit_extra'):
fe.edit_extra = fatcat['edit_extra']
- if request.get('project'):
+ if request.get('ingest_request_source'):
if not fe.edit_extra:
fe.edit_extra = dict()
- fe.edit_extra['project'] = request['project']
+ fe.edit_extra['ingest_request_source'] = request['ingest_request_source']
return fe
def try_update(self, fe):
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 83cc83be..c2ae6e0f 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -1,7 +1,7 @@
from .elasticsearch import release_to_elasticsearch
-def release_ingest_request(release, oa_only=False, project='fatcat'):
+def release_ingest_request(release, oa_only=False, ingest_request_source='fatcat'):
"""
Takes a full release entity object and returns an ingest request (as dict),
or None if it seems like this release shouldn't be ingested.
@@ -52,7 +52,7 @@ def release_ingest_request(release, oa_only=False, project='fatcat'):
ingest_request = {
'ingest_type': 'file',
- 'project': project,
+ 'ingest_request_source': ingest_request_source,
'base_url': url,
'fatcat': {
'release_stage': release.release_stage,
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index fe5c55be..e1a72217 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -222,7 +222,7 @@ class EntityUpdatesWorker(FatcatWorker):
)
# filter to "new" active releases with no matched files
if release.ident in new_release_ids:
- ir = release_ingest_request(release, project='fatcat-changelog', oa_only=self.ingest_oa_only)
+ ir = release_ingest_request(release, ingest_request_source='fatcat-changelog', oa_only=self.ingest_oa_only)
if ir and ir['ingest_type'] == 'file' and not release.files:
producer.produce(
self.ingest_file_request_topic,