diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-11-15 16:49:21 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-11-15 16:51:55 -0800 |
commit | 4693394d69667570a81126ea727e9ad0ed8e1582 (patch) | |
tree | 9bd99a425bfc0b8cbc19617d045f26afac18402f /python/fatcat_tools | |
parent | 24010700fcf9717d2166cdef319d4916d87529fb (diff) | |
download | fatcat-4693394d69667570a81126ea727e9ad0ed8e1582.tar.gz fatcat-4693394d69667570a81126ea727e9ad0ed8e1582.zip |
project -> ingest_request_source
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/ingest.py | 12 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/ingest.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/workers/changelog.py | 2 |
3 files changed, 9 insertions, 9 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index dbb8149e..12e14e52 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -28,8 +28,8 @@ class IngestFileResultImporter(EntityImporter):
             print("Requiring GROBID status == 200")
         else:
             print("NOT checking GROBID success")
-        #self.project_whitelist = ['fatcat-changelog']
-        self.project_whitelist = []
+        self.ingest_request_source_whitelist = ['fatcat-changelog']
+        #self.ingest_request_source_whitelist = []
 
     def want(self, row):
         """
@@ -49,8 +49,8 @@ class IngestFileResultImporter(EntityImporter):
         if self.require_grobid and not row.get('grobid', {}).get('status_code') == 200:
             self.counts['skip-grobid'] += 1
             return False
-        if self.project_whitelist and row.get('project') not in self.project_whitelist:
-            self.counts['skip-project'] += 1
+        if self.ingest_request_source_whitelist and row.get('ingest_request_source') not in self.ingest_request_source_whitelist:
+            self.counts['skip-ingest_request_source'] += 1
             return False
         if row.get('hit') == True and row.get('file_meta'):
             return True
@@ -122,10 +122,10 @@ class IngestFileResultImporter(EntityImporter):
         )
         if fatcat and fatcat.get('edit_extra'):
             fe.edit_extra = fatcat['edit_extra']
-        if request.get('project'):
+        if request.get('ingest_request_source'):
             if not fe.edit_extra:
                 fe.edit_extra = dict()
-            fe.edit_extra['project'] = request['project']
+            fe.edit_extra['ingest_request_source'] = request['ingest_request_source']
         return fe
 
     def try_update(self, fe):
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 83cc83be..c2ae6e0f 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -1,7 +1,7 @@
 
 from .elasticsearch import release_to_elasticsearch
 
-def release_ingest_request(release, oa_only=False, project='fatcat'):
+def release_ingest_request(release, oa_only=False, ingest_request_source='fatcat'):
     """
     Takes a full release entity object and returns an ingest request (as dict),
     or None if it seems like this release shouldn't be ingested.
@@ -52,7 +52,7 @@ def release_ingest_request(release, oa_only=False, project='fatcat'):
 
     ingest_request = {
         'ingest_type': 'file',
-        'project': project,
+        'ingest_request_source': ingest_request_source,
         'base_url': url,
         'fatcat': {
             'release_stage': release.release_stage,
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index fe5c55be..e1a72217 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -222,7 +222,7 @@ class EntityUpdatesWorker(FatcatWorker):
                     )
                     # filter to "new" active releases with no matched files
                     if release.ident in new_release_ids:
-                        ir = release_ingest_request(release, project='fatcat-changelog', oa_only=self.ingest_oa_only)
+                        ir = release_ingest_request(release, ingest_request_source='fatcat-changelog', oa_only=self.ingest_oa_only)
                         if ir and ir['ingest_type'] == 'file' and not release.files:
                             producer.produce(
                                 self.ingest_file_request_topic,