diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-11-15 16:49:21 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-11-15 16:51:55 -0800 | 
| commit | 4693394d69667570a81126ea727e9ad0ed8e1582 (patch) | |
| tree | 9bd99a425bfc0b8cbc19617d045f26afac18402f | |
| parent | 24010700fcf9717d2166cdef319d4916d87529fb (diff) | |
| download | fatcat-4693394d69667570a81126ea727e9ad0ed8e1582.tar.gz fatcat-4693394d69667570a81126ea727e9ad0ed8e1582.zip | |
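project -> ingest_request_source

Rename the `project` field to `ingest_request_source` everywhere it appears: the key in generated ingest requests, the `release_ingest_request()` keyword argument, the importer's whitelist attribute and skip counter, and the `edit_extra` key written on file edits. Note that this commit also switches the importer whitelist from the empty list to `['fatcat-changelog']`.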
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 12 |
| -rw-r--r-- | python/fatcat_tools/transforms/ingest.py | 4 |
| -rw-r--r-- | python/fatcat_tools/workers/changelog.py | 2 |
3 files changed, 9 insertions, 9 deletions
| diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index dbb8149e..12e14e52 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -28,8 +28,8 @@ class IngestFileResultImporter(EntityImporter):              print("Requiring GROBID status == 200")          else:              print("NOT checking GROBID success") -        #self.project_whitelist = ['fatcat-changelog'] -        self.project_whitelist = [] +        self.ingest_request_source_whitelist = ['fatcat-changelog'] +        #self.ingest_request_source_whitelist = []      def want(self, row):          """ @@ -49,8 +49,8 @@ class IngestFileResultImporter(EntityImporter):          if self.require_grobid and not row.get('grobid', {}).get('status_code') == 200:              self.counts['skip-grobid'] += 1              return False -        if self.project_whitelist and row.get('project') not in self.project_whitelist: -            self.counts['skip-project'] += 1 +        if self.ingest_request_source_whitelist and row.get('ingest_request_source') not in self.ingest_request_source_whitelist: +            self.counts['skip-ingest_request_source'] += 1              return False          if row.get('hit') == True and row.get('file_meta'):              return True @@ -122,10 +122,10 @@ class IngestFileResultImporter(EntityImporter):          )          if fatcat and fatcat.get('edit_extra'):              fe.edit_extra = fatcat['edit_extra'] -        if request.get('project'): +        if request.get('ingest_request_source'):              if not fe.edit_extra:                  fe.edit_extra = dict() -            fe.edit_extra['project'] = request['project'] +            fe.edit_extra['ingest_request_source'] = request['ingest_request_source']          return fe      def try_update(self, fe): diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py index 83cc83be..c2ae6e0f 100644 --- 
a/python/fatcat_tools/transforms/ingest.py +++ b/python/fatcat_tools/transforms/ingest.py @@ -1,7 +1,7 @@  from .elasticsearch import release_to_elasticsearch -def release_ingest_request(release, oa_only=False, project='fatcat'): +def release_ingest_request(release, oa_only=False, ingest_request_source='fatcat'):      """      Takes a full release entity object and returns an ingest request (as dict),      or None if it seems like this release shouldn't be ingested. @@ -52,7 +52,7 @@ def release_ingest_request(release, oa_only=False, project='fatcat'):      ingest_request = {          'ingest_type': 'file', -        'project': project, +        'ingest_request_source': ingest_request_source,          'base_url': url,          'fatcat': {              'release_stage': release.release_stage, diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py index fe5c55be..e1a72217 100644 --- a/python/fatcat_tools/workers/changelog.py +++ b/python/fatcat_tools/workers/changelog.py @@ -222,7 +222,7 @@ class EntityUpdatesWorker(FatcatWorker):                  )                  # filter to "new" active releases with no matched files                  if release.ident in new_release_ids: -                    ir = release_ingest_request(release, project='fatcat-changelog', oa_only=self.ingest_oa_only) +                    ir = release_ingest_request(release, ingest_request_source='fatcat-changelog', oa_only=self.ingest_oa_only)                      if ir and ir['ingest_type'] == 'file' and not release.files:                          producer.produce(                              self.ingest_file_request_topic, | 
