about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-02-18 16:42:36 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-02-18 16:42:36 -0800
commit: 832a9e42bc068c1b1656526b4a2cb7108c9b8334 (patch)
tree: 6fb1e0a0d7403659667265175a7e9dbbf6a30ac2 /python
parent: f613f69a40fcc9a445f21cadd35d7c36c8061db8 (diff)
download: sandcrawler-832a9e42bc068c1b1656526b4a2cb7108c9b8334.tar.gz
download: sandcrawler-832a9e42bc068c1b1656526b4a2cb7108c9b8334.zip
include rel and oa_status in ingest request 'extra'
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/db.py 2
-rw-r--r-- python/sandcrawler/persist.py 2
2 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/db.py b/python/sandcrawler/db.py
index ddb71a0..673912c 100644
--- a/python/sandcrawler/db.py
+++ b/python/sandcrawler/db.py
@@ -248,7 +248,7 @@ class SandcrawlerPostgresClient:
for r in batch:
# in case these fields were already packed into 'request'
extra = r.get('request', {})
- for k in ('ext_ids', 'fatcat_release', 'edit_extra'):
+ for k in ('ext_ids', 'fatcat_release', 'edit_extra', 'rel'):
if r.get(k):
extra[k] = r[k]
if extra:
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index bfd8247..3f2762a 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -110,7 +110,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
request['release_stage'] = raw['release_stage']
if raw.get('fatcat', {}).get('release_ident'):
request['request']['release_ident'] = raw['fatcat']['release_ident']
- for k in ('ext_ids', 'edit_extra'):
+ for k in ('ext_ids', 'edit_extra', 'rel'):
if raw.get(k):
request['request'][k] = raw[k]
# if this dict is empty, trim it to save DB space