about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-02-05 23:10:26 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-02-05 23:10:26 -0800
commit bac2d50d00211c6750705ded725c6f0bc6bacabf (patch)
tree 62b1a146bc2ef0dfec2aaa1429ca639466fa3f79
parent 40b6702116fccd86f9cd3ddeb572c03d3a5977ea (diff)
download sandcrawler-bac2d50d00211c6750705ded725c6f0bc6bacabf.tar.gz
sandcrawler-bac2d50d00211c6750705ded725c6f0bc6bacabf.zip
fix bug where ingest_request extra fields not persisted
-rw-r--r-- python/sandcrawler/db.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/sandcrawler/db.py b/python/sandcrawler/db.py
index 3ec325e..53d159f 100644
--- a/python/sandcrawler/db.py
+++ b/python/sandcrawler/db.py
@@ -189,7 +189,8 @@ class SandcrawlerPostgresClient:
raise NotImplementedError("on_conflict: {}".format(on_conflict))
sql += " RETURNING xmax;"
for r in batch:
- extra = dict()
+ # in case these fields were already packed into 'request'
+ extra = r.get('request', {})
for k in ('ext_ids', 'fatcat_release', 'edit_extra'):
if r.get(k):
extra[k] = r[k]