diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-02-05 23:10:26 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-02-05 23:10:26 -0800 |
commit | bac2d50d00211c6750705ded725c6f0bc6bacabf (patch) | |
tree | 62b1a146bc2ef0dfec2aaa1429ca639466fa3f79 | |
parent | 40b6702116fccd86f9cd3ddeb572c03d3a5977ea (diff) | |
download | sandcrawler-bac2d50d00211c6750705ded725c6f0bc6bacabf.tar.gz sandcrawler-bac2d50d00211c6750705ded725c6f0bc6bacabf.zip |
fix bug where ingest_request extra fields not persisted
-rw-r--r-- | python/sandcrawler/db.py | 3 |
1 files changed, 2 insertions, 1 deletions
```diff
diff --git a/python/sandcrawler/db.py b/python/sandcrawler/db.py
index 3ec325e..53d159f 100644
--- a/python/sandcrawler/db.py
+++ b/python/sandcrawler/db.py
@@ -189,7 +189,8 @@ class SandcrawlerPostgresClient:
             raise NotImplementedError("on_conflict: {}".format(on_conflict))
         sql += " RETURNING xmax;"
         for r in batch:
-            extra = dict()
+            # in case these fields were already packed into 'request'
+            extra = r.get('request', {})
             for k in ('ext_ids', 'fatcat_release', 'edit_extra'):
                 if r.get(k):
                     extra[k] = r[k]
```