about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-02-05 23:14:51 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-02-05 23:14:51 -0800
commit: d4233fe651043b6bb8175bb0d22fbab95b11fb70 (patch)
tree: ba0ef35ee9836acde6b08a08fb8c7b5d7e693e2c
parent: bac2d50d00211c6750705ded725c6f0bc6bacabf (diff)
download: sandcrawler-d4233fe651043b6bb8175bb0d22fbab95b11fb70.tar.gz
sandcrawler-d4233fe651043b6bb8175bb0d22fbab95b11fb70.zip
fix persist bug where ingest_request_source not saved
-rw-r--r-- python/sandcrawler/persist.py 1
1 files changed, 1 insertions, 0 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index 9ef3e93..6469940 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -102,6 +102,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
'base_url': raw['base_url'],
'link_source': raw['link_source'],
'link_source_id': raw['link_source_id'],
+ 'ingest_request_source': raw.get('ingest_request_source'),
'request': {},
}
# extra/optional fields