diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-05-20 21:08:18 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-05-20 21:08:18 -0700 |
commit | 2783004d2a497fe67029fc77c31de6b7405c41a5 (patch) | |
tree | 8522f174fb44f6ac5d6f3906e55fa68bf61563fb | |
parent | 686a188d79a6f152fcd3195db5847c710fb1231d (diff) | |
download | fatcat-scholar-2783004d2a497fe67029fc77c31de6b7405c41a5.tar.gz fatcat-scholar-2783004d2a497fe67029fc77c31de6b7405c41a5.zip |
skip SIM items w/o page_numbers (instead of asserting)
-rw-r--r-- | fatcat_scholar/sim_pipeline.py | 4 | ||||
-rw-r--r-- | fatcat_scholar/work_pipeline.py | 4 |
2 files changed, 6 insertions, 2 deletions
diff --git a/fatcat_scholar/sim_pipeline.py b/fatcat_scholar/sim_pipeline.py
index 4315e70..c96681a 100644
--- a/fatcat_scholar/sim_pipeline.py
+++ b/fatcat_scholar/sim_pipeline.py
@@ -64,7 +64,9 @@ class SimPipeline():
         leaf_index = dict()
         leaf_list = []
-        assert 'page_numbers' in issue_meta
+        if not 'page_numbers' in issue_meta:
+            # TODO: warn
+            return None
         for entry in issue_meta['page_numbers'].get('pages', []):
             page_num = entry['pageNumber']
             leaf_index[entry['leafNum']] = page_num
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index 9ce72b1..5bfc1b6 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -166,7 +166,9 @@ class WorkPipeline():
         leaf_index = dict()
         leaf_list = []
-        assert 'page_numbers' in issue_meta
+        if not 'page_numbers' in issue_meta:
+            # TODO: warn
+            return None
         for entry in issue_meta['page_numbers'].get('pages', []):
             page_num = entry['pageNumber']
             leaf_index[entry['leafNum']] = page_num