diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-01-22 19:44:20 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-01-22 19:44:20 -0800 |
commit | 2124a68f7659826d9fec80803af45e86e0374204 (patch) | |
tree | 6546b47342e0a528e9204729fc8e459bc6fad46a | |
parent | 2b5cfb73097f17f383457380e36bf977cd0da5ed (diff) | |
download | fatcat-scholar-2124a68f7659826d9fec80803af45e86e0374204.tar.gz fatcat-scholar-2124a68f7659826d9fec80803af45e86e0374204.zip |
bug fix: more html_fulltext not getting processed
-rw-r--r-- | fatcat_scholar/transform.py | 2 |
1 files changed, 2 insertions, 0 deletions
```diff
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index ba79e50..ec0ed12 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -680,6 +680,7 @@ def run_transform(infile: Sequence) -> None:
             pdftotext_fulltext=obj.get("pdftotext_fulltext"),
             pdf_meta=obj.get("pdf_meta"),
             sim_fulltext=obj.get("sim_fulltext"),
+            html_fulltext=obj.get("html_fulltext"),
         )
         es_doc = transform_heavy(heavy)
         if not es_doc:
@@ -702,6 +703,7 @@ def run_refs(infile: Sequence) -> None:
             pdftotext_fulltext=obj.get("pdftotext_fulltext"),
             pdf_meta=obj.get("pdf_meta"),
             sim_fulltext=obj.get("sim_fulltext"),
+            html_fulltext=obj.get("html_fulltext"),
         )
         refs = refs_from_heavy(heavy)
         for ref in refs:
```