diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 14:33:09 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 18:25:58 -0700 |
commit | b4d0bfb643842e6070a4ee36cfc52e2292e3b5ba (patch) | |
tree | 1988743c61f5c053148d66f6939ff32f398ae06e /fatcat_scholar/transform.py | |
parent | 956e64f47f7d47f2539cd6575c25ec0b6a33e567 (diff) | |
download | fatcat-scholar-b4d0bfb643842e6070a4ee36cfc52e2292e3b5ba.tar.gz fatcat-scholar-b4d0bfb643842e6070a4ee36cfc52e2292e3b5ba.zip |
make fmt (black 21.9b0)
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r-- | fatcat_scholar/transform.py | 13 |
1 file changed, 10 insertions, 3 deletions
```diff
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 7264540..db631cf 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -246,13 +246,18 @@ def es_fulltext_from_pdftotext(
     if raw_text and len(raw_text) > MAX_BODY_CHARS:
         raw_text = raw_text[:MAX_BODY_CHARS]
     ret = ScholarFulltext(
-        lang_code=re.language, body=raw_text, acknowledgement=None, annex=None,
+        lang_code=re.language,
+        body=raw_text,
+        acknowledgement=None,
+        annex=None,
     )
     return _add_file_release_meta(ret, pdf_meta, re, fe)
 
 
 def es_fulltext_from_html(
-    html_fulltext: Dict[str, Any], re: ReleaseEntity, wc: WebcaptureEntity,
+    html_fulltext: Dict[str, Any],
+    re: ReleaseEntity,
+    wc: WebcaptureEntity,
 ) -> Optional[ScholarFulltext]:
 
     if not wc.archive_urls or not html_fulltext.get("tei_xml"):
@@ -546,7 +551,9 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
                 if f.ident == heavy.html_fulltext["webcapture_ident"]
             ][0]
             html_fulltext = es_fulltext_from_html(
-                heavy.html_fulltext, fulltext_release, fulltext_webcapture,
+                heavy.html_fulltext,
+                fulltext_release,
+                fulltext_webcapture,
             )
             if exclude_web_fulltext and html_fulltext:
                 fulltext = html_fulltext.remove_access()
```