diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 18:24:19 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 18:25:58 -0700 |
commit | 560d5f7cc1672f95e2a953ab5908f4205151a703 (patch) | |
tree | 04b35084358786bbd2329491be07cde35a4d2289 /fatcat_scholar/schema.py | |
parent | 33211915773a0c77d064c55c1b02ceed6f455feb (diff) | |
download | fatcat-scholar-560d5f7cc1672f95e2a953ab5908f4205151a703.tar.gz fatcat-scholar-560d5f7cc1672f95e2a953ab5908f4205151a703.zip |
refactor use of grobid_tei_xml
Diffstat (limited to 'fatcat_scholar/schema.py')
-rw-r--r-- | fatcat_scholar/schema.py | 9 |
1 files changed, 5 insertions, 4 deletions
```diff
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 6c9307d..b170f12 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional
 import ftfy
 from bs4 import BeautifulSoup
 from fatcat_openapi_client import ReleaseContrib, ReleaseEntity
+from grobid_tei_xml import GrobidDocument
 
 # pytype: disable=import-error
 from pydantic import BaseModel
@@ -494,12 +495,12 @@ def contrib_affiliation(contrib: ReleaseContrib) -> Optional[str]:
     return None
 
 
-def es_abstracts_from_grobid(tei_dict: dict) -> List[ScholarAbstract]:
+def es_abstracts_from_grobid(tei_doc: GrobidDocument) -> List[ScholarAbstract]:
 
-    if tei_dict.get("abstract"):
-        body = scrub_text(tei_dict["abstract"])
+    if tei_doc.abstract:
+        body = scrub_text(tei_doc.abstract)
         if body:
-            return [ScholarAbstract(lang_code=tei_dict.get("lang"), body=body)]
+            return [ScholarAbstract(lang_code=tei_doc.language_code, body=body)]
     return []
 
 
```