about | summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar/schema.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-27 18:24:19 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-27 18:25:58 -0700
commit: 560d5f7cc1672f95e2a953ab5908f4205151a703 (patch)
tree: 04b35084358786bbd2329491be07cde35a4d2289 /fatcat_scholar/schema.py
parent: 33211915773a0c77d064c55c1b02ceed6f455feb (diff)
download: fatcat-scholar-560d5f7cc1672f95e2a953ab5908f4205151a703.tar.gz
fatcat-scholar-560d5f7cc1672f95e2a953ab5908f4205151a703.zip
refactor use of grobid_tei_xml
Diffstat (limited to 'fatcat_scholar/schema.py')
-rw-r--r-- fatcat_scholar/schema.py 9
1 files changed, 5 insertions, 4 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 6c9307d..b170f12 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional
import ftfy
from bs4 import BeautifulSoup
from fatcat_openapi_client import ReleaseContrib, ReleaseEntity
+from grobid_tei_xml import GrobidDocument
# pytype: disable=import-error
from pydantic import BaseModel
@@ -494,12 +495,12 @@ def contrib_affiliation(contrib: ReleaseContrib) -> Optional[str]:
return None
-def es_abstracts_from_grobid(tei_dict: dict) -> List[ScholarAbstract]:
+def es_abstracts_from_grobid(tei_doc: GrobidDocument) -> List[ScholarAbstract]:
- if tei_dict.get("abstract"):
- body = scrub_text(tei_dict["abstract"])
+ if tei_doc.abstract:
+ body = scrub_text(tei_doc.abstract)
if body:
- return [ScholarAbstract(lang_code=tei_dict.get("lang"), body=body)]
+ return [ScholarAbstract(lang_code=tei_doc.language_code, body=body)]
return []