diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-04-14 15:40:15 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-04-14 15:40:15 -0700 |
commit | 567727e8606d2565098ddbcd63a1526aa44ff97f (patch) | |
tree | 7ac3c2cbe63c661ac377098db5177777dc7043ac /fuzzycat/grobid2json.py | |
parent | 0e184b9700f8a0ee21f5acbfc08437c8e3445ebf (diff) | |
download | fuzzycat-567727e8606d2565098ddbcd63a1526aa44ff97f.tar.gz fuzzycat-567727e8606d2565098ddbcd63a1526aa44ff97f.zip |
GROBID API unstructured citation parsing utility code
Diffstat (limited to 'fuzzycat/grobid2json.py')
-rwxr-xr-x | fuzzycat/grobid2json.py | 3 |
1 files changed, 2 insertions, 1 deletions
```diff
diff --git a/fuzzycat/grobid2json.py b/fuzzycat/grobid2json.py
index 49f265a..c5aa0d2 100755
--- a/fuzzycat/grobid2json.py
+++ b/fuzzycat/grobid2json.py
@@ -26,7 +26,8 @@ def all_authors(elem: Optional[ET.Element], ns: str = ns) -> List[Dict[str, Any]
             continue
         given_name = pn.findtext("./{%s}forename" % ns) or None
         surname = pn.findtext("./{%s}surname" % ns) or None
-        full_name = " ".join(pn.itertext())
+        full_name = " ".join(pn.itertext()).strip()
+        full_name = " ".join(full_name.split())
         obj: Dict[str, Any] = dict(name=full_name)
         if given_name:
             obj["given_name"] = given_name
```