aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat/grobid2json.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-04-14 15:40:15 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-04-14 15:40:15 -0700
commit: 567727e8606d2565098ddbcd63a1526aa44ff97f (patch)
tree: 7ac3c2cbe63c661ac377098db5177777dc7043ac /fuzzycat/grobid2json.py
parent: 0e184b9700f8a0ee21f5acbfc08437c8e3445ebf (diff)
download: fuzzycat-567727e8606d2565098ddbcd63a1526aa44ff97f.tar.gz
download: fuzzycat-567727e8606d2565098ddbcd63a1526aa44ff97f.zip
GROBID API unstructured citation parsing utility code
Diffstat (limited to 'fuzzycat/grobid2json.py')
-rwxr-xr-x fuzzycat/grobid2json.py | 3
1 file changed, 2 insertions, 1 deletion
diff --git a/fuzzycat/grobid2json.py b/fuzzycat/grobid2json.py
index 49f265a..c5aa0d2 100755
--- a/fuzzycat/grobid2json.py
+++ b/fuzzycat/grobid2json.py
@@ -26,7 +26,8 @@ def all_authors(elem: Optional[ET.Element], ns: str = ns) -> List[Dict[str, Any]
continue
given_name = pn.findtext("./{%s}forename" % ns) or None
surname = pn.findtext("./{%s}surname" % ns) or None
- full_name = " ".join(pn.itertext())
+ full_name = " ".join(pn.itertext()).strip()
+ full_name = " ".join(full_name.split())
obj: Dict[str, Any] = dict(name=full_name)
if given_name:
obj["given_name"] = given_name