grobid2json: add namespace override option for some parsing

This makes the reference-parsing code reusable with the simpler processCitations response, which is an XML fragment with no namespace. It should have no impact on existing code paths, because the new `ns` parameter defaults to the existing TEI namespace.
author: Bryan Newbold <bnewbold@archive.org> 2021-01-18 23:47:29 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2021-01-19 19:49:04 -0800
commit: 4249da27c244406291133453bf209057e29aacef (patch)
tree: c96277d04efe9202df4c7dd641b3fc09cc3c94c2 /fatcat_scholar/grobid2json.py
parent: f18afbb3bc7036ac73591ac98a1fdc646955bb94 (diff)
download: fatcat-scholar-4249da27c244406291133453bf209057e29aacef.tar.gz
fatcat-scholar-4249da27c244406291133453bf209057e29aacef.zip
1 files changed, 3 insertions, 3 deletions
diff --git a/fatcat_scholar/grobid2json.py b/fatcat_scholar/grobid2json.py
index 4c8543a..e94bed2 100755
--- a/fatcat_scholar/grobid2json.py
+++ b/fatcat_scholar/grobid2json.py
@@ -35,7 +35,7 @@ xml_ns = "http://www.w3.org/XML/1998/namespace"
 ns = "http://www.tei-c.org/ns/1.0"
 
 
-def all_authors(elem: Optional[ET.Element]) -> List[Dict[str, Any]]:
+def all_authors(elem: Optional[ET.Element], ns: str = ns) -> List[Dict[str, Any]]:
     if not elem:
         return []
     names = []
@@ -96,7 +96,7 @@ def journal_info(elem: ET.Element) -> Dict[str, Any]:
     return journal
 
 
-def biblio_info(elem: ET.Element) -> Dict[str, Any]:
+def biblio_info(elem: ET.Element, ns: str = ns) -> Dict[str, Any]:
     ref: Dict[str, Any] = dict()
     ref["id"] = elem.attrib.get("{http://www.w3.org/XML/1998/namespace}id")
     ref["unstructured"] = elem.findtext('.//{%s}note[@type="raw_reference"]' % ns)
@@ -109,7 +109,7 @@ def biblio_info(elem: ET.Element) -> Dict[str, Any]:
         else:
             ref["journal"] = None
             ref["title"] = other_title
-    ref["authors"] = all_authors(elem)
+    ref["authors"] = all_authors(elem, ns=ns)
     ref["publisher"] = elem.findtext(".//{%s}publicationStmt/{%s}publisher" % (ns, ns))
     if not ref["publisher"]:
         ref["publisher"] = elem.findtext(".//{%s}imprint/{%s}publisher" % (ns, ns))
author	Bryan Newbold <bnewbold@archive.org>	2021-01-18 23:47:29 -0800
committer	Bryan Newbold <bnewbold@archive.org>	2021-01-19 19:49:04 -0800
commit	4249da27c244406291133453bf209057e29aacef (patch)
tree	c96277d04efe9202df4c7dd641b3fc09cc3c94c2 /fatcat_scholar/grobid2json.py
parent	f18afbb3bc7036ac73591ac98a1fdc646955bb94 (diff)
download	fatcat-scholar-4249da27c244406291133453bf209057e29aacef.tar.gz fatcat-scholar-4249da27c244406291133453bf209057e29aacef.zip