diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 19:10:35 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 19:10:35 -0700 | 
| commit | a0e275a4bad46ef41585f0207d6dfa1e3c38bc35 (patch) | |
| tree | 92dead8a85e6ff38808beefada8a42693261ceff /python/scripts | |
| parent | 40adf5ed09d917b8a4b8f75680bbf90c147848b3 (diff) | |
| download | sandcrawler-a0e275a4bad46ef41585f0207d6dfa1e3c38bc35.tar.gz sandcrawler-a0e275a4bad46ef41585f0207d6dfa1e3c38bc35.zip | |
remove grobid2json helper file, replace with grobid_tei_xml
Diffstat (limited to 'python/scripts')
| -rwxr-xr-x | python/scripts/grobid_affiliations.py | 6 | 
1 files changed, 4 insertions, 2 deletions
```diff
diff --git a/python/scripts/grobid_affiliations.py b/python/scripts/grobid_affiliations.py
index b01e46a..90a0f77 100755
--- a/python/scripts/grobid_affiliations.py
+++ b/python/scripts/grobid_affiliations.py
@@ -12,7 +12,7 @@ Run in bulk like:
 import json
 import sys
 
-from grobid2json import teixml2json
+from grobid_tei_xml import parse_document_xml
 
 
 def parse_hbase(line):
@@ -38,7 +38,9 @@ def run(mode="hbase"):
         else:
             raise NotImplementedError("parse mode: {}".format(mode))
 
-        obj = teixml2json(tei_xml, encumbered=False)
+        tei_doc = parse_document_xml(tei_xml)
+        tei_doc.remove_encumbered()
+        obj = tei_doc.to_legacy_dict()
 
         affiliations = []
         for author in obj["authors"]:
```
