diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 12:54:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 12:54:37 -0700 |
commit | 05bd7cbcc62588e431c5efd533189e246b2a997e (patch) | |
tree | abcc707a451e77ea1e8c5ac9a5925b97a4bd139a /python/scripts/grobid_affiliations.py | |
parent | f3f424e42f2f4f383103cf80b30a00cfa6cfc179 (diff) | |
download | sandcrawler-05bd7cbcc62588e431c5efd533189e246b2a997e.tar.gz sandcrawler-05bd7cbcc62588e431c5efd533189e246b2a997e.zip |
make fmt
Diffstat (limited to 'python/scripts/grobid_affiliations.py')
-rwxr-xr-x | python/scripts/grobid_affiliations.py | 6 |
1 files changed, 4 insertions, 2 deletions
```diff
diff --git a/python/scripts/grobid_affiliations.py b/python/scripts/grobid_affiliations.py
index d391f60..b42153c 100755
--- a/python/scripts/grobid_affiliations.py
+++ b/python/scripts/grobid_affiliations.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-
 """
 Takes old (HBase) or new (pg) style JSON wrappers of GROBID XML extraction
 output, converts the XML to JSON, filters out raw affiliation strings, and
@@ -24,10 +23,12 @@ def parse_hbase(line):
     tei_xml = obj['tei_xml']
     return sha1hex, tei_xml
 
+
 def parse_pg(line):
     obj = json.loads(line)
     return obj['sha1hex'], obj['tei_xml']
 
+
 def run(mode='hbase'):
     for line in sys.stdin:
         if mode == 'hbase':
@@ -49,5 +50,6 @@ def run(mode='hbase'):
             affiliations = [json.loads(a) for a in affiliations]
             print('\t'.join([sha1hex, json.dumps(affiliations)]))
 
-if __name__=='__main__':
+
+if __name__ == '__main__':
     run()
```