diff options
Diffstat (limited to 'python/scripts/grobid_affiliations.py')
-rwxr-xr-x | python/scripts/grobid_affiliations.py | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/python/scripts/grobid_affiliations.py b/python/scripts/grobid_affiliations.py index d391f60..b42153c 100755 --- a/python/scripts/grobid_affiliations.py +++ b/python/scripts/grobid_affiliations.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 - """ Takes old (HBase) or new (pg) style JSON wrappers of GROBID XML extraction output, converts the XML to JSON, filters out raw affiliation strings, and @@ -24,10 +23,12 @@ def parse_hbase(line): tei_xml = obj['tei_xml'] return sha1hex, tei_xml + def parse_pg(line): obj = json.loads(line) return obj['sha1hex'], obj['tei_xml'] + def run(mode='hbase'): for line in sys.stdin: if mode == 'hbase': @@ -49,5 +50,6 @@ def run(mode='hbase'): affiliations = [json.loads(a) for a in affiliations] print('\t'.join([sha1hex, json.dumps(affiliations)])) -if __name__=='__main__': + +if __name__ == '__main__': run() |