diff options
Diffstat (limited to 'python/scripts/grobid_affiliations.py')
-rwxr-xr-x | python/scripts/grobid_affiliations.py | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/python/scripts/grobid_affiliations.py b/python/scripts/grobid_affiliations.py index b42153c..b01e46a 100755 --- a/python/scripts/grobid_affiliations.py +++ b/python/scripts/grobid_affiliations.py @@ -16,40 +16,40 @@ from grobid2json import teixml2json def parse_hbase(line): - line = line.split('\t') + line = line.split("\t") assert len(line) == 2 sha1hex = line[0] obj = json.loads(line[1]) - tei_xml = obj['tei_xml'] + tei_xml = obj["tei_xml"] return sha1hex, tei_xml def parse_pg(line): obj = json.loads(line) - return obj['sha1hex'], obj['tei_xml'] + return obj["sha1hex"], obj["tei_xml"] -def run(mode='hbase'): +def run(mode="hbase"): for line in sys.stdin: - if mode == 'hbase': + if mode == "hbase": sha1hex, tei_xml = parse_hbase(line) - elif mode == 'pg': + elif mode == "pg": sha1hex, tei_xml = parse_pg(line) else: - raise NotImplementedError('parse mode: {}'.format(mode)) + raise NotImplementedError("parse mode: {}".format(mode)) obj = teixml2json(tei_xml, encumbered=False) affiliations = [] - for author in obj['authors']: - if author.get('affiliation'): - affiliations.append(author['affiliation']) + for author in obj["authors"]: + if author.get("affiliation"): + affiliations.append(author["affiliation"]) if affiliations: # don't duplicate affiliations; only the unique ones affiliations = list(set([json.dumps(a) for a in affiliations])) affiliations = [json.loads(a) for a in affiliations] - print('\t'.join([sha1hex, json.dumps(affiliations)])) + print("\t".join([sha1hex, json.dumps(affiliations)])) -if __name__ == '__main__': +if __name__ == "__main__": run() |