aboutsummaryrefslogtreecommitdiffstats
path: root/python/scripts/grobid_affiliations.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/scripts/grobid_affiliations.py')
-rwxr-xr-xpython/scripts/grobid_affiliations.py24
1 files changed, 12 insertions, 12 deletions
diff --git a/python/scripts/grobid_affiliations.py b/python/scripts/grobid_affiliations.py
index b42153c..b01e46a 100755
--- a/python/scripts/grobid_affiliations.py
+++ b/python/scripts/grobid_affiliations.py
@@ -16,40 +16,40 @@ from grobid2json import teixml2json
def parse_hbase(line):
- line = line.split('\t')
+ line = line.split("\t")
assert len(line) == 2
sha1hex = line[0]
obj = json.loads(line[1])
- tei_xml = obj['tei_xml']
+ tei_xml = obj["tei_xml"]
return sha1hex, tei_xml
def parse_pg(line):
obj = json.loads(line)
- return obj['sha1hex'], obj['tei_xml']
+ return obj["sha1hex"], obj["tei_xml"]
-def run(mode='hbase'):
+def run(mode="hbase"):
for line in sys.stdin:
- if mode == 'hbase':
+ if mode == "hbase":
sha1hex, tei_xml = parse_hbase(line)
- elif mode == 'pg':
+ elif mode == "pg":
sha1hex, tei_xml = parse_pg(line)
else:
- raise NotImplementedError('parse mode: {}'.format(mode))
+ raise NotImplementedError("parse mode: {}".format(mode))
obj = teixml2json(tei_xml, encumbered=False)
affiliations = []
- for author in obj['authors']:
- if author.get('affiliation'):
- affiliations.append(author['affiliation'])
+ for author in obj["authors"]:
+ if author.get("affiliation"):
+ affiliations.append(author["affiliation"])
if affiliations:
# don't duplicate affiliations; only the unique ones
affiliations = list(set([json.dumps(a) for a in affiliations]))
affiliations = [json.loads(a) for a in affiliations]
- print('\t'.join([sha1hex, json.dumps(affiliations)]))
+ print("\t".join([sha1hex, json.dumps(affiliations)]))
-if __name__ == '__main__':
+if __name__ == "__main__":
run()