1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
#!/usr/bin/env python
import fileinput
import json
# Input: one JSON object per line, e.g. as produced by the pipeline below.
# pup 'dl json{}' < A_abrvjt.html | jq -rc .[0].children[] | grep "children"
# Sample input lines:
# {"children":[{"tag":"b"}],"tag":"dt","text":"A + U-ARCHITECTURE AND URBANISM"}
# {"children":[{"tag":"b","text":"A U-ARCHIT URBAN"}],"tag":"dd"}
# {"children":[{"tag":"b"}],"tag":"dt","text":"A CRITICAL REVIEW: LASER TECHNOLOGIES FOR DEFENSE AND SECURITY"}
# {"children":[{"tag":"b","text":"P SOC PHOTO-OPT INS"}],"tag":"dd"}
# {"children":[{"tag":"b"}],"tag":"dt","text":"A KALEIDOSCOPIC VIEW OF NATURAL RESOURCES"}
# {"children":[{"tag":"b"}],"tag":"dd"}
# {"children":[{"tag":"b"}],"tag":"dt","text":"A MIDSUMMER NIGHT'S DREAM"}
# {"children":[{"tag":"b","text":"SHAKESPEARE SURV"}],"tag":"dd"}
# {"children":[{"tag":"b"}],"tag":"dt","text":"A N A E-APPROCHE NEUROPSYCHOLOGIQUE DES APPRENTISSAGES CHEZ L ENFANT"}
def collect_abbrevs(lines):
    """Pair each title line (``dt``) with the abbreviation that follows it.

    Each non-blank item of *lines* is parsed as one JSON object. An object
    whose ``"tag"`` is ``"dt"`` starts a new record and supplies its
    ``"name"`` from the object's ``"text"``. Any other object is treated as
    the abbreviation line: the ``"text"`` of its first child, when present
    and non-empty, becomes the record's ``"abbrev"``.

    Args:
        lines: Iterable of strings, one JSON document per string.

    Returns:
        A list of dicts in input order. Each has a ``"name"`` key and, when
        an abbreviation was found, an ``"abbrev"`` key.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError, IndexError: If a non-``dt`` object has no ``"children"``
            entry or an empty one.
    """
    records = []
    current = {}
    for line in lines:
        line = line.strip()
        if not line:
            # Tolerate blank lines (such as a trailing newline) instead of
            # letting json.loads fail on an empty string.
            continue
        doc = json.loads(line)
        if doc.get("tag") == "dt":
            # A new title begins: flush the record built so far.
            if current:
                records.append(current)
                current = {}
            current["name"] = doc.get("text")
        else:
            abbrev = doc["children"][0].get("text")
            if abbrev:
                current["abbrev"] = abbrev
    # Records are otherwise only flushed when the next title arrives, so the
    # final one has to be emitted explicitly or it would be lost.
    if current:
        records.append(current)
    return records


def main():
    """Read JSON lines from the files named in argv (or stdin) and print records."""
    for record in collect_abbrevs(fileinput.input()):
        print(json.dumps(record))


if __name__ == "__main__":
    main()
|