diff options
Diffstat (limited to 'extra/abbrev/parse_dls.py')
-rw-r--r-- | extra/abbrev/parse_dls.py | 33 |
1 files changed, 33 insertions, 0 deletions
#!/usr/bin/env python
"""Turn a stream of <dt>/<dd> JSON lines into journal abbreviation records.

Each input line is one JSON object describing a child element of a <dl>
list.  A "dt" element carries the full name in its "text" field; the
element that follows it carries the abbreviation in the "text" field of its
first child.  One JSON object per name is written to standard output, with
an "abbrev" key only when an abbreviation was present.
"""

import fileinput
import json

# The input is produced with:
#
#   pup 'dl json{}' < A_abrvjt.html | jq -rc .[0].children[] | grep "children"
#
# and looks like this:
#
# {"children":[{"tag":"b"}],"tag":"dt","text":"A + U-ARCHITECTURE AND URBANISM"}
# {"children":[{"tag":"b","text":"A U-ARCHIT URBAN"}],"tag":"dd"}
# {"children":[{"tag":"b"}],"tag":"dt","text":"A CRITICAL REVIEW: LASER TECHNOLOGIES FOR DEFENSE AND SECURITY"}
# {"children":[{"tag":"b","text":"P SOC PHOTO-OPT INS"}],"tag":"dd"}
# {"children":[{"tag":"b"}],"tag":"dt","text":"A KALEIDOSCOPIC VIEW OF NATURAL RESOURCES"}
# {"children":[{"tag":"b"}],"tag":"dd"}
# {"children":[{"tag":"b"}],"tag":"dt","text":"A MIDSUMMER NIGHT'S DREAM"}
# {"children":[{"tag":"b","text":"SHAKESPEARE SURV"}],"tag":"dd"}
# {"children":[{"tag":"b"}],"tag":"dt","text":"A N A E-APPROCHE NEUROPSYCHOLOGIQUE DES APPRENTISSAGES CHEZ L ENFANT"}


def parse_entries(lines):
    """Group dt/dd JSON lines into a list of name/abbreviation dicts.

    Args:
        lines: Iterable of strings, each holding one JSON object.  Lines
            that are empty after stripping whitespace are ignored.

    Returns:
        A list of dicts in input order.  Each dict has a "name" key taken
        from the "text" of a "dt" element and, when the following element's
        first child has a non-empty "text", an "abbrev" key.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
        KeyError, IndexError: If a non-"dt" element has no "children".
    """
    entries = []
    current = {}
    for line in lines:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # failing inside json.loads.
            continue
        doc = json.loads(line)
        if doc.get("tag") == "dt":
            # A new name starts: the previous record is complete.
            if current:
                entries.append(current)
            current = {"name": doc.get("text")}
        else:
            # Any element that is not a "dt" is treated as the "dd" that
            # belongs to the most recent name.
            abbrev = doc["children"][0].get("text")
            if abbrev:
                current["abbrev"] = abbrev
    # Flush the record that was still open when the input ended; without
    # this the last name in the input would never be emitted.
    if current:
        entries.append(current)
    return entries


def main():
    """Read lines from the files named on the command line, or stdin."""
    # Parse everything first so that nothing is printed if the input is bad.
    for entry in parse_entries(fileinput.input()):
        print(json.dumps(entry))


if __name__ == "__main__":
    main()