aboutsummaryrefslogtreecommitdiffstats
path: root/extra/abbrev/parse_dls.py
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-05-09 12:33:46 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-05-09 12:33:46 +0200
commit: 521f4fdfa3db52043686fdce232f402b468362ff (patch)
tree: 1cb7e1c4673a69f74d3dd24992457014cb1b8309 /extra/abbrev/parse_dls.py
parent: d2931da01a5c7d7254b5c7f4e4f8c1fa20513235 (diff)
download: refcat-521f4fdfa3db52043686fdce232f402b468362ff.tar.gz
refcat-521f4fdfa3db52043686fdce232f402b468362ff.zip
add abbreviation list, 117143 titles
Diffstat (limited to 'extra/abbrev/parse_dls.py')
-rw-r--r-- extra/abbrev/parse_dls.py 33
1 files changed, 33 insertions, 0 deletions
diff --git a/extra/abbrev/parse_dls.py b/extra/abbrev/parse_dls.py
new file mode 100644
index 0000000..625ab16
--- /dev/null
+++ b/extra/abbrev/parse_dls.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+import fileinput
+import json
+
+# pup 'dl json{}' < A_abrvjt.html | jq -rc .[0].children[] | grep "children"
+
+# {"children":[{"tag":"b"}],"tag":"dt","text":"A + U-ARCHITECTURE AND URBANISM"}
+# {"children":[{"tag":"b","text":"A U-ARCHIT URBAN"}],"tag":"dd"}
+# {"children":[{"tag":"b"}],"tag":"dt","text":"A CRITICAL REVIEW: LASER TECHNOLOGIES FOR DEFENSE AND SECURITY"}
+# {"children":[{"tag":"b","text":"P SOC PHOTO-OPT INS"}],"tag":"dd"}
+# {"children":[{"tag":"b"}],"tag":"dt","text":"A KALEIDOSCOPIC VIEW OF NATURAL RESOURCES"}
+# {"children":[{"tag":"b"}],"tag":"dd"}
+# {"children":[{"tag":"b"}],"tag":"dt","text":"A MIDSUMMER NIGHT&#39;S DREAM"}
+# {"children":[{"tag":"b","text":"SHAKESPEARE SURV"}],"tag":"dd"}
+# {"children":[{"tag":"b"}],"tag":"dt","text":"A N A E-APPROCHE NEUROPSYCHOLOGIQUE DES APPRENTISSAGES CHEZ L ENFANT"}
+
def parse_entries(lines):
    """Group ``dt``/``dd`` JSON lines into title/abbreviation records.

    Each input line is one JSON object, as produced by the ``pup | jq``
    pipeline shown in the comment at the top of this script. A ``dt`` object
    starts a new record and supplies its ``name`` (the object's ``text``).
    Any other object is treated as the description of the current record;
    the ``text`` of its first child, when non-empty, becomes the record's
    ``abbrev``.

    Args:
        lines: Iterable of strings, one JSON document per line. Blank lines
            are skipped.

    Yields:
        dict: ``{"name": ...}`` or ``{"name": ..., "abbrev": ...}``, in input
        order, including the final record of the input.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    current = None
    for raw in lines:
        line = raw.strip()
        if not line:
            # A blank line carries no record; json.loads would raise on it.
            continue
        doc = json.loads(line)
        if doc.get("tag") == "dt":
            if current is not None:
                yield current
            current = {"name": doc.get("text")}
            continue
        if current is None:
            # Description seen before any title: there is no record to
            # attach it to, so emitting it would produce a nameless entry.
            continue
        # Tolerate a description without children instead of raising.
        children = doc.get("children") or [{}]
        abbrev = children[0].get("text")
        if abbrev:
            current["abbrev"] = abbrev
    # The last record is not followed by another "dt", so flush it here;
    # otherwise the final title of the input would be dropped.
    if current is not None:
        yield current


def main():
    """Read JSON lines from the files in argv (or stdin) and print records."""
    for entry in parse_entries(fileinput.input()):
        print(json.dumps(entry))


if __name__ == "__main__":
    main()