about | summary | refs | log | tree | commit | diff | stats
path: root/extraction/xml2json.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-04-04 11:47:41 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-04-04 11:47:41 -0700
commit e8eb959fbdd5d13cd53421ddf2487811d049c4e8 (patch)
tree fbc8c052aac7d4eeb83da0a2d181fb585d2e4a8b /extraction/xml2json.py
parent 7056c83d4a6bc107155eedb1b39f38dc6d290a39 (diff)
download sandcrawler-e8eb959fbdd5d13cd53421ddf2487811d049c4e8.tar.gz
sandcrawler-e8eb959fbdd5d13cd53421ddf2487811d049c4e8.zip
more WIP on extractor
Diffstat (limited to 'extraction/xml2json.py')
-rw-r--r-- extraction/xml2json.py 8
1 files changed, 8 insertions, 0 deletions
diff --git a/extraction/xml2json.py b/extraction/xml2json.py
new file mode 100644
index 0000000..f956014
--- /dev/null
+++ b/extraction/xml2json.py
@@ -0,0 +1,8 @@
+
+import json
+import sys
+import xmltodict
+
+with open('tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml', 'rb') as f:
+ thing = xmltodict.parse(f, process_namespaces=False)
+ print(json.dumps(thing))