about | summary | refs | log | tree | commit | diff | stats
path: root/mapreduce/xml2json.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-04-04 12:06:38 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-04-04 12:06:38 -0700
commit 1dad0d9e54bfae93eebea47f8a3cb291cdd645c5 (patch)
tree 97a8c9bcaf93734e2dbd8f431d37213520b55fbd /mapreduce/xml2json.py
parent 427dd875958c8a6d2d791d55f9dda300ebdc853b (diff)
download sandcrawler-1dad0d9e54bfae93eebea47f8a3cb291cdd645c5.tar.gz
download sandcrawler-1dad0d9e54bfae93eebea47f8a3cb291cdd645c5.zip
extraction -> mapreduce
Diffstat (limited to 'mapreduce/xml2json.py')
-rw-r--r-- mapreduce/xml2json.py 8
1 files changed, 8 insertions, 0 deletions
diff --git a/mapreduce/xml2json.py b/mapreduce/xml2json.py
new file mode 100644
index 0000000..f956014
--- /dev/null
+++ b/mapreduce/xml2json.py
@@ -0,0 +1,8 @@
+
+import json
+import sys
+import xmltodict
+
+with open('tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml', 'rb') as f:
+ thing = xmltodict.parse(f, process_namespaces=False)
+ print(json.dumps(thing))