aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/xml.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-11-06 18:32:35 -0800
committerBryan Newbold <bnewbold@archive.org>2020-11-06 18:32:35 -0800
commit175019c96fced3e21d0f60ea1a4a37da6b8872ac (patch)
treef42fbbe9c8ac06ae9eb06373ab9eec96d2b3a177 /python/sandcrawler/xml.py
parentb0b66c20c6ffb9d8acc626068964d7dfd5d3bcdc (diff)
parent47ca1a273912c8836630b0930b71a4e66fd2c85b (diff)
downloadsandcrawler-175019c96fced3e21d0f60ea1a4a37da6b8872ac.tar.gz
sandcrawler-175019c96fced3e21d0f60ea1a4a37da6b8872ac.zip
Merge branch 'bnewbold-html-ingest'
Diffstat (limited to 'python/sandcrawler/xml.py')
-rw-r--r--python/sandcrawler/xml.py7
1 files changed, 7 insertions, 0 deletions
diff --git a/python/sandcrawler/xml.py b/python/sandcrawler/xml.py
new file mode 100644
index 0000000..7a0086d
--- /dev/null
+++ b/python/sandcrawler/xml.py
@@ -0,0 +1,7 @@
+
+import xml.etree.ElementTree as ET
+
+
+def xml_reserialize(raw: bytes) -> str:  # Parse raw XML bytes and return the document re-serialized as text.
+ root = ET.fromstring(raw)  # build an element tree from the input; only the root element is kept for output
+ return '<?xml version="1.0" encoding="UTF-8"?>\n' + ET.tostring(root, encoding="unicode")  # encoding="unicode" makes tostring() return str; a fixed UTF-8 declaration line is prepended by hand