diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 22:40:14 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 22:40:14 -0800 |
commit | 653fac9632c6ae9dd036ad844454cf419cd5320b (patch) | |
tree | c09d8a3d8a2524a991f082ab500bce53d1986caa /python/sandcrawler/xml.py | |
parent | 9beafd7c5fc98571ec26b49d223ce660378d7b9e (diff) | |
download | sandcrawler-653fac9632c6ae9dd036ad844454cf419cd5320b.tar.gz sandcrawler-653fac9632c6ae9dd036ad844454cf419cd5320b.zip |
xml: re-encode XML docs into UTF-8 for persisting
Diffstat (limited to 'python/sandcrawler/xml.py')
-rw-r--r-- | python/sandcrawler/xml.py | 7 |
1 files changed, 7 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/xml.py b/python/sandcrawler/xml.py
new file mode 100644
index 0000000..7a0086d
--- /dev/null
+++ b/python/sandcrawler/xml.py
@@ -0,0 +1,7 @@
+
+import xml.etree.ElementTree as ET
+
+
+def xml_reserialize(raw: bytes) -> str:
+    root = ET.fromstring(raw)
+    return '<?xml version="1.0" encoding="UTF-8"?>\n' + ET.tostring(root, encoding="unicode")
```