summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers/common.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 11:13:24 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 11:13:24 -0700
commit: c0faa77cce85ec8ade96927c9ce2ff5dd166aff6 (patch)
tree: 91bcdc1c36233ab2228293f28a950c9d86d275cc /python/fatcat_tools/importers/common.py
parent: 374dd6aac627468a17b9a0b09051845e0aaebacc (diff)
download: fatcat-c0faa77cce85ec8ade96927c9ce2ff5dd166aff6.tar.gz
fatcat-c0faa77cce85ec8ade96927c9ce2ff5dd166aff6.zip
bs4 XML parse cleanup
Diffstat (limited to 'python/fatcat_tools/importers/common.py')
-rw-r--r-- python/fatcat_tools/importers/common.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index c0742914..4d7b29fb 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -583,6 +583,7 @@ class Bs4XmlLinesPusher(RecordPusher):
continue
soup = BeautifulSoup(line, "xml")
self.importer.push_record(soup)
+ soup.decompose()
counts = self.importer.finish()
print(counts)
return counts
@@ -600,6 +601,7 @@ class Bs4XmlFilePusher(RecordPusher):
for record in soup.find_all(self.record_tag):
self.importer.push_record(record)
counts = self.importer.finish()
+ soup.decompose()
print(counts)
return counts