summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-11-16 17:50:06 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-11-16 17:50:06 -0800
commit: a73b73c2944b3df2a62886c4e6b69c93f5e74222 (patch)
tree: 3e5b13af8ba46b240f9ae53d5f522fb7ee02c219 /python/fatcat_tools/importers
parent: b1b34d44ce1a416ee70be665b71b99ba9f98d9a3 (diff)
download: fatcat-a73b73c2944b3df2a62886c4e6b69c93f5e74222.tar.gz
download: fatcat-a73b73c2944b3df2a62886c4e6b69c93f5e74222.zip
html ingest: actual xhtml mimetype
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/ingest.py | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 4dcb1ec3..4fbd19f1 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -392,7 +392,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
if row['request'].get('ingest_type') != 'html':
self.counts['skip-ingest-type'] += 1
return False
- if row['file_meta'].get('mimetype') not in ("text/html", "application/html"):
+ if row['file_meta'].get('mimetype') not in ("text/html", "application/xhtml+xml"):
self.counts['skip-mimetype'] += 1
return False
@@ -407,7 +407,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
if request.get('ingest_type') != "html":
self.counts['skip-ingest-type'] += 1
return None
- if file_meta['mimetype'] not in ("text/html", "application/html"):
+ if file_meta['mimetype'] not in ("text/html", "application/xhtml+xml"):
self.counts['skip-mimetype'] += 1
return None