diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-11-16 17:50:06 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-11-16 17:50:06 -0800 |
commit | a73b73c2944b3df2a62886c4e6b69c93f5e74222 (patch) | |
tree | 3e5b13af8ba46b240f9ae53d5f522fb7ee02c219 /python/fatcat_tools/importers | |
parent | b1b34d44ce1a416ee70be665b71b99ba9f98d9a3 (diff) | |
download | fatcat-a73b73c2944b3df2a62886c4e6b69c93f5e74222.tar.gz fatcat-a73b73c2944b3df2a62886c4e6b69c93f5e74222.zip |
html ingest: actual xhtml mimetype
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/ingest.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 4dcb1ec3..4fbd19f1 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -392,7 +392,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
         if row['request'].get('ingest_type') != 'html':
             self.counts['skip-ingest-type'] += 1
             return False
-        if row['file_meta'].get('mimetype') not in ("text/html", "application/html"):
+        if row['file_meta'].get('mimetype') not in ("text/html", "application/xhtml+xml"):
             self.counts['skip-mimetype'] += 1
             return False
 
@@ -407,7 +407,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
         if request.get('ingest_type') != "html":
             self.counts['skip-ingest-type'] += 1
             return None
-        if file_meta['mimetype'] not in ("text/html", "application/html"):
+        if file_meta['mimetype'] not in ("text/html", "application/xhtml+xml"):
             self.counts['skip-mimetype'] += 1
             return None
 