diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2020-11-16 17:50:06 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-11-16 17:50:06 -0800 | 
| commit | a73b73c2944b3df2a62886c4e6b69c93f5e74222 (patch) | |
| tree | 3e5b13af8ba46b240f9ae53d5f522fb7ee02c219 /python/fatcat_tools/importers | |
| parent | b1b34d44ce1a416ee70be665b71b99ba9f98d9a3 (diff) | |
| download | fatcat-a73b73c2944b3df2a62886c4e6b69c93f5e74222.tar.gz fatcat-a73b73c2944b3df2a62886c4e6b69c93f5e74222.zip  | |
html ingest: actual xhtml mimetype
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 4 | 
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 4dcb1ec3..4fbd19f1 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -392,7 +392,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
         if row['request'].get('ingest_type') != 'html':
             self.counts['skip-ingest-type'] += 1
             return False
-        if row['file_meta'].get('mimetype') not in ("text/html", "application/html"):
+        if row['file_meta'].get('mimetype') not in ("text/html", "application/xhtml+xml"):
             self.counts['skip-mimetype'] += 1
             return False
 
@@ -407,7 +407,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
         if request.get('ingest_type') != "html":
             self.counts['skip-ingest-type'] += 1
             return None
-        if file_meta['mimetype'] not in ("text/html", "application/html"):
+        if file_meta['mimetype'] not in ("text/html", "application/xhtml+xml"):
             self.counts['skip-mimetype'] += 1
             return None
 
```
