From 97aa00038fbce39097d5f78b8891bbb88b71af75 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Mon, 24 Oct 2022 10:19:17 -0700
Subject: html: pubpub platform detection

---
 python/sandcrawler/ingest_html.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'python')

diff --git a/python/sandcrawler/ingest_html.py b/python/sandcrawler/ingest_html.py
index 1c2c3fd..fb42e71 100644
--- a/python/sandcrawler/ingest_html.py
+++ b/python/sandcrawler/ingest_html.py
@@ -260,6 +260,8 @@ def html_guess_platform(
         in doc.html
     ):
         return "ojs"
+    if 'Published with' in doc.html:
+        return "pubpub"
     if 'Powered by ' in doc.html:
         return "arpha"
     if "" in doc.html:
-- 
cgit v1.2.3