diff options
-rw-r--r-- | python/sandcrawler/ingest_html.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest_html.py b/python/sandcrawler/ingest_html.py
index 1c2c3fd..fb42e71 100644
--- a/python/sandcrawler/ingest_html.py
+++ b/python/sandcrawler/ingest_html.py
@@ -260,6 +260,8 @@ def html_guess_platform(
         in doc.html
     ):
         return "ojs"
+    if '<a href="https://www.pubpub.org">Published with' in doc.html:
+        return "pubpub"
     if 'Powered by <a target="_blank" href="http://arphahub.com">' in doc.html:
         return "arpha"
     if "<meta property='og:image' content='http://cms.galenos.com.tr' />" in doc.html: