about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2022-10-24 10:19:17 -0700
committer Bryan Newbold <bnewbold@archive.org> 2022-10-24 10:19:17 -0700
commit 97aa00038fbce39097d5f78b8891bbb88b71af75 (patch)
tree 853d27c41da25be16df9b257c0667efa7ae0f0d6 /python
parent 1657d4783a29eaee4adb610e0134a10b0126e202 (diff)
download sandcrawler-97aa00038fbce39097d5f78b8891bbb88b71af75.tar.gz
sandcrawler-97aa00038fbce39097d5f78b8891bbb88b71af75.zip
html: pubpub platform detection
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/ingest_html.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest_html.py b/python/sandcrawler/ingest_html.py
index 1c2c3fd..fb42e71 100644
--- a/python/sandcrawler/ingest_html.py
+++ b/python/sandcrawler/ingest_html.py
@@ -260,6 +260,8 @@ def html_guess_platform(
in doc.html
):
return "ojs"
+ if '<a href="https://www.pubpub.org">Published with' in doc.html:
+ return "pubpub"
if 'Powered by <a target="_blank" href="http://arphahub.com">' in doc.html:
return "arpha"
if "<meta property='og:image' content='http://cms.galenos.com.tr' />" in doc.html: