about summary refs log tree commit diff stats
path: root/python/sandcrawler/html_metadata.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/html_metadata.py')
-rw-r--r-- python/sandcrawler/html_metadata.py 12
1 files changed, 12 insertions, 0 deletions
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index 2fb500c..edaf89f 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -255,6 +255,12 @@ HTML_FULLTEXT_PATTERNS: List[Dict[str, str]] = [
"attr": "href",
"technique": "dovepress fulltext link",
},
+ {
+ "in_doc_url": "://doaj.org/article/",
+ "selector": "section.col-md-8 a[target='_blank'].button--primary",
+ "attr": "href",
+ "technique": "doaj.org access link",
+ },
]
COMPONENT_FULLTEXT_PATTERNS: List[Dict[str, str]] = [
@@ -627,6 +633,12 @@ PDF_FULLTEXT_PATTERNS: List[Dict[str, str]] = [
"attr": "href",
"technique": "PDF link (scitemed.com)",
},
+ {
+ "in_doc_url": "://doaj.org/article/",
+ "selector": "section.col-md-8 a[target='_blank'].button--primary",
+ "attr": "href",
+ "technique": "doaj.org access link",
+ },
]
FULLTEXT_URL_PATTERNS_SKIP: List[str] = [