diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/html_metadata.py | 8 |
1 file changed, 8 insertions, 0 deletions
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index 1a328ef..23bf136 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -583,6 +583,14 @@ PDF_FULLTEXT_PATTERNS: List[dict] = [
         "technique": "PDF Download link (e-manuscripta.ch)",
         "example_page": "https://www.e-manuscripta.ch/zut/doi/10.7891/e-manuscripta-112176",
     },
+    {
+        "in_doc_url": "journals.uchicago.edu",
+        "in_fulltext_url": "pdf",
+        "selector": "nav.article__navbar a.ctrl--pdf",
+        "attr": "href",
+        "technique": "PDF Download link (journals.uchicago.edu)",
+        "example_page": "https://www.journals.uchicago.edu/doi/10.14318/hau1.1.008",
+    },
 ]
 
 FULLTEXT_URL_PATTERNS_SKIP = [