From 7f7b1bd0ff45bcb7968dc3da912c2ff3dd74d07c Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@archive.org>
Date: Mon, 11 Oct 2021 16:34:23 -0700
Subject: pdf ingest: journals.uchicago.edu pattern

---
 python/sandcrawler/html_metadata.py | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'python')

diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index 1a328ef..23bf136 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -583,6 +583,14 @@ PDF_FULLTEXT_PATTERNS: List[dict] = [
         "technique": "PDF Download link (e-manuscripta.ch)",
         "example_page": "https://www.e-manuscripta.ch/zut/doi/10.7891/e-manuscripta-112176",
     },
+    {
+        "in_doc_url": "journals.uchicago.edu",
+        "in_fulltext_url": "pdf",
+        "selector": "nav.article__navbar a.ctrl--pdf",
+        "attr": "href",
+        "technique": "PDF Download link (journals.uchicago.edu)",
+        "example_page": "https://www.journals.uchicago.edu/doi/10.14318/hau1.1.008",
+    },
 ]
 
 FULLTEXT_URL_PATTERNS_SKIP = [
-- 
cgit v1.2.3