about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-11-30 14:22:15 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-11-30 14:22:15 -0800
commit 2cf76354dabacdb54e4101cc5df13c6b9ccade70 (patch)
tree d4dbb60ce4baf46edb06efe5edd7413f3fee27e9
parent 55539e94edc9a49fca1dafdd9468966abd33fe10 (diff)
download chocula-2cf76354dabacdb54e4101cc5df13c6b9ccade70.tar.gz
chocula-2cf76354dabacdb54e4101cc5df13c6b9ccade70.zip
make fmt
-rwxr-xr-x check_issn_urls.py 4
-rw-r--r-- chocula/directories/mag.py 22
-rw-r--r-- chocula/directories/openalex.py 30
3 files changed, 29 insertions, 27 deletions
diff --git a/check_issn_urls.py b/check_issn_urls.py
index b00609f..cea7c81 100755
--- a/check_issn_urls.py
+++ b/check_issn_urls.py
@@ -120,6 +120,7 @@ def check_gwb(url, match_type="exact"):
else:
return None
+
HOST_SKIP_LIST = [
"www.jstor.org",
"www.tandfonline.com",
@@ -133,6 +134,7 @@ HOST_SKIP_LIST = [
"catalog.hathitrust.org",
]
+
def check_url(issnl, url):
# print("Fetching: %s" % url)
info = dict(issnl=issnl, url=url)
@@ -140,7 +142,7 @@ def check_url(issnl, url):
info["error"] = "bad-url"
info["terminal_status_code"] = -1
return info
- if not url.startswith('http'):
+ if not url.startswith("http"):
info["error"] = "url-not-http"
info["terminal_status_code"] = -1
return info
diff --git a/chocula/directories/mag.py b/chocula/directories/mag.py
index d6849ee..2b7b8e5 100644
--- a/chocula/directories/mag.py
+++ b/chocula/directories/mag.py
@@ -10,17 +10,17 @@ class MagLoader(DirectoryLoader):
"""
TSV Columns (from schema docs):
- 1 JournalId long PRIMARY KEY
- 2 Rank uint See FAQ
- 3 NormalizedName string
- 4 DisplayName string
- 5 Issn string
- 6 Publisher string
- 7 Webpage string
- 8 PaperCount long
- 9 PaperFamilyCount long See FAQ
- 10 CitationCount long
- 11 CreatedDate DateTime
+ 1 JournalId long PRIMARY KEY
+ 2 Rank uint See FAQ
+ 3 NormalizedName string
+ 4 DisplayName string
+ 5 Issn string
+ 6 Publisher string
+ 7 Webpage string
+ 8 PaperCount long
+ 9 PaperFamilyCount long See FAQ
+ 10 CitationCount long
+ 11 CreatedDate DateTime
"""
diff --git a/chocula/directories/openalex.py b/chocula/directories/openalex.py
index 478c814..fcc04b1 100644
--- a/chocula/directories/openalex.py
+++ b/chocula/directories/openalex.py
@@ -10,21 +10,21 @@ class OpenAlexLoader(DirectoryLoader):
"""
TSV Columns (from schema docs):
- 1 JournalId long PRIMARY KEY
- 2 Rank uint (DEPRECATED)
- 3 NormalizedName string
- 4 DisplayName string
- 5 Issn string (ISSN-L)
- 6 Issns JSON list
- 7 IsOa bool
- 8 IsInDoaj bool
- 9 Publisher string
- 10 Webpage string
- 11 PaperCount long
- 12 PaperFamilyCount long (DEPRECATED)
- 13 CitationCount long
- 14 CreatedDate DateTime
- 15 UpdatedDate DateTime
+ 1 JournalId long PRIMARY KEY
+ 2 Rank uint (DEPRECATED)
+ 3 NormalizedName string
+ 4 DisplayName string
+ 5 Issn string (ISSN-L)
+ 6 Issns JSON list
+ 7 IsOa bool
+ 8 IsInDoaj bool
+ 9 Publisher string
+ 10 Webpage string
+ 11 PaperCount long
+ 12 PaperFamilyCount long (DEPRECATED)
+ 13 CitationCount long
+ 14 CreatedDate DateTime
+ 15 UpdatedDate DateTime
"""