diff options
-rwxr-xr-x | check_issn_urls.py | 4 | ||||
-rw-r--r-- | chocula/directories/mag.py | 22 | ||||
-rw-r--r-- | chocula/directories/openalex.py | 30 |
3 files changed, 29 insertions, 27 deletions
diff --git a/check_issn_urls.py b/check_issn_urls.py index b00609f..cea7c81 100755 --- a/check_issn_urls.py +++ b/check_issn_urls.py @@ -120,6 +120,7 @@ def check_gwb(url, match_type="exact"): else: return None + HOST_SKIP_LIST = [ "www.jstor.org", "www.tandfonline.com", @@ -133,6 +134,7 @@ HOST_SKIP_LIST = [ "catalog.hathitrust.org", ] + def check_url(issnl, url): # print("Fetching: %s" % url) info = dict(issnl=issnl, url=url) @@ -140,7 +142,7 @@ def check_url(issnl, url): info["error"] = "bad-url" info["terminal_status_code"] = -1 return info - if not url.startswith('http'): + if not url.startswith("http"): info["error"] = "url-not-http" info["terminal_status_code"] = -1 return info diff --git a/chocula/directories/mag.py b/chocula/directories/mag.py index d6849ee..2b7b8e5 100644 --- a/chocula/directories/mag.py +++ b/chocula/directories/mag.py @@ -10,17 +10,17 @@ class MagLoader(DirectoryLoader): """ TSV Columns (from schema docs): - 1 JournalId long PRIMARY KEY - 2 Rank uint See FAQ - 3 NormalizedName string - 4 DisplayName string - 5 Issn string - 6 Publisher string - 7 Webpage string - 8 PaperCount long - 9 PaperFamilyCount long See FAQ - 10 CitationCount long - 11 CreatedDate DateTime + 1 JournalId long PRIMARY KEY + 2 Rank uint See FAQ + 3 NormalizedName string + 4 DisplayName string + 5 Issn string + 6 Publisher string + 7 Webpage string + 8 PaperCount long + 9 PaperFamilyCount long See FAQ + 10 CitationCount long + 11 CreatedDate DateTime """ diff --git a/chocula/directories/openalex.py b/chocula/directories/openalex.py index 478c814..fcc04b1 100644 --- a/chocula/directories/openalex.py +++ b/chocula/directories/openalex.py @@ -10,21 +10,21 @@ class OpenAlexLoader(DirectoryLoader): """ TSV Columns (from schema docs): - 1 JournalId long PRIMARY KEY - 2 Rank uint (DEPRECATED) - 3 NormalizedName string - 4 DisplayName string - 5 Issn string (ISSN-L) - 6 Issns JSON list - 7 IsOa bool - 8 IsInDoaj bool - 9 Publisher string - 10 Webpage string - 11 PaperCount long - 12 PaperFamilyCount long (DEPRECATED) - 13 CitationCount long - 14 CreatedDate DateTime - 15 UpdatedDate DateTime + 1 JournalId long PRIMARY KEY + 2 Rank uint (DEPRECATED) + 3 NormalizedName string + 4 DisplayName string + 5 Issn string (ISSN-L) + 6 Issns JSON list + 7 IsOa bool + 8 IsInDoaj bool + 9 Publisher string + 10 Webpage string + 11 PaperCount long + 12 PaperFamilyCount long (DEPRECATED) + 13 CitationCount long + 14 CreatedDate DateTime + 15 UpdatedDate DateTime """ |