about summary refs log tree commit diff stats
path: root/chocula
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-09-13 22:12:03 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-09-13 22:12:03 -0700
commit 2dec107e37280b55dddae74cd0328f2d5c7979b6 (patch)
tree 3083bad52ef8814368c5d6cadf2ccdb89266e764 /chocula
parent 043b35040e4385c674267aa88c4056bdfdd9cb6c (diff)
download chocula-2dec107e37280b55dddae74cd0328f2d5c7979b6.tar.gz
chocula-2dec107e37280b55dddae74cd0328f2d5c7979b6.zip
update vanished journal importer for 2020-09-03 dataset
Diffstat (limited to 'chocula')
-rw-r--r-- chocula/directories/vanished_disapeared.py 16
-rw-r--r-- chocula/directories/vanished_inactive.py 32
2 files changed, 18 insertions, 30 deletions
diff --git a/chocula/directories/vanished_disapeared.py b/chocula/directories/vanished_disapeared.py
index c9d2bf9..de25434 100644
--- a/chocula/directories/vanished_disapeared.py
+++ b/chocula/directories/vanished_disapeared.py
@@ -19,32 +19,29 @@ class VanishedDisapearedLoader(DirectoryLoader):
- E-ISSN
- URL
- Publisher
- - blank
+ - <blank>
- Language(s)
- Country
- society_affiliation
- other_sci_affiliation
- - Discipline
- Discipline Group
- Start Year
- End Year
- Last Year Online
- Actively Publishing
- Internet Archive Link
- - Verified
- - Comments
- - The Keepers (archived)
- - Archive Link
- - Mikael (1 = agree with Lisa)
"""
source_slug = "vanished_disapeared"
def open_file(self) -> Iterable:
- return csv.DictReader(open(self.config.vanished_disapeared.filepath))
+ return csv.DictReader(open(self.config.vanished_disapeared.filepath), delimiter=";")
def parse_record(self, record) -> Optional[DirectoryInfo]:
+ if not record["Journal Name"]:
+ return None
+
info = DirectoryInfo(
directory_slug=self.source_slug,
raw_issn=clean_issn(record["ISSN"]),
@@ -57,7 +54,4 @@ class VanishedDisapearedLoader(DirectoryLoader):
homepage = HomepageUrl.from_url(record["Internet Archive Link"])
if homepage:
info.homepage_urls.append(homepage)
- homepage = HomepageUrl.from_url(record["Archive Link"])
- if homepage:
- info.homepage_urls.append(homepage)
return info
diff --git a/chocula/directories/vanished_inactive.py b/chocula/directories/vanished_inactive.py
index 253940c..8b23525 100644
--- a/chocula/directories/vanished_inactive.py
+++ b/chocula/directories/vanished_inactive.py
@@ -1,9 +1,9 @@
import csv
from typing import Iterable, Optional
-from chocula.util import clean_str, clean_issn, parse_lang, parse_country
+from chocula.util import clean_str, clean_issn
from chocula.common import DirectoryLoader
-from chocula.database import DirectoryInfo
+from chocula.database import DirectoryInfo, HomepageUrl
class VanishedInactiveLoader(DirectoryLoader):
@@ -13,38 +13,32 @@ class VanishedInactiveLoader(DirectoryLoader):
CSV headers:
- - Source
- Title
- - Identifier
- - Publisher
- - Comment
- - Language
+ - URL
- ISSN
- EISSN
- - Keyword
- - Start Year
- - End Year
- - Added on date
- - Subjects
- - Country
- - Publication fee
- - Further Information
"""
source_slug = "vanished_inactive"
def open_file(self) -> Iterable:
- return csv.DictReader(open(self.config.vanished_inactive.filepath))
+ return csv.DictReader(open(self.config.vanished_inactive.filepath), delimiter=";")
def parse_record(self, record) -> Optional[DirectoryInfo]:
+ # HACK
+ record["Title"] = record["\ufeffTitle"]
+ if not record["Title"]:
+ return None
+
info = DirectoryInfo(
directory_slug=self.source_slug,
raw_issn=clean_issn(record["ISSN"]),
issne=clean_issn(record["EISSN"]),
name=clean_str(record["Title"]),
- publisher=clean_str(record["Publisher"]),
- langs=[lang for lang in [parse_lang(record["Language"])] if lang],
- country=parse_country(record["Country"]),
)
+
+ homepage = HomepageUrl.from_url(record["URL"])
+ if homepage:
+ info.homepage_urls.append(homepage)
return info