about summary refs log tree commit diff stats
path: root/chocula/directories
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-22 13:46:42 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-22 13:46:42 -0700
commit db40b9e70b917dbbbfda48f6d77a2fc509366a82 (patch)
tree e66626601ec32965c4cee7fad16982530408019b /chocula/directories
parent 5d3ce061d24a5188fc015012b2f70a4c6f568969 (diff)
download chocula-db40b9e70b917dbbbfda48f6d77a2fc509366a82.tar.gz
chocula-db40b9e70b917dbbbfda48f6d77a2fc509366a82.zip
fmt (black)
Diffstat (limited to 'chocula/directories')
-rw-r--r-- chocula/directories/__init__.py 17
-rw-r--r-- chocula/directories/crossref.py 15
-rw-r--r-- chocula/directories/doaj.py 58
-rw-r--r-- chocula/directories/entrez.py 14
-rw-r--r-- chocula/directories/ezb.py 29
-rw-r--r-- chocula/directories/gold_oa.py 13
-rw-r--r-- chocula/directories/norwegian.py 31
-rw-r--r-- chocula/directories/openapc.py 19
-rw-r--r-- chocula/directories/road.py 29
-rw-r--r-- chocula/directories/scielo.py 32
-rw-r--r-- chocula/directories/sherpa_romeo.py 33
-rw-r--r-- chocula/directories/sim.py 41
-rw-r--r-- chocula/directories/szczepanski.py 25
-rw-r--r-- chocula/directories/wikidata.py 27
14 files changed, 211 insertions, 172 deletions
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index a233a26..90e6f26 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -1,4 +1,3 @@
-
from chocula.directories.crossref import CrossrefLoader
from chocula.directories.doaj import DoajLoader
from chocula.directories.entrez import EntrezLoader
@@ -14,7 +13,17 @@ from chocula.directories.szczepanski import SzczepanskiLoader
from chocula.directories.wikidata import WikidataLoader
ALL_CHOCULA_DIR_CLASSES = [
- CrossrefLoader, DoajLoader, EntrezLoader,EzbLoader, GoldOALoader,
- NorwegianLoader, OpenAPCLoader, RoadLoader, SherpaRomeoLoader,
- SzczepanskiLoader, WikidataLoader, SimLoader, ScieloLoader,
+ CrossrefLoader,
+ DoajLoader,
+ EntrezLoader,
+ EzbLoader,
+ GoldOALoader,
+ NorwegianLoader,
+ OpenAPCLoader,
+ RoadLoader,
+ SherpaRomeoLoader,
+ SzczepanskiLoader,
+ WikidataLoader,
+ SimLoader,
+ ScieloLoader,
]
diff --git a/chocula/directories/crossref.py b/chocula/directories/crossref.py
index 4208008..a494021 100644
--- a/chocula/directories/crossref.py
+++ b/chocula/directories/crossref.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import csv
@@ -23,14 +22,14 @@ class CrossrefLoader(DirectoryLoader):
def parse_record(self, record) -> Optional[DirectoryInfo]:
info = DirectoryInfo(
directory_slug=self.source_slug,
- issne=record['eissn'],
- issnp=record['pissn'],
- custom_id=record.get('doi').strip() or None,
- name=clean_str(record.get('JournalTitle')),
- publisher=clean_str(record.get('Publisher')),
+ issne=record["eissn"],
+ issnp=record["pissn"],
+ custom_id=record.get("doi").strip() or None,
+ name=clean_str(record.get("JournalTitle")),
+ publisher=clean_str(record.get("Publisher")),
)
- if record['additionalIssns']:
- info.raw_issn = record['additionalIssns'][0]
+ if record["additionalIssns"]:
+ info.raw_issn = record["additionalIssns"][0]
return info
diff --git a/chocula/directories/doaj.py b/chocula/directories/doaj.py
index 7968dc2..795ce68 100644
--- a/chocula/directories/doaj.py
+++ b/chocula/directories/doaj.py
@@ -1,8 +1,13 @@
-
from typing import Iterable, Optional, Dict, Any
import csv
-from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP
+from chocula.util import (
+ clean_str,
+ parse_mimetypes,
+ parse_country,
+ parse_lang,
+ PLATFORM_MAP,
+)
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
@@ -81,40 +86,43 @@ class DoajLoader(DirectoryLoader):
info = DirectoryInfo(
directory_slug=self.source_slug,
- issnp=row['Journal ISSN (print version)'],
- issne=row['Journal EISSN (online version)'],
- name=clean_str(row['Journal title']),
- publisher=clean_str(row['Publisher']),
- platform=PLATFORM_MAP.get(row['Platform, host or aggregator']),
- country=parse_country(row['Country of publisher']),
+ issnp=row["Journal ISSN (print version)"],
+ issne=row["Journal EISSN (online version)"],
+ name=clean_str(row["Journal title"]),
+ publisher=clean_str(row["Publisher"]),
+ platform=PLATFORM_MAP.get(row["Platform, host or aggregator"]),
+ country=parse_country(row["Country of publisher"]),
)
- lang = parse_lang(row['Full text language'])
+ lang = parse_lang(row["Full text language"])
if lang:
info.langs.append(lang)
extra: Dict[str, Any] = dict(doaj=dict())
- extra['mimetypes'] = parse_mimetypes(row['Full text formats'])
- extra['doaj']['as_of'] = self.config.snapshot.date
- if row['DOAJ Seal']:
- extra['doaj']['seal'] = {"no": False, "yes": True}[row['DOAJ Seal'].lower()]
+ extra["mimetypes"] = parse_mimetypes(row["Full text formats"])
+ extra["doaj"]["as_of"] = self.config.snapshot.date
+ if row["DOAJ Seal"]:
+ extra["doaj"]["seal"] = {"no": False, "yes": True}[row["DOAJ Seal"].lower()]
- if row['Digital archiving policy or program(s)']:
- extra['archive'] = [a.strip() for a in row['Digital archiving policy or program(s)'].split(',') if a.strip()]
- elif row['Archiving: national library']:
- extra['archive'] = ['national-library']
+ if row["Digital archiving policy or program(s)"]:
+ extra["archive"] = [
+ a.strip()
+ for a in row["Digital archiving policy or program(s)"].split(",")
+ if a.strip()
+ ]
+ elif row["Archiving: national library"]:
+ extra["archive"] = ["national-library"]
- crawl_permission = row['Journal full-text crawl permission']
+ crawl_permission = row["Journal full-text crawl permission"]
if crawl_permission:
- extra['crawl-permission'] = dict(Yes=True, No=False)[crawl_permission]
- default_license = row['Journal license']
- if default_license and default_license.startswith('CC'):
- extra['default_license'] = default_license.replace('CC ', 'CC-').strip()
+ extra["crawl-permission"] = dict(Yes=True, No=False)[crawl_permission]
+ default_license = row["Journal license"]
+ if default_license and default_license.startswith("CC"):
+ extra["default_license"] = default_license.replace("CC ", "CC-").strip()
- url = row['Journal URL']
+ url = row["Journal URL"]
if url:
- homepage = HomepageUrl.from_url(row['Journal URL'])
+ homepage = HomepageUrl.from_url(row["Journal URL"])
if homepage:
info.homepage_urls.append(homepage)
return info
-
diff --git a/chocula/directories/entrez.py b/chocula/directories/entrez.py
index b30f04d..f9f6d23 100644
--- a/chocula/directories/entrez.py
+++ b/chocula/directories/entrez.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import csv
@@ -26,14 +25,13 @@ class EntrezLoader(DirectoryLoader):
return csv.DictReader(open(self.config.entrez_simple.filepath))
def parse_record(self, record) -> Optional[DirectoryInfo]:
- if not (record.get('ISSN (Online)') or record.get('ISSN (Print)')):
+ if not (record.get("ISSN (Online)") or record.get("ISSN (Print)")):
return None
return DirectoryInfo(
directory_slug=self.source_slug,
- issne=record.get('ISSN (Online)'),
- issnp=record.get('ISSN (Print)'),
- custom_id=record.get('NlmId').strip() or None,
- name=clean_str(record.get('JournalTitle')),
- abbrev=clean_str(record['IsoAbbr']),
+ issne=record.get("ISSN (Online)"),
+ issnp=record.get("ISSN (Print)"),
+ custom_id=record.get("NlmId").strip() or None,
+ name=clean_str(record.get("JournalTitle")),
+ abbrev=clean_str(record["IsoAbbr"]),
)
-
diff --git a/chocula/directories/ezb.py b/chocula/directories/ezb.py
index 1573048..056350d 100644
--- a/chocula/directories/ezb.py
+++ b/chocula/directories/ezb.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import json
@@ -16,7 +15,7 @@ class EzbLoader(DirectoryLoader):
source_slug = "ezb"
def open_file(self) -> Iterable:
- return open(self.config.ezb.filepath, 'r')
+ return open(self.config.ezb.filepath, "r")
def parse_record(self, row) -> Optional[DirectoryInfo]:
@@ -26,21 +25,29 @@ class EzbLoader(DirectoryLoader):
info = DirectoryInfo(
directory_slug=self.source_slug,
- issne=row.get('issne'),
- issnp=row.get('issnp'),
- custom_id=row['ezb_id'],
- name=clean_str(row['title']),
- publisher=clean_str(row.get('publisher')),
+ issne=row.get("issne"),
+ issnp=row.get("issnp"),
+ custom_id=row["ezb_id"],
+ name=clean_str(row["title"]),
+ publisher=clean_str(row.get("publisher")),
)
info.extra = dict()
- for k in ('ezb_color', 'subjects', 'keywords', 'zdb_id',
- 'first_volume', 'first_issue', 'first_year',
- 'appearance', 'costs'):
+ for k in (
+ "ezb_color",
+ "subjects",
+ "keywords",
+ "zdb_id",
+ "first_volume",
+ "first_issue",
+ "first_year",
+ "appearance",
+ "costs",
+ ):
if row.get(k):
info.extra[k] = row[k]
- url = HomepageUrl.from_url(row.get('url'))
+ url = HomepageUrl.from_url(row.get("url"))
if url:
info.homepage_urls.append(url)
diff --git a/chocula/directories/gold_oa.py b/chocula/directories/gold_oa.py
index a75944d..d0c6e8b 100644
--- a/chocula/directories/gold_oa.py
+++ b/chocula/directories/gold_oa.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import csv
@@ -21,11 +20,11 @@ class GoldOALoader(DirectoryLoader):
def parse_record(self, row) -> Optional[DirectoryInfo]:
- if not (row.get('ISSN_L') and row.get('TITLE')):
+ if not (row.get("ISSN_L") and row.get("TITLE")):
return None
# TODO: also add for other non-direct indices
- #for ind in ('WOS', 'SCOPUS'):
+ # for ind in ('WOS', 'SCOPUS'):
# issnl, status = self.add_issn(
# ind.lower(),
# raw_issn=row['ISSN_L'],
@@ -33,12 +32,12 @@ class GoldOALoader(DirectoryLoader):
# )
extra = dict()
- for ind in ('DOAJ', 'ROAD', 'PMC', 'OAPC', 'WOS', 'SCOPUS'):
- extra['in_' + ind.lower()] = bool(int(row['JOURNAL_IN_' + ind]))
+ for ind in ("DOAJ", "ROAD", "PMC", "OAPC", "WOS", "SCOPUS"):
+ extra["in_" + ind.lower()] = bool(int(row["JOURNAL_IN_" + ind]))
return DirectoryInfo(
directory_slug=self.source_slug,
- raw_issn=row['ISSN_L'],
- name=clean_str(row['TITLE']),
+ raw_issn=row["ISSN_L"],
+ name=clean_str(row["TITLE"]),
extra=extra,
)
diff --git a/chocula/directories/norwegian.py b/chocula/directories/norwegian.py
index 2b83961..2425318 100644
--- a/chocula/directories/norwegian.py
+++ b/chocula/directories/norwegian.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import csv
@@ -52,29 +51,31 @@ class NorwegianLoader(DirectoryLoader):
source_slug = "norwegian"
def open_file(self) -> Iterable:
- return csv.DictReader(open(self.config.norwegian.filepath, encoding="ISO-8859-1"), delimiter=";")
+ return csv.DictReader(
+ open(self.config.norwegian.filepath, encoding="ISO-8859-1"), delimiter=";"
+ )
def parse_record(self, row) -> Optional[DirectoryInfo]:
info = DirectoryInfo(
directory_slug=self.source_slug,
- issnp=row['Print ISSN'],
- issne=row['Online ISSN'],
- country=parse_country(row['Country of publication']),
- name=clean_str(row.get('International title')),
- langs=[l for l in [parse_lang(row['Language'])] if l],
+ issnp=row["Print ISSN"],
+ issne=row["Online ISSN"],
+ country=parse_country(row["Country of publication"]),
+ name=clean_str(row.get("International title")),
+ langs=[l for l in [parse_lang(row["Language"])] if l],
)
- info.extra['norwegian'] = dict(as_of=self.config.norwegian.date)
- if row['Level 2019']:
- info.extra['norwegian']['level'] = int(row['Level 2019'])
+ info.extra["norwegian"] = dict(as_of=self.config.norwegian.date)
+ if row["Level 2019"]:
+ info.extra["norwegian"]["level"] = int(row["Level 2019"])
- if row['Original title'] != row['International title']:
- info.original_name = clean_str(row['Original title'])
+ if row["Original title"] != row["International title"]:
+ info.original_name = clean_str(row["Original title"])
- identifier=row['NSD tidsskrift_id'],
- publisher=row['Publisher'],
+ identifier = (row["NSD tidsskrift_id"],)
+ publisher = (row["Publisher"],)
- url = HomepageUrl.from_url(row['URL'])
+ url = HomepageUrl.from_url(row["URL"])
if url:
info.homepage_urls.append(url)
diff --git a/chocula/directories/openapc.py b/chocula/directories/openapc.py
index c2acd95..99304c3 100644
--- a/chocula/directories/openapc.py
+++ b/chocula/directories/openapc.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import csv
@@ -21,24 +20,22 @@ class OpenAPCLoader(DirectoryLoader):
def parse_record(self, row) -> Optional[DirectoryInfo]:
- if not row.get('issn'):
+ if not row.get("issn"):
return None
info = DirectoryInfo(
directory_slug=self.source_slug,
- issne=row['issn_electronic'],
- issnp=row['issn_print'],
- raw_issn=row['issn_l'] or row['issn'],
- name=clean_str(row['journal_full_title']),
- publisher=clean_str(row['publisher']),
+ issne=row["issn_electronic"],
+ issnp=row["issn_print"],
+ raw_issn=row["issn_l"] or row["issn"],
+ name=clean_str(row["journal_full_title"]),
+ publisher=clean_str(row["publisher"]),
)
- info.extra['is_hybrid'] = bool(row['is_hybrid'])
+ info.extra["is_hybrid"] = bool(row["is_hybrid"])
- homepage = HomepageUrl.from_url(row['url'])
+ homepage = HomepageUrl.from_url(row["url"])
if homepage:
info.homepage_urls.append(homepage)
return info
-
-
diff --git a/chocula/directories/road.py b/chocula/directories/road.py
index 23cca65..bc550fd 100644
--- a/chocula/directories/road.py
+++ b/chocula/directories/road.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import csv
@@ -26,27 +25,39 @@ class RoadLoader(DirectoryLoader):
source_slug = "road"
def open_file(self) -> Iterable:
- return csv.DictReader(open(self.config.road.filepath), delimiter='\t',
- fieldnames=("ISSN", "ISSN-L", "Short Title", "Title", "Publisher", "URL1", "URL2", "Region", "Lang1", "Lang2")
+ return csv.DictReader(
+ open(self.config.road.filepath),
+ delimiter="\t",
+ fieldnames=(
+ "ISSN",
+ "ISSN-L",
+ "Short Title",
+ "Title",
+ "Publisher",
+ "URL1",
+ "URL2",
+ "Region",
+ "Lang1",
+ "Lang2",
+ ),
)
def parse_record(self, row) -> Optional[DirectoryInfo]:
info = DirectoryInfo(
directory_slug=self.source_slug,
- raw_issn=row['ISSN-L'],
- name=clean_str(row['Short Title']),
- publisher=clean_str(row['Publisher']),
- langs=[l for l in (row['Lang1'], row['Lang2']) if l],
+ raw_issn=row["ISSN-L"],
+ name=clean_str(row["Short Title"]),
+ publisher=clean_str(row["Publisher"]),
+ langs=[l for l in (row["Lang1"], row["Lang2"]) if l],
)
# TODO: region mapping: "Europe and North America"
# TODO: lang mapping: already alpha-3
# homepages
- for url in [u for u in (row['URL1'], row['URL2']) if u]:
+ for url in [u for u in (row["URL1"], row["URL2"]) if u]:
homepage = HomepageUrl.from_url(url)
if homepage:
info.homepage_urls.append(homepage)
return info
-
diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py
index 247866b..0ed8fde 100644
--- a/chocula/directories/scielo.py
+++ b/chocula/directories/scielo.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import json
@@ -17,32 +16,31 @@ class ScieloLoader(DirectoryLoader):
def parse_record(self, line) -> Optional[DirectoryInfo]:
record = json.loads(line)
extra = dict(
- status=clean_str(record.get('current_status')),
- first_year=record.get('first_year'),
- collection=record.get('collection_acronym'),
+ status=clean_str(record.get("current_status")),
+ first_year=record.get("first_year"),
+ collection=record.get("collection_acronym"),
)
for k in list(extra.keys()):
if extra[k] is None:
extra.pop(k)
country: Optional[str] = None
- if record['publisher_country'] and len(record['publisher_country'][0]) == 2:
- country = record['publisher_country'][0].lower()
+ if record["publisher_country"] and len(record["publisher_country"][0]) == 2:
+ country = record["publisher_country"][0].lower()
info = DirectoryInfo(
directory_slug=self.source_slug,
- issne=clean_issn(record.get('electronic_issn') or ''),
- issnp=clean_issn(record.get('print_issn') or ''),
- custom_id=clean_str(record.get('scielo_issn')),
- name=clean_str(record.get('fulltitle')),
- publisher=clean_str((record.get('publisher_name') or [''])[0]),
- abbrev=clean_str(record['abbreviated_iso_title']),
- platform='scielo',
- langs=list(filter(lambda s: len(s) == 2, record['languages'])),
+ issne=clean_issn(record.get("electronic_issn") or ""),
+ issnp=clean_issn(record.get("print_issn") or ""),
+ custom_id=clean_str(record.get("scielo_issn")),
+ name=clean_str(record.get("fulltitle")),
+ publisher=clean_str((record.get("publisher_name") or [""])[0]),
+ abbrev=clean_str(record["abbreviated_iso_title"]),
+ platform="scielo",
+ langs=list(filter(lambda s: len(s) == 2, record["languages"])),
country=country,
extra=extra,
)
- if record['url']:
- homepage = HomepageUrl.from_url(record['url'])
+ if record["url"]:
+ homepage = HomepageUrl.from_url(record["url"])
if homepage:
info.homepage_urls.append(homepage)
return info
-
diff --git a/chocula/directories/sherpa_romeo.py b/chocula/directories/sherpa_romeo.py
index e92dc69..a8ba1b0 100644
--- a/chocula/directories/sherpa_romeo.py
+++ b/chocula/directories/sherpa_romeo.py
@@ -1,4 +1,3 @@
-
import sys
from typing import Iterable, Optional, Dict, Any
import csv
@@ -27,32 +26,38 @@ class SherpaRomeoLoader(DirectoryLoader):
# first load policies
print("##### Loading SHERPA/ROMEO policies...", file=sys.stderr)
- fixed_policy_file = ftfy.fix_file(open(self.config.sherpa_romeo_policies_simple.filepath, 'rb'))
+ fixed_policy_file = ftfy.fix_file(
+ open(self.config.sherpa_romeo_policies_simple.filepath, "rb")
+ )
policy_reader = csv.DictReader(fixed_policy_file)
for row in policy_reader:
- self.sherpa_policies[row['RoMEO Record ID']] = row
+ self.sherpa_policies[row["RoMEO Record ID"]] = row
# then open regular file
- raw_file = open(self.config.sherpa_romeo_journals_simple.filepath, 'rb').read().decode(errors='replace')
+ raw_file = (
+ open(self.config.sherpa_romeo_journals_simple.filepath, "rb")
+ .read()
+ .decode(errors="replace")
+ )
fixed_file = ftfy.fix_text(raw_file)
- return csv.DictReader(fixed_file.split('\n'))
+ return csv.DictReader(fixed_file.split("\n"))
def parse_record(self, row) -> Optional[DirectoryInfo]:
# super mangled :(
- row.update(self.sherpa_policies[row['RoMEO Record ID']])
+ row.update(self.sherpa_policies[row["RoMEO Record ID"]])
info = DirectoryInfo(
directory_slug=self.source_slug,
- issnp=row['ISSN'],
- issne=row['ESSN'],
- name=clean_str(row['Journal Title']),
- publisher=clean_str(row['Publisher']),
- country=parse_country(row['Country']),
- custom_id=row['RoMEO Record ID'],
+ issnp=row["ISSN"],
+ issne=row["ESSN"],
+ name=clean_str(row["Journal Title"]),
+ publisher=clean_str(row["Publisher"]),
+ country=parse_country(row["Country"]),
+ custom_id=row["RoMEO Record ID"],
)
- if row['RoMEO colour']:
- info.extra['sherpa_romeo'] = dict(color=row['RoMEO colour'])
+ if row["RoMEO colour"]:
+ info.extra["sherpa_romeo"] = dict(color=row["RoMEO colour"])
return info
diff --git a/chocula/directories/sim.py b/chocula/directories/sim.py
index ff5cce3..97f84d2 100644
--- a/chocula/directories/sim.py
+++ b/chocula/directories/sim.py
@@ -1,8 +1,14 @@
-
from typing import Iterable, Optional, Dict, Any
import csv
-from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP, gaps_to_spans
+from chocula.util import (
+ clean_str,
+ parse_mimetypes,
+ parse_country,
+ parse_lang,
+ PLATFORM_MAP,
+ gaps_to_spans,
+)
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
@@ -37,35 +43,34 @@ class SimLoader(DirectoryLoader):
# TODO: 'Pub Type'
extra: Dict[str, Any] = {}
- first_year = row['First Volume']
+ first_year = row["First Volume"]
if first_year:
first_year = int(first_year)
- extra['first_year'] = int(row['First Volume'])
+ extra["first_year"] = int(row["First Volume"])
else:
first_year = None
- last_year = row['Last Volume']
+ last_year = row["Last Volume"]
if last_year:
last_year = int(last_year)
- extra['last_year'] = last_year
+ extra["last_year"] = last_year
else:
last_year = None
- gaps = [int(g) for g in row['NA Gaps'].split(';') if g.strip()]
+ gaps = [int(g) for g in row["NA Gaps"].split(";") if g.strip()]
if gaps:
- extra['gaps'] = gaps
+ extra["gaps"] = gaps
if first_year and last_year:
- extra['year_spans'] = gaps_to_spans(first_year, last_year, gaps)
- extra['scholarly_peer_reviewed'] = row["Scholarly / Peer-\nReviewed"]
- extra['peer_reviewed'] = row["Peer-\nReviewed"]
- extra['pub_type'] = clean_str(row["Pub Type"])
+ extra["year_spans"] = gaps_to_spans(first_year, last_year, gaps)
+ extra["scholarly_peer_reviewed"] = row["Scholarly / Peer-\nReviewed"]
+ extra["peer_reviewed"] = row["Peer-\nReviewed"]
+ extra["pub_type"] = clean_str(row["Pub Type"])
info = DirectoryInfo(
directory_slug=self.source_slug,
- name=clean_str(row['Title']),
- publisher=clean_str(row['Publisher']),
- raw_issn=row['ISSN'][:9],
- custom_id=row.get('NA Pub Cat ID').strip() or None,
- langs=[parse_lang(row['Pub Language'])],
+ name=clean_str(row["Title"]),
+ publisher=clean_str(row["Publisher"]),
+ raw_issn=row["ISSN"][:9],
+ custom_id=row.get("NA Pub Cat ID").strip() or None,
+ langs=[parse_lang(row["Pub Language"])],
extra=extra,
)
return info
-
diff --git a/chocula/directories/szczepanski.py b/chocula/directories/szczepanski.py
index 0d1558a..3586acb 100644
--- a/chocula/directories/szczepanski.py
+++ b/chocula/directories/szczepanski.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import json
@@ -16,7 +15,7 @@ class SzczepanskiLoader(DirectoryLoader):
source_slug = "szczepanski"
def open_file(self) -> Iterable:
- return open(self.config.szczepanski.filepath, 'r')
+ return open(self.config.szczepanski.filepath, "r")
def parse_record(self, row) -> Optional[DirectoryInfo]:
@@ -27,21 +26,21 @@ class SzczepanskiLoader(DirectoryLoader):
info = DirectoryInfo(
directory_slug=self.source_slug,
- issne=row.get('issne'),
- issnp=row.get('issnp'),
- raw_issn=row.get('issn'),
- name=clean_str(row['title']),
- publisher=clean_str(row.get('ed')),
+ issne=row.get("issne"),
+ issnp=row.get("issnp"),
+ raw_issn=row.get("issn"),
+ name=clean_str(row["title"]),
+ publisher=clean_str(row.get("ed")),
)
- info.extra['szczepanski'] = dict(as_of=self.config.szczepanski.date)
- if row.get('extra'):
- info.extra['szczepanski']['notes'] = row.get('extra')
- for k in ('other_titles', 'year_spans', 'ed'):
+ info.extra["szczepanski"] = dict(as_of=self.config.szczepanski.date)
+ if row.get("extra"):
+ info.extra["szczepanski"]["notes"] = row.get("extra")
+ for k in ("other_titles", "year_spans", "ed"):
if row.get(k):
- info.extra['szczepanski'][k] = row[k]
+ info.extra["szczepanski"][k] = row[k]
- url = HomepageUrl.from_url(row.get('url'))
+ url = HomepageUrl.from_url(row.get("url"))
if url:
info.homepage_urls.append(url)
diff --git a/chocula/directories/wikidata.py b/chocula/directories/wikidata.py
index d16d8df..5ffe6fb 100644
--- a/chocula/directories/wikidata.py
+++ b/chocula/directories/wikidata.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import csv
@@ -16,27 +15,31 @@ class WikidataLoader(DirectoryLoader):
source_slug = "wikidata"
def open_file(self) -> Iterable:
- return csv.DictReader(open(self.config.wikidata.filepath), delimiter='\t')
+ return csv.DictReader(open(self.config.wikidata.filepath), delimiter="\t")
def parse_record(self, row) -> Optional[DirectoryInfo]:
- if not (row.get('issn') and row.get('title')):
+ if not (row.get("issn") and row.get("title")):
return None
- wikidata_qid = row['item'].strip().split('/')[-1]
- publisher = row['publisher_name']
- if (publisher.startswith('Q') and publisher[1].isdigit()) or publisher.startswith('t1') or not publisher:
+ wikidata_qid = row["item"].strip().split("/")[-1]
+ publisher = row["publisher_name"]
+ if (
+ (publisher.startswith("Q") and publisher[1].isdigit())
+ or publisher.startswith("t1")
+ or not publisher
+ ):
publisher = None
- info =DirectoryInfo(
+ info = DirectoryInfo(
directory_slug=self.source_slug,
- raw_issn=row['issn'],
+ raw_issn=row["issn"],
custom_id=wikidata_qid,
- name=clean_str(row['title']),
+ name=clean_str(row["title"]),
publisher=clean_str(publisher),
)
- if row.get('start_year'):
- info.extra['start_year'] = row['start_year']
+ if row.get("start_year"):
+ info.extra["start_year"] = row["start_year"]
- url = HomepageUrl.from_url(row.get('websiteurl'))
+ url = HomepageUrl.from_url(row.get("websiteurl"))
if url:
info.homepage_urls.append(url)