From db40b9e70b917dbbbfda48f6d77a2fc509366a82 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 22 Jun 2020 13:46:42 -0700 Subject: fmt (black) --- chocula/directories/__init__.py | 17 ++++++++--- chocula/directories/crossref.py | 15 +++++----- chocula/directories/doaj.py | 58 +++++++++++++++++++++---------------- chocula/directories/entrez.py | 14 ++++----- chocula/directories/ezb.py | 29 ++++++++++++------- chocula/directories/gold_oa.py | 13 ++++----- chocula/directories/norwegian.py | 31 ++++++++++---------- chocula/directories/openapc.py | 19 +++++------- chocula/directories/road.py | 29 +++++++++++++------ chocula/directories/scielo.py | 32 ++++++++++---------- chocula/directories/sherpa_romeo.py | 33 ++++++++++++--------- chocula/directories/sim.py | 41 ++++++++++++++------------ chocula/directories/szczepanski.py | 25 ++++++++-------- chocula/directories/wikidata.py | 27 +++++++++-------- 14 files changed, 211 insertions(+), 172 deletions(-) (limited to 'chocula/directories') diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py index a233a26..90e6f26 100644 --- a/chocula/directories/__init__.py +++ b/chocula/directories/__init__.py @@ -1,4 +1,3 @@ - from chocula.directories.crossref import CrossrefLoader from chocula.directories.doaj import DoajLoader from chocula.directories.entrez import EntrezLoader @@ -14,7 +13,17 @@ from chocula.directories.szczepanski import SzczepanskiLoader from chocula.directories.wikidata import WikidataLoader ALL_CHOCULA_DIR_CLASSES = [ - CrossrefLoader, DoajLoader, EntrezLoader,EzbLoader, GoldOALoader, - NorwegianLoader, OpenAPCLoader, RoadLoader, SherpaRomeoLoader, - SzczepanskiLoader, WikidataLoader, SimLoader, ScieloLoader, + CrossrefLoader, + DoajLoader, + EntrezLoader, + EzbLoader, + GoldOALoader, + NorwegianLoader, + OpenAPCLoader, + RoadLoader, + SherpaRomeoLoader, + SzczepanskiLoader, + WikidataLoader, + SimLoader, + ScieloLoader, ] diff --git a/chocula/directories/crossref.py b/chocula/directories/crossref.py index 4208008..a494021 100644 --- a/chocula/directories/crossref.py +++ b/chocula/directories/crossref.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import csv @@ -23,14 +22,14 @@ class CrossrefLoader(DirectoryLoader): def parse_record(self, record) -> Optional[DirectoryInfo]: info = DirectoryInfo( directory_slug=self.source_slug, - issne=record['eissn'], - issnp=record['pissn'], - custom_id=record.get('doi').strip() or None, - name=clean_str(record.get('JournalTitle')), - publisher=clean_str(record.get('Publisher')), + issne=record["eissn"], + issnp=record["pissn"], + custom_id=record.get("doi").strip() or None, + name=clean_str(record.get("JournalTitle")), + publisher=clean_str(record.get("Publisher")), ) - if record['additionalIssns']: - info.raw_issn = record['additionalIssns'][0] + if record["additionalIssns"]: + info.raw_issn = record["additionalIssns"][0] return info diff --git a/chocula/directories/doaj.py b/chocula/directories/doaj.py index 7968dc2..795ce68 100644 --- a/chocula/directories/doaj.py +++ b/chocula/directories/doaj.py @@ -1,8 +1,13 @@ - from typing import Iterable, Optional, Dict, Any import csv -from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP +from chocula.util import ( + clean_str, + parse_mimetypes, + parse_country, + parse_lang, + PLATFORM_MAP, +) from chocula.common import DirectoryLoader from chocula.database import DirectoryInfo, HomepageUrl @@ -81,40 +86,43 @@ class DoajLoader(DirectoryLoader): info = DirectoryInfo( directory_slug=self.source_slug, - issnp=row['Journal ISSN (print version)'], - issne=row['Journal EISSN (online version)'], - name=clean_str(row['Journal title']), - publisher=clean_str(row['Publisher']), - platform=PLATFORM_MAP.get(row['Platform, host or aggregator']), - country=parse_country(row['Country of publisher']), + issnp=row["Journal ISSN (print version)"], + issne=row["Journal EISSN (online version)"], + name=clean_str(row["Journal title"]), + publisher=clean_str(row["Publisher"]), + platform=PLATFORM_MAP.get(row["Platform, host or aggregator"]), + country=parse_country(row["Country of publisher"]), ) - lang = parse_lang(row['Full text language']) + lang = parse_lang(row["Full text language"]) if lang: info.langs.append(lang) extra: Dict[str, Any] = dict(doaj=dict()) - extra['mimetypes'] = parse_mimetypes(row['Full text formats']) - extra['doaj']['as_of'] = self.config.snapshot.date - if row['DOAJ Seal']: - extra['doaj']['seal'] = {"no": False, "yes": True}[row['DOAJ Seal'].lower()] + extra["mimetypes"] = parse_mimetypes(row["Full text formats"]) + extra["doaj"]["as_of"] = self.config.snapshot.date + if row["DOAJ Seal"]: + extra["doaj"]["seal"] = {"no": False, "yes": True}[row["DOAJ Seal"].lower()] - if row['Digital archiving policy or program(s)']: - extra['archive'] = [a.strip() for a in row['Digital archiving policy or program(s)'].split(',') if a.strip()] - elif row['Archiving: national library']: - extra['archive'] = ['national-library'] + if row["Digital archiving policy or program(s)"]: + extra["archive"] = [ + a.strip() + for a in row["Digital archiving policy or program(s)"].split(",") + if a.strip() + ] + elif row["Archiving: national library"]: + extra["archive"] = ["national-library"] - crawl_permission = row['Journal full-text crawl permission'] + crawl_permission = row["Journal full-text crawl permission"] if crawl_permission: - extra['crawl-permission'] = dict(Yes=True, No=False)[crawl_permission] - default_license = row['Journal license'] - if default_license and default_license.startswith('CC'): - extra['default_license'] = default_license.replace('CC ', 'CC-').strip() + extra["crawl-permission"] = dict(Yes=True, No=False)[crawl_permission] + default_license = row["Journal license"] + if default_license and default_license.startswith("CC"): + extra["default_license"] = default_license.replace("CC ", "CC-").strip() - url = row['Journal URL'] + url = row["Journal URL"] if url: - homepage = HomepageUrl.from_url(row['Journal URL']) + homepage = HomepageUrl.from_url(row["Journal URL"]) if homepage: info.homepage_urls.append(homepage) return info - diff --git a/chocula/directories/entrez.py b/chocula/directories/entrez.py index b30f04d..f9f6d23 100644 --- a/chocula/directories/entrez.py +++ b/chocula/directories/entrez.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import csv @@ -26,14 +25,13 @@ class EntrezLoader(DirectoryLoader): return csv.DictReader(open(self.config.entrez_simple.filepath)) def parse_record(self, record) -> Optional[DirectoryInfo]: - if not (record.get('ISSN (Online)') or record.get('ISSN (Print)')): + if not (record.get("ISSN (Online)") or record.get("ISSN (Print)")): return None return DirectoryInfo( directory_slug=self.source_slug, - issne=record.get('ISSN (Online)'), - issnp=record.get('ISSN (Print)'), - custom_id=record.get('NlmId').strip() or None, - name=clean_str(record.get('JournalTitle')), - abbrev=clean_str(record['IsoAbbr']), + issne=record.get("ISSN (Online)"), + issnp=record.get("ISSN (Print)"), + custom_id=record.get("NlmId").strip() or None, + name=clean_str(record.get("JournalTitle")), + abbrev=clean_str(record["IsoAbbr"]), ) - diff --git a/chocula/directories/ezb.py b/chocula/directories/ezb.py index 1573048..056350d 100644 --- a/chocula/directories/ezb.py +++ b/chocula/directories/ezb.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import json @@ -16,7 +15,7 @@ class EzbLoader(DirectoryLoader): source_slug = "ezb" def open_file(self) -> Iterable: - return open(self.config.ezb.filepath, 'r') + return open(self.config.ezb.filepath, "r") def parse_record(self, row) -> Optional[DirectoryInfo]: @@ -26,21 +25,29 @@ class EzbLoader(DirectoryLoader): info = DirectoryInfo( directory_slug=self.source_slug, - issne=row.get('issne'), - issnp=row.get('issnp'), - custom_id=row['ezb_id'], - name=clean_str(row['title']), - publisher=clean_str(row.get('publisher')), + issne=row.get("issne"), + issnp=row.get("issnp"), + custom_id=row["ezb_id"], + name=clean_str(row["title"]), + publisher=clean_str(row.get("publisher")), ) info.extra = dict() - for k in ('ezb_color', 'subjects', 'keywords', 'zdb_id', - 'first_volume', 'first_issue', 'first_year', - 'appearance', 'costs'): + for k in ( + "ezb_color", + "subjects", + "keywords", + "zdb_id", + "first_volume", + "first_issue", + "first_year", + "appearance", + "costs", + ): if row.get(k): info.extra[k] = row[k] - url = HomepageUrl.from_url(row.get('url')) + url = HomepageUrl.from_url(row.get("url")) if url: info.homepage_urls.append(url) diff --git a/chocula/directories/gold_oa.py b/chocula/directories/gold_oa.py index a75944d..d0c6e8b 100644 --- a/chocula/directories/gold_oa.py +++ b/chocula/directories/gold_oa.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import csv @@ -21,11 +20,11 @@ class GoldOALoader(DirectoryLoader): def parse_record(self, row) -> Optional[DirectoryInfo]: - if not (row.get('ISSN_L') and row.get('TITLE')): + if not (row.get("ISSN_L") and row.get("TITLE")): return None # TODO: also add for other non-direct indices - #for ind in ('WOS', 'SCOPUS'): + # for ind in ('WOS', 'SCOPUS'): # issnl, status = self.add_issn( # ind.lower(), # raw_issn=row['ISSN_L'], @@ -33,12 +32,12 @@ class GoldOALoader(DirectoryLoader): # ) extra = dict() - for ind in ('DOAJ', 'ROAD', 'PMC', 'OAPC', 'WOS', 'SCOPUS'): - extra['in_' + ind.lower()] = bool(int(row['JOURNAL_IN_' + ind])) + for ind in ("DOAJ", "ROAD", "PMC", "OAPC", "WOS", "SCOPUS"): + extra["in_" + ind.lower()] = bool(int(row["JOURNAL_IN_" + ind])) return DirectoryInfo( directory_slug=self.source_slug, - raw_issn=row['ISSN_L'], - name=clean_str(row['TITLE']), + raw_issn=row["ISSN_L"], + name=clean_str(row["TITLE"]), extra=extra, ) diff --git a/chocula/directories/norwegian.py b/chocula/directories/norwegian.py index 2b83961..2425318 100644 --- a/chocula/directories/norwegian.py +++ b/chocula/directories/norwegian.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import csv @@ -52,29 +51,31 @@ class NorwegianLoader(DirectoryLoader): source_slug = "norwegian" def open_file(self) -> Iterable: - return csv.DictReader(open(self.config.norwegian.filepath, encoding="ISO-8859-1"), delimiter=";") + return csv.DictReader( + open(self.config.norwegian.filepath, encoding="ISO-8859-1"), delimiter=";" + ) def parse_record(self, row) -> Optional[DirectoryInfo]: info = DirectoryInfo( directory_slug=self.source_slug, - issnp=row['Print ISSN'], - issne=row['Online ISSN'], - country=parse_country(row['Country of publication']), - name=clean_str(row.get('International title')), - langs=[l for l in [parse_lang(row['Language'])] if l], + issnp=row["Print ISSN"], + issne=row["Online ISSN"], + country=parse_country(row["Country of publication"]), + name=clean_str(row.get("International title")), + langs=[l for l in [parse_lang(row["Language"])] if l], ) - info.extra['norwegian'] = dict(as_of=self.config.norwegian.date) - if row['Level 2019']: - info.extra['norwegian']['level'] = int(row['Level 2019']) + info.extra["norwegian"] = dict(as_of=self.config.norwegian.date) + if row["Level 2019"]: + info.extra["norwegian"]["level"] = int(row["Level 2019"]) - if row['Original title'] != row['International title']: - info.original_name = clean_str(row['Original title']) + if row["Original title"] != row["International title"]: + info.original_name = clean_str(row["Original title"]) - identifier=row['NSD tidsskrift_id'], - publisher=row['Publisher'], + identifier = (row["NSD tidsskrift_id"],) + publisher = (row["Publisher"],) - url = HomepageUrl.from_url(row['URL']) + url = HomepageUrl.from_url(row["URL"]) if url: info.homepage_urls.append(url) diff --git a/chocula/directories/openapc.py b/chocula/directories/openapc.py index c2acd95..99304c3 100644 --- a/chocula/directories/openapc.py +++ b/chocula/directories/openapc.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import csv @@ -21,24 +20,22 @@ class OpenAPCLoader(DirectoryLoader): def parse_record(self, row) -> Optional[DirectoryInfo]: - if not row.get('issn'): + if not row.get("issn"): return None info = DirectoryInfo( directory_slug=self.source_slug, - issne=row['issn_electronic'], - issnp=row['issn_print'], - raw_issn=row['issn_l'] or row['issn'], - name=clean_str(row['journal_full_title']), - publisher=clean_str(row['publisher']), + issne=row["issn_electronic"], + issnp=row["issn_print"], + raw_issn=row["issn_l"] or row["issn"], + name=clean_str(row["journal_full_title"]), + publisher=clean_str(row["publisher"]), ) - info.extra['is_hybrid'] = bool(row['is_hybrid']) + info.extra["is_hybrid"] = bool(row["is_hybrid"]) - homepage = HomepageUrl.from_url(row['url']) + homepage = HomepageUrl.from_url(row["url"]) if homepage: info.homepage_urls.append(homepage) return info - - diff --git a/chocula/directories/road.py b/chocula/directories/road.py index 23cca65..bc550fd 100644 --- a/chocula/directories/road.py +++ b/chocula/directories/road.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import csv @@ -26,27 +25,39 @@ class RoadLoader(DirectoryLoader): source_slug = "road" def open_file(self) -> Iterable: - return csv.DictReader(open(self.config.road.filepath), delimiter='\t', - fieldnames=("ISSN", "ISSN-L", "Short Title", "Title", "Publisher", "URL1", "URL2", "Region", "Lang1", "Lang2") + return csv.DictReader( + open(self.config.road.filepath), + delimiter="\t", + fieldnames=( + "ISSN", + "ISSN-L", + "Short Title", + "Title", + "Publisher", + "URL1", + "URL2", + "Region", + "Lang1", + "Lang2", + ), ) def parse_record(self, row) -> Optional[DirectoryInfo]: info = DirectoryInfo( directory_slug=self.source_slug, - raw_issn=row['ISSN-L'], - name=clean_str(row['Short Title']), - publisher=clean_str(row['Publisher']), - langs=[l for l in (row['Lang1'], row['Lang2']) if l], + raw_issn=row["ISSN-L"], + name=clean_str(row["Short Title"]), + publisher=clean_str(row["Publisher"]), + langs=[l for l in (row["Lang1"], row["Lang2"]) if l], ) # TODO: region mapping: "Europe and North America" # TODO: lang mapping: already alpha-3 # homepages - for url in [u for u in (row['URL1'], row['URL2']) if u]: + for url in [u for u in (row["URL1"], row["URL2"]) if u]: homepage = HomepageUrl.from_url(url) if homepage: info.homepage_urls.append(homepage) return info - diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py index 247866b..0ed8fde 100644 --- a/chocula/directories/scielo.py +++ b/chocula/directories/scielo.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import json @@ -17,32 +16,31 @@ class ScieloLoader(DirectoryLoader): def parse_record(self, line) -> Optional[DirectoryInfo]: record = json.loads(line) extra = dict( - status=clean_str(record.get('current_status')), - first_year=record.get('first_year'), - collection=record.get('collection_acronym'), + status=clean_str(record.get("current_status")), + first_year=record.get("first_year"), + collection=record.get("collection_acronym"), ) for k in list(extra.keys()): if extra[k] is None: extra.pop(k) country: Optional[str] = None - if record['publisher_country'] and len(record['publisher_country'][0]) == 2: - country = record['publisher_country'][0].lower() + if record["publisher_country"] and len(record["publisher_country"][0]) == 2: + country = record["publisher_country"][0].lower() info = DirectoryInfo( directory_slug=self.source_slug, - issne=clean_issn(record.get('electronic_issn') or ''), - issnp=clean_issn(record.get('print_issn') or ''), - custom_id=clean_str(record.get('scielo_issn')), - name=clean_str(record.get('fulltitle')), - publisher=clean_str((record.get('publisher_name') or [''])[0]), - abbrev=clean_str(record['abbreviated_iso_title']), - platform='scielo', - langs=list(filter(lambda s: len(s) == 2, record['languages'])), + issne=clean_issn(record.get("electronic_issn") or ""), + issnp=clean_issn(record.get("print_issn") or ""), + custom_id=clean_str(record.get("scielo_issn")), + name=clean_str(record.get("fulltitle")), + publisher=clean_str((record.get("publisher_name") or [""])[0]), + abbrev=clean_str(record["abbreviated_iso_title"]), + platform="scielo", + langs=list(filter(lambda s: len(s) == 2, record["languages"])), country=country, extra=extra, ) - if record['url']: - homepage = HomepageUrl.from_url(record['url']) + if record["url"]: + homepage = HomepageUrl.from_url(record["url"]) if homepage: info.homepage_urls.append(homepage) return info - diff --git a/chocula/directories/sherpa_romeo.py b/chocula/directories/sherpa_romeo.py index e92dc69..a8ba1b0 100644 --- a/chocula/directories/sherpa_romeo.py +++ b/chocula/directories/sherpa_romeo.py @@ -1,4 +1,3 @@ - import sys from typing import Iterable, Optional, Dict, Any import csv @@ -27,32 +26,38 @@ class SherpaRomeoLoader(DirectoryLoader): # first load policies print("##### Loading SHERPA/ROMEO policies...", file=sys.stderr) - fixed_policy_file = ftfy.fix_file(open(self.config.sherpa_romeo_policies_simple.filepath, 'rb')) + fixed_policy_file = ftfy.fix_file( + open(self.config.sherpa_romeo_policies_simple.filepath, "rb") + ) policy_reader = csv.DictReader(fixed_policy_file) for row in policy_reader: - self.sherpa_policies[row['RoMEO Record ID']] = row + self.sherpa_policies[row["RoMEO Record ID"]] = row # then open regular file - raw_file = open(self.config.sherpa_romeo_journals_simple.filepath, 'rb').read().decode(errors='replace') + raw_file = ( + open(self.config.sherpa_romeo_journals_simple.filepath, "rb") + .read() + .decode(errors="replace") + ) fixed_file = ftfy.fix_text(raw_file) - return csv.DictReader(fixed_file.split('\n')) + return csv.DictReader(fixed_file.split("\n")) def parse_record(self, row) -> Optional[DirectoryInfo]: # super mangled :( - row.update(self.sherpa_policies[row['RoMEO Record ID']]) + row.update(self.sherpa_policies[row["RoMEO Record ID"]]) info = DirectoryInfo( directory_slug=self.source_slug, - issnp=row['ISSN'], - issne=row['ESSN'], - name=clean_str(row['Journal Title']), - publisher=clean_str(row['Publisher']), - country=parse_country(row['Country']), - custom_id=row['RoMEO Record ID'], + issnp=row["ISSN"], + issne=row["ESSN"], + name=clean_str(row["Journal Title"]), + publisher=clean_str(row["Publisher"]), + country=parse_country(row["Country"]), + custom_id=row["RoMEO Record ID"], ) - if row['RoMEO colour']: - info.extra['sherpa_romeo'] = dict(color=row['RoMEO colour']) + if row["RoMEO colour"]: + info.extra["sherpa_romeo"] = dict(color=row["RoMEO colour"]) return info diff --git a/chocula/directories/sim.py b/chocula/directories/sim.py index ff5cce3..97f84d2 100644 --- a/chocula/directories/sim.py +++ b/chocula/directories/sim.py @@ -1,8 +1,14 @@ - from typing import Iterable, Optional, Dict, Any import csv -from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP, gaps_to_spans +from chocula.util import ( + clean_str, + parse_mimetypes, + parse_country, + parse_lang, + PLATFORM_MAP, + gaps_to_spans, +) from chocula.common import DirectoryLoader from chocula.database import DirectoryInfo, HomepageUrl @@ -37,35 +43,34 @@ class SimLoader(DirectoryLoader): # TODO: 'Pub Type' extra: Dict[str, Any] = {} - first_year = row['First Volume'] + first_year = row["First Volume"] if first_year: first_year = int(first_year) - extra['first_year'] = int(row['First Volume']) + extra["first_year"] = int(row["First Volume"]) else: first_year = None - last_year = row['Last Volume'] + last_year = row["Last Volume"] if last_year: last_year = int(last_year) - extra['last_year'] = last_year + extra["last_year"] = last_year else: last_year = None - gaps = [int(g) for g in row['NA Gaps'].split(';') if g.strip()] + gaps = [int(g) for g in row["NA Gaps"].split(";") if g.strip()] if gaps: - extra['gaps'] = gaps + extra["gaps"] = gaps if first_year and last_year: - extra['year_spans'] = gaps_to_spans(first_year, last_year, gaps) - extra['scholarly_peer_reviewed'] = row["Scholarly / Peer-\nReviewed"] - extra['peer_reviewed'] = row["Peer-\nReviewed"] - extra['pub_type'] = clean_str(row["Pub Type"]) + extra["year_spans"] = gaps_to_spans(first_year, last_year, gaps) + extra["scholarly_peer_reviewed"] = row["Scholarly / Peer-\nReviewed"] + extra["peer_reviewed"] = row["Peer-\nReviewed"] + extra["pub_type"] = clean_str(row["Pub Type"]) info = DirectoryInfo( directory_slug=self.source_slug, - name=clean_str(row['Title']), - publisher=clean_str(row['Publisher']), - raw_issn=row['ISSN'][:9], - custom_id=row.get('NA Pub Cat ID').strip() or None, - langs=[parse_lang(row['Pub Language'])], + name=clean_str(row["Title"]), + publisher=clean_str(row["Publisher"]), + raw_issn=row["ISSN"][:9], + custom_id=row.get("NA Pub Cat ID").strip() or None, + langs=[parse_lang(row["Pub Language"])], extra=extra, ) return info - diff --git a/chocula/directories/szczepanski.py b/chocula/directories/szczepanski.py index 0d1558a..3586acb 100644 --- a/chocula/directories/szczepanski.py +++ b/chocula/directories/szczepanski.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import json @@ -16,7 +15,7 @@ class SzczepanskiLoader(DirectoryLoader): source_slug = "szczepanski" def open_file(self) -> Iterable: - return open(self.config.szczepanski.filepath, 'r') + return open(self.config.szczepanski.filepath, "r") def parse_record(self, row) -> Optional[DirectoryInfo]: @@ -27,21 +26,21 @@ class SzczepanskiLoader(DirectoryLoader): info = DirectoryInfo( directory_slug=self.source_slug, - issne=row.get('issne'), - issnp=row.get('issnp'), - raw_issn=row.get('issn'), - name=clean_str(row['title']), - publisher=clean_str(row.get('ed')), + issne=row.get("issne"), + issnp=row.get("issnp"), + raw_issn=row.get("issn"), + name=clean_str(row["title"]), + publisher=clean_str(row.get("ed")), ) - info.extra['szczepanski'] = dict(as_of=self.config.szczepanski.date) - if row.get('extra'): - info.extra['szczepanski']['notes'] = row.get('extra') - for k in ('other_titles', 'year_spans', 'ed'): + info.extra["szczepanski"] = dict(as_of=self.config.szczepanski.date) + if row.get("extra"): + info.extra["szczepanski"]["notes"] = row.get("extra") + for k in ("other_titles", "year_spans", "ed"): if row.get(k): - info.extra['szczepanski'][k] = row[k] + info.extra["szczepanski"][k] = row[k] - url = HomepageUrl.from_url(row.get('url')) + url = HomepageUrl.from_url(row.get("url")) if url: info.homepage_urls.append(url) diff --git a/chocula/directories/wikidata.py b/chocula/directories/wikidata.py index d16d8df..5ffe6fb 100644 --- a/chocula/directories/wikidata.py +++ b/chocula/directories/wikidata.py @@ -1,4 +1,3 @@ - from typing import Iterable, Optional import csv @@ -16,27 +15,31 @@ class WikidataLoader(DirectoryLoader): source_slug = "wikidata" def open_file(self) -> Iterable: - return csv.DictReader(open(self.config.wikidata.filepath), delimiter='\t') + return csv.DictReader(open(self.config.wikidata.filepath), delimiter="\t") def parse_record(self, row) -> Optional[DirectoryInfo]: - if not (row.get('issn') and row.get('title')): + if not (row.get("issn") and row.get("title")): return None - wikidata_qid = row['item'].strip().split('/')[-1] - publisher = row['publisher_name'] - if (publisher.startswith('Q') and publisher[1].isdigit()) or publisher.startswith('t1') or not publisher: + wikidata_qid = row["item"].strip().split("/")[-1] + publisher = row["publisher_name"] + if ( + (publisher.startswith("Q") and publisher[1].isdigit()) + or publisher.startswith("t1") + or not publisher + ): publisher = None - info =DirectoryInfo( + info = DirectoryInfo( directory_slug=self.source_slug, - raw_issn=row['issn'], + raw_issn=row["issn"], custom_id=wikidata_qid, - name=clean_str(row['title']), + name=clean_str(row["title"]), publisher=clean_str(publisher), ) - if row.get('start_year'): - info.extra['start_year'] = row['start_year'] + if row.get("start_year"): + info.extra["start_year"] = row["start_year"] - url = HomepageUrl.from_url(row.get('websiteurl')) + url = HomepageUrl.from_url(row.get("websiteurl")) if url: info.homepage_urls.append(url) -- cgit v1.2.3