fmt (black)

author: Bryan Newbold <bnewbold@archive.org> 2020-06-22 13:46:42 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-22 13:46:42 -0700
commit: db40b9e70b917dbbbfda48f6d77a2fc509366a82 (patch)
tree: e66626601ec32965c4cee7fad16982530408019b /chocula/directories
parent: 5d3ce061d24a5188fc015012b2f70a4c6f568969 (diff)
download: chocula-db40b9e70b917dbbbfda48f6d77a2fc509366a82.tar.gz chocula-db40b9e70b917dbbbfda48f6d77a2fc509366a82.zip
14 files changed, 211 insertions, 172 deletions
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index a233a26..90e6f26 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -1,4 +1,3 @@
-
 from chocula.directories.crossref import CrossrefLoader
 from chocula.directories.doaj import DoajLoader
 from chocula.directories.entrez import EntrezLoader
@@ -14,7 +13,17 @@ from chocula.directories.szczepanski import SzczepanskiLoader
 from chocula.directories.wikidata import WikidataLoader
 
 ALL_CHOCULA_DIR_CLASSES = [
-    CrossrefLoader, DoajLoader, EntrezLoader,EzbLoader, GoldOALoader,
-    NorwegianLoader, OpenAPCLoader, RoadLoader, SherpaRomeoLoader,
-    SzczepanskiLoader, WikidataLoader, SimLoader, ScieloLoader,
+    CrossrefLoader,
+    DoajLoader,
+    EntrezLoader,
+    EzbLoader,
+    GoldOALoader,
+    NorwegianLoader,
+    OpenAPCLoader,
+    RoadLoader,
+    SherpaRomeoLoader,
+    SzczepanskiLoader,
+    WikidataLoader,
+    SimLoader,
+    ScieloLoader,
 ]
diff --git a/chocula/directories/crossref.py b/chocula/directories/crossref.py
index 4208008..a494021 100644
--- a/chocula/directories/crossref.py
+++ b/chocula/directories/crossref.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import csv
 
@@ -23,14 +22,14 @@ class CrossrefLoader(DirectoryLoader):
     def parse_record(self, record) -> Optional[DirectoryInfo]:
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            issne=record['eissn'],
-            issnp=record['pissn'],
-            custom_id=record.get('doi').strip() or None,
-            name=clean_str(record.get('JournalTitle')),
-            publisher=clean_str(record.get('Publisher')),
+            issne=record["eissn"],
+            issnp=record["pissn"],
+            custom_id=record.get("doi").strip() or None,
+            name=clean_str(record.get("JournalTitle")),
+            publisher=clean_str(record.get("Publisher")),
         )
 
-        if record['additionalIssns']:
-            info.raw_issn = record['additionalIssns'][0]
+        if record["additionalIssns"]:
+            info.raw_issn = record["additionalIssns"][0]
 
         return info
diff --git a/chocula/directories/doaj.py b/chocula/directories/doaj.py
index 7968dc2..795ce68 100644
--- a/chocula/directories/doaj.py
+++ b/chocula/directories/doaj.py
@@ -1,8 +1,13 @@
-
 from typing import Iterable, Optional, Dict, Any
 import csv
 
-from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP
+from chocula.util import (
+    clean_str,
+    parse_mimetypes,
+    parse_country,
+    parse_lang,
+    PLATFORM_MAP,
+)
 from chocula.common import DirectoryLoader
 from chocula.database import DirectoryInfo, HomepageUrl
 
@@ -81,40 +86,43 @@ class DoajLoader(DirectoryLoader):
 
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            issnp=row['Journal ISSN (print version)'],
-            issne=row['Journal EISSN (online version)'],
-            name=clean_str(row['Journal title']),
-            publisher=clean_str(row['Publisher']),
-            platform=PLATFORM_MAP.get(row['Platform, host or aggregator']),
-            country=parse_country(row['Country of publisher']),
+            issnp=row["Journal ISSN (print version)"],
+            issne=row["Journal EISSN (online version)"],
+            name=clean_str(row["Journal title"]),
+            publisher=clean_str(row["Publisher"]),
+            platform=PLATFORM_MAP.get(row["Platform, host or aggregator"]),
+            country=parse_country(row["Country of publisher"]),
         )
 
-        lang = parse_lang(row['Full text language'])
+        lang = parse_lang(row["Full text language"])
         if lang:
             info.langs.append(lang)
 
         extra: Dict[str, Any] = dict(doaj=dict())
-        extra['mimetypes'] = parse_mimetypes(row['Full text formats'])
-        extra['doaj']['as_of'] = self.config.snapshot.date
-        if row['DOAJ Seal']:
-            extra['doaj']['seal'] = {"no": False, "yes": True}[row['DOAJ Seal'].lower()]
+        extra["mimetypes"] = parse_mimetypes(row["Full text formats"])
+        extra["doaj"]["as_of"] = self.config.snapshot.date
+        if row["DOAJ Seal"]:
+            extra["doaj"]["seal"] = {"no": False, "yes": True}[row["DOAJ Seal"].lower()]
 
-        if row['Digital archiving policy or program(s)']:
-            extra['archive'] = [a.strip() for a in row['Digital archiving policy or program(s)'].split(',') if a.strip()]
-        elif row['Archiving: national library']:
-            extra['archive'] = ['national-library']
+        if row["Digital archiving policy or program(s)"]:
+            extra["archive"] = [
+                a.strip()
+                for a in row["Digital archiving policy or program(s)"].split(",")
+                if a.strip()
+            ]
+        elif row["Archiving: national library"]:
+            extra["archive"] = ["national-library"]
 
-        crawl_permission = row['Journal full-text crawl permission']
+        crawl_permission = row["Journal full-text crawl permission"]
         if crawl_permission:
-            extra['crawl-permission'] = dict(Yes=True, No=False)[crawl_permission]
-        default_license = row['Journal license']
-        if default_license and default_license.startswith('CC'):
-            extra['default_license'] = default_license.replace('CC ', 'CC-').strip()
+            extra["crawl-permission"] = dict(Yes=True, No=False)[crawl_permission]
+        default_license = row["Journal license"]
+        if default_license and default_license.startswith("CC"):
+            extra["default_license"] = default_license.replace("CC ", "CC-").strip()
 
-        url = row['Journal URL']
+        url = row["Journal URL"]
         if url:
-            homepage = HomepageUrl.from_url(row['Journal URL'])
+            homepage = HomepageUrl.from_url(row["Journal URL"])
             if homepage:
                 info.homepage_urls.append(homepage)
         return info
-
diff --git a/chocula/directories/entrez.py b/chocula/directories/entrez.py
index b30f04d..f9f6d23 100644
--- a/chocula/directories/entrez.py
+++ b/chocula/directories/entrez.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import csv
 
@@ -26,14 +25,13 @@ class EntrezLoader(DirectoryLoader):
         return csv.DictReader(open(self.config.entrez_simple.filepath))
 
     def parse_record(self, record) -> Optional[DirectoryInfo]:
-        if not (record.get('ISSN (Online)') or record.get('ISSN (Print)')):
+        if not (record.get("ISSN (Online)") or record.get("ISSN (Print)")):
             return None
         return DirectoryInfo(
             directory_slug=self.source_slug,
-            issne=record.get('ISSN (Online)'),
-            issnp=record.get('ISSN (Print)'),
-            custom_id=record.get('NlmId').strip() or None,
-            name=clean_str(record.get('JournalTitle')),
-            abbrev=clean_str(record['IsoAbbr']),
+            issne=record.get("ISSN (Online)"),
+            issnp=record.get("ISSN (Print)"),
+            custom_id=record.get("NlmId").strip() or None,
+            name=clean_str(record.get("JournalTitle")),
+            abbrev=clean_str(record["IsoAbbr"]),
         )
-
diff --git a/chocula/directories/ezb.py b/chocula/directories/ezb.py
index 1573048..056350d 100644
--- a/chocula/directories/ezb.py
+++ b/chocula/directories/ezb.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import json
 
@@ -16,7 +15,7 @@ class EzbLoader(DirectoryLoader):
     source_slug = "ezb"
 
     def open_file(self) -> Iterable:
-        return open(self.config.ezb.filepath, 'r')
+        return open(self.config.ezb.filepath, "r")
 
     def parse_record(self, row) -> Optional[DirectoryInfo]:
 
@@ -26,21 +25,29 @@ class EzbLoader(DirectoryLoader):
 
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            issne=row.get('issne'),
-            issnp=row.get('issnp'),
-            custom_id=row['ezb_id'],
-            name=clean_str(row['title']),
-            publisher=clean_str(row.get('publisher')),
+            issne=row.get("issne"),
+            issnp=row.get("issnp"),
+            custom_id=row["ezb_id"],
+            name=clean_str(row["title"]),
+            publisher=clean_str(row.get("publisher")),
         )
 
         info.extra = dict()
-        for k in ('ezb_color', 'subjects', 'keywords', 'zdb_id',
-                    'first_volume', 'first_issue', 'first_year',
-                    'appearance', 'costs'):
+        for k in (
+            "ezb_color",
+            "subjects",
+            "keywords",
+            "zdb_id",
+            "first_volume",
+            "first_issue",
+            "first_year",
+            "appearance",
+            "costs",
+        ):
             if row.get(k):
                 info.extra[k] = row[k]
 
-        url = HomepageUrl.from_url(row.get('url'))
+        url = HomepageUrl.from_url(row.get("url"))
         if url:
             info.homepage_urls.append(url)
 
diff --git a/chocula/directories/gold_oa.py b/chocula/directories/gold_oa.py
index a75944d..d0c6e8b 100644
--- a/chocula/directories/gold_oa.py
+++ b/chocula/directories/gold_oa.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import csv
 
@@ -21,11 +20,11 @@ class GoldOALoader(DirectoryLoader):
 
     def parse_record(self, row) -> Optional[DirectoryInfo]:
 
-        if not (row.get('ISSN_L') and row.get('TITLE')):
+        if not (row.get("ISSN_L") and row.get("TITLE")):
             return None
 
         # TODO: also add for other non-direct indices
-        #for ind in ('WOS', 'SCOPUS'):
+        # for ind in ('WOS', 'SCOPUS'):
         #    issnl, status = self.add_issn(
         #        ind.lower(),
         #        raw_issn=row['ISSN_L'],
@@ -33,12 +32,12 @@ class GoldOALoader(DirectoryLoader):
         #    )
 
         extra = dict()
-        for ind in ('DOAJ', 'ROAD', 'PMC', 'OAPC', 'WOS', 'SCOPUS'):
-            extra['in_' + ind.lower()] = bool(int(row['JOURNAL_IN_' + ind]))
+        for ind in ("DOAJ", "ROAD", "PMC", "OAPC", "WOS", "SCOPUS"):
+            extra["in_" + ind.lower()] = bool(int(row["JOURNAL_IN_" + ind]))
 
         return DirectoryInfo(
             directory_slug=self.source_slug,
-            raw_issn=row['ISSN_L'],
-            name=clean_str(row['TITLE']),
+            raw_issn=row["ISSN_L"],
+            name=clean_str(row["TITLE"]),
             extra=extra,
         )
diff --git a/chocula/directories/norwegian.py b/chocula/directories/norwegian.py
index 2b83961..2425318 100644
--- a/chocula/directories/norwegian.py
+++ b/chocula/directories/norwegian.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import csv
 
@@ -52,29 +51,31 @@ class NorwegianLoader(DirectoryLoader):
     source_slug = "norwegian"
 
     def open_file(self) -> Iterable:
-        return csv.DictReader(open(self.config.norwegian.filepath, encoding="ISO-8859-1"), delimiter=";")
+        return csv.DictReader(
+            open(self.config.norwegian.filepath, encoding="ISO-8859-1"), delimiter=";"
+        )
 
     def parse_record(self, row) -> Optional[DirectoryInfo]:
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            issnp=row['Print ISSN'],
-            issne=row['Online ISSN'],
-            country=parse_country(row['Country of publication']),
-            name=clean_str(row.get('International title')),
-            langs=[l for l in [parse_lang(row['Language'])] if l],
+            issnp=row["Print ISSN"],
+            issne=row["Online ISSN"],
+            country=parse_country(row["Country of publication"]),
+            name=clean_str(row.get("International title")),
+            langs=[l for l in [parse_lang(row["Language"])] if l],
         )
 
-        info.extra['norwegian'] = dict(as_of=self.config.norwegian.date)
-        if row['Level 2019']:
-            info.extra['norwegian']['level'] = int(row['Level 2019'])
+        info.extra["norwegian"] = dict(as_of=self.config.norwegian.date)
+        if row["Level 2019"]:
+            info.extra["norwegian"]["level"] = int(row["Level 2019"])
 
-        if row['Original title'] != row['International title']:
-            info.original_name = clean_str(row['Original title'])
+        if row["Original title"] != row["International title"]:
+            info.original_name = clean_str(row["Original title"])
 
-            identifier=row['NSD tidsskrift_id'],
-            publisher=row['Publisher'],
+            identifier = (row["NSD tidsskrift_id"],)
+            publisher = (row["Publisher"],)
 
-        url = HomepageUrl.from_url(row['URL'])
+        url = HomepageUrl.from_url(row["URL"])
         if url:
             info.homepage_urls.append(url)
 
diff --git a/chocula/directories/openapc.py b/chocula/directories/openapc.py
index c2acd95..99304c3 100644
--- a/chocula/directories/openapc.py
+++ b/chocula/directories/openapc.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import csv
 
@@ -21,24 +20,22 @@ class OpenAPCLoader(DirectoryLoader):
 
     def parse_record(self, row) -> Optional[DirectoryInfo]:
 
-        if not row.get('issn'):
+        if not row.get("issn"):
             return None
 
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            issne=row['issn_electronic'],
-            issnp=row['issn_print'],
-            raw_issn=row['issn_l'] or row['issn'],
-            name=clean_str(row['journal_full_title']),
-            publisher=clean_str(row['publisher']),
+            issne=row["issn_electronic"],
+            issnp=row["issn_print"],
+            raw_issn=row["issn_l"] or row["issn"],
+            name=clean_str(row["journal_full_title"]),
+            publisher=clean_str(row["publisher"]),
         )
 
-        info.extra['is_hybrid'] = bool(row['is_hybrid'])
+        info.extra["is_hybrid"] = bool(row["is_hybrid"])
 
-        homepage = HomepageUrl.from_url(row['url'])
+        homepage = HomepageUrl.from_url(row["url"])
         if homepage:
             info.homepage_urls.append(homepage)
 
         return info
-
-
diff --git a/chocula/directories/road.py b/chocula/directories/road.py
index 23cca65..bc550fd 100644
--- a/chocula/directories/road.py
+++ b/chocula/directories/road.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import csv
 
@@ -26,27 +25,39 @@ class RoadLoader(DirectoryLoader):
     source_slug = "road"
 
     def open_file(self) -> Iterable:
-        return csv.DictReader(open(self.config.road.filepath), delimiter='\t',
-            fieldnames=("ISSN", "ISSN-L", "Short Title", "Title", "Publisher", "URL1", "URL2", "Region", "Lang1", "Lang2")
+        return csv.DictReader(
+            open(self.config.road.filepath),
+            delimiter="\t",
+            fieldnames=(
+                "ISSN",
+                "ISSN-L",
+                "Short Title",
+                "Title",
+                "Publisher",
+                "URL1",
+                "URL2",
+                "Region",
+                "Lang1",
+                "Lang2",
+            ),
         )
 
     def parse_record(self, row) -> Optional[DirectoryInfo]:
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            raw_issn=row['ISSN-L'],
-            name=clean_str(row['Short Title']),
-            publisher=clean_str(row['Publisher']),
-            langs=[l for l in (row['Lang1'], row['Lang2']) if l],
+            raw_issn=row["ISSN-L"],
+            name=clean_str(row["Short Title"]),
+            publisher=clean_str(row["Publisher"]),
+            langs=[l for l in (row["Lang1"], row["Lang2"]) if l],
         )
 
         # TODO: region mapping: "Europe and North America"
         # TODO: lang mapping: already alpha-3
 
         # homepages
-        for url in [u for u in (row['URL1'], row['URL2']) if u]:
+        for url in [u for u in (row["URL1"], row["URL2"]) if u]:
             homepage = HomepageUrl.from_url(url)
             if homepage:
                 info.homepage_urls.append(homepage)
 
         return info
-
diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py
index 247866b..0ed8fde 100644
--- a/chocula/directories/scielo.py
+++ b/chocula/directories/scielo.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import json
 
@@ -17,32 +16,31 @@ class ScieloLoader(DirectoryLoader):
     def parse_record(self, line) -> Optional[DirectoryInfo]:
         record = json.loads(line)
         extra = dict(
-            status=clean_str(record.get('current_status')),
-            first_year=record.get('first_year'),
-            collection=record.get('collection_acronym'),
+            status=clean_str(record.get("current_status")),
+            first_year=record.get("first_year"),
+            collection=record.get("collection_acronym"),
         )
         for k in list(extra.keys()):
             if extra[k] is None:
                 extra.pop(k)
         country: Optional[str] = None
-        if record['publisher_country'] and len(record['publisher_country'][0]) == 2:
-            country = record['publisher_country'][0].lower()
+        if record["publisher_country"] and len(record["publisher_country"][0]) == 2:
+            country = record["publisher_country"][0].lower()
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            issne=clean_issn(record.get('electronic_issn') or ''),
-            issnp=clean_issn(record.get('print_issn') or ''),
-            custom_id=clean_str(record.get('scielo_issn')),
-            name=clean_str(record.get('fulltitle')),
-            publisher=clean_str((record.get('publisher_name') or [''])[0]),
-            abbrev=clean_str(record['abbreviated_iso_title']),
-            platform='scielo',
-            langs=list(filter(lambda s: len(s) == 2, record['languages'])),
+            issne=clean_issn(record.get("electronic_issn") or ""),
+            issnp=clean_issn(record.get("print_issn") or ""),
+            custom_id=clean_str(record.get("scielo_issn")),
+            name=clean_str(record.get("fulltitle")),
+            publisher=clean_str((record.get("publisher_name") or [""])[0]),
+            abbrev=clean_str(record["abbreviated_iso_title"]),
+            platform="scielo",
+            langs=list(filter(lambda s: len(s) == 2, record["languages"])),
             country=country,
             extra=extra,
         )
-        if record['url']:
-            homepage = HomepageUrl.from_url(record['url'])
+        if record["url"]:
+            homepage = HomepageUrl.from_url(record["url"])
             if homepage:
                 info.homepage_urls.append(homepage)
         return info
-
diff --git a/chocula/directories/sherpa_romeo.py b/chocula/directories/sherpa_romeo.py
index e92dc69..a8ba1b0 100644
--- a/chocula/directories/sherpa_romeo.py
+++ b/chocula/directories/sherpa_romeo.py
@@ -1,4 +1,3 @@
-
 import sys
 from typing import Iterable, Optional, Dict, Any
 import csv
@@ -27,32 +26,38 @@ class SherpaRomeoLoader(DirectoryLoader):
 
         # first load policies
         print("##### Loading SHERPA/ROMEO policies...", file=sys.stderr)
-        fixed_policy_file = ftfy.fix_file(open(self.config.sherpa_romeo_policies_simple.filepath, 'rb'))
+        fixed_policy_file = ftfy.fix_file(
+            open(self.config.sherpa_romeo_policies_simple.filepath, "rb")
+        )
         policy_reader = csv.DictReader(fixed_policy_file)
         for row in policy_reader:
-            self.sherpa_policies[row['RoMEO Record ID']] = row
+            self.sherpa_policies[row["RoMEO Record ID"]] = row
 
         # then open regular file
-        raw_file = open(self.config.sherpa_romeo_journals_simple.filepath, 'rb').read().decode(errors='replace')
+        raw_file = (
+            open(self.config.sherpa_romeo_journals_simple.filepath, "rb")
+            .read()
+            .decode(errors="replace")
+        )
         fixed_file = ftfy.fix_text(raw_file)
-        return csv.DictReader(fixed_file.split('\n'))
+        return csv.DictReader(fixed_file.split("\n"))
 
     def parse_record(self, row) -> Optional[DirectoryInfo]:
         # super mangled :(
 
-        row.update(self.sherpa_policies[row['RoMEO Record ID']])
+        row.update(self.sherpa_policies[row["RoMEO Record ID"]])
 
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            issnp=row['ISSN'],
-            issne=row['ESSN'],
-            name=clean_str(row['Journal Title']),
-            publisher=clean_str(row['Publisher']),
-            country=parse_country(row['Country']),
-            custom_id=row['RoMEO Record ID'],
+            issnp=row["ISSN"],
+            issne=row["ESSN"],
+            name=clean_str(row["Journal Title"]),
+            publisher=clean_str(row["Publisher"]),
+            country=parse_country(row["Country"]),
+            custom_id=row["RoMEO Record ID"],
         )
 
-        if row['RoMEO colour']:
-            info.extra['sherpa_romeo'] = dict(color=row['RoMEO colour'])
+        if row["RoMEO colour"]:
+            info.extra["sherpa_romeo"] = dict(color=row["RoMEO colour"])
 
         return info
diff --git a/chocula/directories/sim.py b/chocula/directories/sim.py
index ff5cce3..97f84d2 100644
--- a/chocula/directories/sim.py
+++ b/chocula/directories/sim.py
@@ -1,8 +1,14 @@
-
 from typing import Iterable, Optional, Dict, Any
 import csv
 
-from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP, gaps_to_spans
+from chocula.util import (
+    clean_str,
+    parse_mimetypes,
+    parse_country,
+    parse_lang,
+    PLATFORM_MAP,
+    gaps_to_spans,
+)
 from chocula.common import DirectoryLoader
 from chocula.database import DirectoryInfo, HomepageUrl
 
@@ -37,35 +43,34 @@ class SimLoader(DirectoryLoader):
         # TODO: 'Pub Type'
 
         extra: Dict[str, Any] = {}
-        first_year = row['First Volume']
+        first_year = row["First Volume"]
         if first_year:
             first_year = int(first_year)
-            extra['first_year'] = int(row['First Volume'])
+            extra["first_year"] = int(row["First Volume"])
         else:
             first_year = None
-        last_year = row['Last Volume']
+        last_year = row["Last Volume"]
         if last_year:
             last_year = int(last_year)
-            extra['last_year'] = last_year
+            extra["last_year"] = last_year
         else:
             last_year = None
-        gaps = [int(g) for g in row['NA Gaps'].split(';') if g.strip()]
+        gaps = [int(g) for g in row["NA Gaps"].split(";") if g.strip()]
         if gaps:
-            extra['gaps'] = gaps
+            extra["gaps"] = gaps
         if first_year and last_year:
-            extra['year_spans'] = gaps_to_spans(first_year, last_year, gaps)
-        extra['scholarly_peer_reviewed'] = row["Scholarly / Peer-\nReviewed"]
-        extra['peer_reviewed'] = row["Peer-\nReviewed"]
-        extra['pub_type'] = clean_str(row["Pub Type"])
+            extra["year_spans"] = gaps_to_spans(first_year, last_year, gaps)
+        extra["scholarly_peer_reviewed"] = row["Scholarly / Peer-\nReviewed"]
+        extra["peer_reviewed"] = row["Peer-\nReviewed"]
+        extra["pub_type"] = clean_str(row["Pub Type"])
 
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            name=clean_str(row['Title']),
-            publisher=clean_str(row['Publisher']),
-            raw_issn=row['ISSN'][:9],
-            custom_id=row.get('NA Pub Cat ID').strip() or None,
-            langs=[parse_lang(row['Pub Language'])],
+            name=clean_str(row["Title"]),
+            publisher=clean_str(row["Publisher"]),
+            raw_issn=row["ISSN"][:9],
+            custom_id=row.get("NA Pub Cat ID").strip() or None,
+            langs=[parse_lang(row["Pub Language"])],
             extra=extra,
         )
         return info
-
diff --git a/chocula/directories/szczepanski.py b/chocula/directories/szczepanski.py
index 0d1558a..3586acb 100644
--- a/chocula/directories/szczepanski.py
+++ b/chocula/directories/szczepanski.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import json
 
@@ -16,7 +15,7 @@ class SzczepanskiLoader(DirectoryLoader):
     source_slug = "szczepanski"
 
     def open_file(self) -> Iterable:
-        return open(self.config.szczepanski.filepath, 'r')
+        return open(self.config.szczepanski.filepath, "r")
 
     def parse_record(self, row) -> Optional[DirectoryInfo]:
 
@@ -27,21 +26,21 @@ class SzczepanskiLoader(DirectoryLoader):
 
         info = DirectoryInfo(
             directory_slug=self.source_slug,
-            issne=row.get('issne'),
-            issnp=row.get('issnp'),
-            raw_issn=row.get('issn'),
-            name=clean_str(row['title']),
-            publisher=clean_str(row.get('ed')),
+            issne=row.get("issne"),
+            issnp=row.get("issnp"),
+            raw_issn=row.get("issn"),
+            name=clean_str(row["title"]),
+            publisher=clean_str(row.get("ed")),
         )
 
-        info.extra['szczepanski'] = dict(as_of=self.config.szczepanski.date)
-        if row.get('extra'):
-            info.extra['szczepanski']['notes'] = row.get('extra')
-        for k in ('other_titles', 'year_spans', 'ed'):
+        info.extra["szczepanski"] = dict(as_of=self.config.szczepanski.date)
+        if row.get("extra"):
+            info.extra["szczepanski"]["notes"] = row.get("extra")
+        for k in ("other_titles", "year_spans", "ed"):
             if row.get(k):
-                info.extra['szczepanski'][k] = row[k]
+                info.extra["szczepanski"][k] = row[k]
 
-        url = HomepageUrl.from_url(row.get('url'))
+        url = HomepageUrl.from_url(row.get("url"))
         if url:
             info.homepage_urls.append(url)
 
diff --git a/chocula/directories/wikidata.py b/chocula/directories/wikidata.py
index d16d8df..5ffe6fb 100644
--- a/chocula/directories/wikidata.py
+++ b/chocula/directories/wikidata.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import csv
 
@@ -16,27 +15,31 @@ class WikidataLoader(DirectoryLoader):
     source_slug = "wikidata"
 
     def open_file(self) -> Iterable:
-        return csv.DictReader(open(self.config.wikidata.filepath), delimiter='\t')
+        return csv.DictReader(open(self.config.wikidata.filepath), delimiter="\t")
 
     def parse_record(self, row) -> Optional[DirectoryInfo]:
 
-        if not (row.get('issn') and row.get('title')):
+        if not (row.get("issn") and row.get("title")):
             return None
-        wikidata_qid = row['item'].strip().split('/')[-1]
-        publisher = row['publisher_name']
-        if (publisher.startswith('Q') and publisher[1].isdigit()) or publisher.startswith('t1') or not publisher:
+        wikidata_qid = row["item"].strip().split("/")[-1]
+        publisher = row["publisher_name"]
+        if (
+            (publisher.startswith("Q") and publisher[1].isdigit())
+            or publisher.startswith("t1")
+            or not publisher
+        ):
             publisher = None
-        info =DirectoryInfo(
+        info = DirectoryInfo(
             directory_slug=self.source_slug,
-            raw_issn=row['issn'],
+            raw_issn=row["issn"],
             custom_id=wikidata_qid,
-            name=clean_str(row['title']),
+            name=clean_str(row["title"]),
             publisher=clean_str(publisher),
         )
-        if row.get('start_year'):
-            info.extra['start_year'] = row['start_year']
+        if row.get("start_year"):
+            info.extra["start_year"] = row["start_year"]
 
-        url = HomepageUrl.from_url(row.get('websiteurl'))
+        url = HomepageUrl.from_url(row.get("websiteurl"))
         if url:
             info.homepage_urls.append(url)
author	Bryan Newbold <bnewbold@archive.org>	2020-06-22 13:46:42 -0700
committer	Bryan Newbold <bnewbold@archive.org>	2020-06-22 13:46:42 -0700
commit	db40b9e70b917dbbbfda48f6d77a2fc509366a82 (patch)
tree	e66626601ec32965c4cee7fad16982530408019b /chocula/directories
parent	5d3ce061d24a5188fc015012b2f70a4c6f568969 (diff)
download	chocula-db40b9e70b917dbbbfda48f6d77a2fc509366a82.tar.gz chocula-db40b9e70b917dbbbfda48f6d77a2fc509366a82.zip