aboutsummaryrefslogtreecommitdiffstats
path: root/chocula/directories/wikidata.py
diff options
context:
space:
mode:
Diffstat (limited to 'chocula/directories/wikidata.py')
-rw-r--r--chocula/directories/wikidata.py27
1 files changed, 15 insertions, 12 deletions
diff --git a/chocula/directories/wikidata.py b/chocula/directories/wikidata.py
index d16d8df..5ffe6fb 100644
--- a/chocula/directories/wikidata.py
+++ b/chocula/directories/wikidata.py
@@ -1,4 +1,3 @@
-
from typing import Iterable, Optional
import csv
@@ -16,27 +15,31 @@ class WikidataLoader(DirectoryLoader):
source_slug = "wikidata"
def open_file(self) -> Iterable:
- return csv.DictReader(open(self.config.wikidata.filepath), delimiter='\t')
+ return csv.DictReader(open(self.config.wikidata.filepath), delimiter="\t")
def parse_record(self, row) -> Optional[DirectoryInfo]:
- if not (row.get('issn') and row.get('title')):
+ if not (row.get("issn") and row.get("title")):
return None
- wikidata_qid = row['item'].strip().split('/')[-1]
- publisher = row['publisher_name']
- if (publisher.startswith('Q') and publisher[1].isdigit()) or publisher.startswith('t1') or not publisher:
+ wikidata_qid = row["item"].strip().split("/")[-1]
+ publisher = row["publisher_name"]
+ if (
+ (publisher.startswith("Q") and publisher[1].isdigit())
+ or publisher.startswith("t1")
+ or not publisher
+ ):
publisher = None
- info =DirectoryInfo(
+ info = DirectoryInfo(
directory_slug=self.source_slug,
- raw_issn=row['issn'],
+ raw_issn=row["issn"],
custom_id=wikidata_qid,
- name=clean_str(row['title']),
+ name=clean_str(row["title"]),
publisher=clean_str(publisher),
)
- if row.get('start_year'):
- info.extra['start_year'] = row['start_year']
+ if row.get("start_year"):
+ info.extra["start_year"] = row["start_year"]
- url = HomepageUrl.from_url(row.get('websiteurl'))
+ url = HomepageUrl.from_url(row.get("websiteurl"))
if url:
info.homepage_urls.append(url)