diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-22 13:46:42 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-22 13:46:42 -0700 |
commit | db40b9e70b917dbbbfda48f6d77a2fc509366a82 (patch) | |
tree | e66626601ec32965c4cee7fad16982530408019b /chocula/directories/wikidata.py | |
parent | 5d3ce061d24a5188fc015012b2f70a4c6f568969 (diff) | |
download | chocula-db40b9e70b917dbbbfda48f6d77a2fc509366a82.tar.gz chocula-db40b9e70b917dbbbfda48f6d77a2fc509366a82.zip |
fmt (black)
Diffstat (limited to 'chocula/directories/wikidata.py')
-rw-r--r-- | chocula/directories/wikidata.py | 27 |
1 files changed, 15 insertions, 12 deletions
```diff
diff --git a/chocula/directories/wikidata.py b/chocula/directories/wikidata.py
index d16d8df..5ffe6fb 100644
--- a/chocula/directories/wikidata.py
+++ b/chocula/directories/wikidata.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Optional
 import csv

@@ -16,27 +15,31 @@ class WikidataLoader(DirectoryLoader):
     source_slug = "wikidata"

     def open_file(self) -> Iterable:
-        return csv.DictReader(open(self.config.wikidata.filepath), delimiter='\t')
+        return csv.DictReader(open(self.config.wikidata.filepath), delimiter="\t")

     def parse_record(self, row) -> Optional[DirectoryInfo]:

-        if not (row.get('issn') and row.get('title')):
+        if not (row.get("issn") and row.get("title")):
             return None
-        wikidata_qid = row['item'].strip().split('/')[-1]
-        publisher = row['publisher_name']
-        if (publisher.startswith('Q') and publisher[1].isdigit()) or publisher.startswith('t1') or not publisher:
+        wikidata_qid = row["item"].strip().split("/")[-1]
+        publisher = row["publisher_name"]
+        if (
+            (publisher.startswith("Q") and publisher[1].isdigit())
+            or publisher.startswith("t1")
+            or not publisher
+        ):
             publisher = None
-        info =DirectoryInfo(
+        info = DirectoryInfo(
             directory_slug=self.source_slug,
-            raw_issn=row['issn'],
+            raw_issn=row["issn"],
             custom_id=wikidata_qid,
-            name=clean_str(row['title']),
+            name=clean_str(row["title"]),
             publisher=clean_str(publisher),
         )
-        if row.get('start_year'):
-            info.extra['start_year'] = row['start_year']
+        if row.get("start_year"):
+            info.extra["start_year"] = row["start_year"]

-        url = HomepageUrl.from_url(row.get('websiteurl'))
+        url = HomepageUrl.from_url(row.get("websiteurl"))
         if url:
             info.homepage_urls.append(url)

```