aboutsummaryrefslogtreecommitdiffstats
path: root/chocula/directories/crossref.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-05-06 18:26:53 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-05-07 00:59:37 -0700
commit: 4d701f4f2ea99ac95bd4235adef1998f3abdc9f9 (patch)
tree: 6408d86364109765d0deb3692321ed7f3128ea05 /chocula/directories/crossref.py
parent: d559304babb24e4961ba13c554817730b46cfadc (diff)
download: chocula-4d701f4f2ea99ac95bd4235adef1998f3abdc9f9.tar.gz
chocula-4d701f4f2ea99ac95bd4235adef1998f3abdc9f9.zip
start a Makefile
Move all "index" functions into classes, each in a separate file. Add lots of type annotations. Use dataclass objects to hold database rows. This aspect will need further refactoring to remove "extra" usage, probably by adding database rows to align with DatabaseInfo more closely.
Diffstat (limited to 'chocula/directories/crossref.py')
-rw-r--r-- chocula/directories/crossref.py 36
1 files changed, 36 insertions, 0 deletions
diff --git a/chocula/directories/crossref.py b/chocula/directories/crossref.py
new file mode 100644
index 0000000..ba47566
--- /dev/null
+++ b/chocula/directories/crossref.py
@@ -0,0 +1,36 @@
+
+from typing import Iterable, Optional
+import csv
+
+from chocula.util import clean_str
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo
+
+
+class CrossrefLoader(DirectoryLoader):
+    """
+    Loads journal records from the Crossref CSV file located at
+    `config.CROSSREF_FILE`.
+
+    CSV Columns:
+
+        #"JournalTitle","JournalID","Publisher","pissn","eissn","additionalIssns","doi","(year1)[volume1]issue1,issue2,issue3(year2)[volume2]issue4,issues5"
+
+    """
+
+    source_slug = "crossref"
+
+    def open_file(self) -> Iterable:
+        """
+        Yields one dict per CSV row, keyed by the header column names.
+
+        Written as a generator so the file handle is closed as soon as
+        iteration finishes (or the generator is discarded), instead of being
+        left open until garbage collection. Note that the file is therefore
+        opened on first iteration, not when this method is called.
+        """
+        # newline='' is what the csv module asks for, so that quoted fields
+        # containing line breaks are parsed correctly
+        with open(self.config.CROSSREF_FILE, newline='') as csv_file:
+            yield from csv.DictReader(csv_file)
+
+    def parse_record(self, record) -> Optional[DirectoryInfo]:
+        """
+        Converts a single CSV row (a dict of strings) into a DirectoryInfo.
+
+        The 'eissn' and 'pissn' columns must be present (KeyError otherwise);
+        every other column is treated as optional.
+        """
+        info = DirectoryInfo(
+            directory_slug=self.source_slug,
+            issne=record['eissn'],
+            issnp=record['pissn'],
+            # DictReader fills in None for columns missing from a short row,
+            # so guard before calling .strip()
+            custom_id=(record.get('doi') or '').strip() or None,
+            name=clean_str(record.get('JournalTitle')),
+            publisher=clean_str(record.get('Publisher')),
+        )
+
+        # 'additionalIssns' arrives as one string; indexing it with [0] would
+        # return only its first character, so tokenize and keep the first
+        # whole ISSN instead.
+        # NOTE(review): the separator is assumed to be ';', ',' or
+        # whitespace -- confirm against a real Crossref dump.
+        additional = record.get('additionalIssns') or ''
+        extra_issns = additional.replace(';', ' ').replace(',', ' ').split()
+        if extra_issns:
+            info.raw_issn = extra_issns[0]
+
+        return info