about | summary | refs | log | tree | commit | diff | stats
path: root/chocula
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-23 23:11:47 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-23 23:11:47 -0700
commit: 66ff1990d81bfc461c4cbbcc46278f785c4f273c (patch)
tree: 4474a5662e5072c4f6206986847ce9653683099d /chocula
parent: 3a4344d1e26e679b0dc9558d15752e53ce86f8ac (diff)
download: chocula-66ff1990d81bfc461c4cbbcc46278f785c4f273c.tar.gz
          chocula-66ff1990d81bfc461c4cbbcc46278f785c4f273c.zip
add MAG importer; reorder directory class listing
Diffstat (limited to 'chocula')
-rw-r--r--  chocula/directories/__init__.py  23
-rw-r--r--  chocula/directories/mag.py       60
2 files changed, 73 insertions, 10 deletions
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index 17329e1..ed306c0 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -18,26 +18,29 @@ from chocula.directories.vanished_inactive import VanishedInactiveLoader
from chocula.directories.issn_meta import IssnMetaLoader
from chocula.directories.australian_era import AustralianEraLoader
from chocula.directories.awol import AwolLoader
+from chocula.directories.mag import MagLoader
+# sort order roughly results in metadata prioritization
ALL_CHOCULA_DIR_CLASSES = [
- CrossrefLoader,
+ IssnMetaLoader,
+ ManualHomepageLoader,
+ ScieloLoader,
DoajLoader,
+ CrossrefLoader,
EntrezLoader,
EzbLoader,
GoldOALoader,
NorwegianLoader,
+ AustralianEraLoader,
+ SzczepanskiLoader,
+ WikidataLoader,
+ AwolLoader,
+ VanishedDisapearedLoader,
+ VanishedInactiveLoader,
OpenAPCLoader,
RoadLoader,
SherpaRomeoLoader,
- SzczepanskiLoader,
- WikidataLoader,
SimLoader,
- ScieloLoader,
- ManualHomepageLoader,
ZdbFizeLoader,
- VanishedDisapearedLoader,
- VanishedInactiveLoader,
- IssnMetaLoader,
- AustralianEraLoader,
- AwolLoader,
+ MagLoader,
]
diff --git a/chocula/directories/mag.py b/chocula/directories/mag.py
new file mode 100644
index 0000000..dbbc324
--- /dev/null
+++ b/chocula/directories/mag.py
@@ -0,0 +1,60 @@
+from typing import Iterable, Optional
+import csv
+
+from chocula.util import clean_str, clean_issn
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class MagLoader(DirectoryLoader):
+    """
+    Directory loader for the "mag" source (presumably the Microsoft Academic
+    Graph journals table -- TODO confirm against the schema docs).
+
+    Reads a tab-separated file whose path comes from
+    `self.config.mag.filepath`. Column names are supplied explicitly below,
+    so every line of the file (including the first) is treated as data.
+
+    TSV Columns (from schema docs):
+
+        1   JournalId          long      PRIMARY KEY
+        2   Rank               uint      See FAQ
+        3   NormalizedName     string
+        4   DisplayName        string
+        5   Issn               string
+        6   Publisher          string
+        7   Webpage            string
+        8   PaperCount         long
+        9   PaperFamilyCount   long      See FAQ
+        10  CitationCount      long
+        11  CreatedDate        DateTime
+
+    """
+
+    source_slug = "mag"
+
+    def open_file(self) -> Iterable:
+        """
+        Yields one dict per TSV row, keyed by the column names listed in the
+        class docstring.
+
+        This is a generator: the file is opened when iteration starts and is
+        closed when iteration finishes (or the generator is closed), rather
+        than being left open for the lifetime of the csv reader.
+        """
+        fieldnames = [
+            "JournalId",
+            "Rank",
+            "NormalizedName",
+            "DisplayName",
+            "Issn",
+            "Publisher",
+            "Webpage",
+            "PaperCount",
+            "PaperFamilyCount",
+            "CitationCount",
+            "CreatedDate",
+        ]
+        # newline="" lets the csv module do its own line-ending handling, as
+        # its documentation requires for file objects passed to a reader.
+        with open(self.config.mag.filepath, "r", newline="") as tsv_file:
+            yield from csv.DictReader(
+                tsv_file,
+                delimiter="\t",
+                fieldnames=fieldnames,
+            )
+
+    def parse_record(self, record) -> Optional[DirectoryInfo]:
+        """
+        Converts a single row dict (as yielded by `open_file()`) into a
+        DirectoryInfo.
+
+        The ISSN, display name and publisher are passed through the
+        `clean_issn` / `clean_str` helpers; the JournalId is stored as-is in
+        `custom_id`. A homepage URL is attached only when
+        `HomepageUrl.from_url()` returns something truthy for the Webpage
+        column.
+
+        As written, this always returns an info object, never None.
+        """
+        info = DirectoryInfo(
+            directory_slug=self.source_slug,
+            raw_issn=clean_issn(record["Issn"]),
+            custom_id=record["JournalId"],
+            name=clean_str(record["DisplayName"]),
+            publisher=clean_str(record["Publisher"]),
+        )
+        # `or ""` guards against a missing (None) Webpage value
+        homepage = HomepageUrl.from_url(record["Webpage"] or "")
+        if homepage:
+            info.homepage_urls.append(homepage)
+
+        return info