diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 23:11:47 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 23:11:47 -0700 |
commit | 66ff1990d81bfc461c4cbbcc46278f785c4f273c (patch) | |
tree | 4474a5662e5072c4f6206986847ce9653683099d /chocula | |
parent | 3a4344d1e26e679b0dc9558d15752e53ce86f8ac (diff) | |
download | chocula-66ff1990d81bfc461c4cbbcc46278f785c4f273c.tar.gz chocula-66ff1990d81bfc461c4cbbcc46278f785c4f273c.zip |
add MAG importer; reorder directory class listing
Diffstat (limited to 'chocula')
-rw-r--r-- | chocula/directories/__init__.py | 23 | ||||
-rw-r--r-- | chocula/directories/mag.py | 60 |
2 files changed, 73 insertions, 10 deletions
```diff
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index 17329e1..ed306c0 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -18,26 +18,29 @@ from chocula.directories.vanished_inactive import VanishedInactiveLoader
 from chocula.directories.issn_meta import IssnMetaLoader
 from chocula.directories.australian_era import AustralianEraLoader
 from chocula.directories.awol import AwolLoader
+from chocula.directories.mag import MagLoader
 
+# sort order roughly results in metadata prioritization
 ALL_CHOCULA_DIR_CLASSES = [
-    CrossrefLoader,
+    IssnMetaLoader,
+    ManualHomepageLoader,
+    ScieloLoader,
     DoajLoader,
+    CrossrefLoader,
     EntrezLoader,
     EzbLoader,
     GoldOALoader,
     NorwegianLoader,
+    AustralianEraLoader,
+    SzczepanskiLoader,
+    WikidataLoader,
+    AwolLoader,
+    VanishedDisapearedLoader,
+    VanishedInactiveLoader,
     OpenAPCLoader,
     RoadLoader,
     SherpaRomeoLoader,
-    SzczepanskiLoader,
-    WikidataLoader,
     SimLoader,
-    ScieloLoader,
-    ManualHomepageLoader,
     ZdbFizeLoader,
-    VanishedDisapearedLoader,
-    VanishedInactiveLoader,
-    IssnMetaLoader,
-    AustralianEraLoader,
-    AwolLoader,
+    MagLoader,
 ]
diff --git a/chocula/directories/mag.py b/chocula/directories/mag.py
new file mode 100644
index 0000000..dbbc324
--- /dev/null
+++ b/chocula/directories/mag.py
@@ -0,0 +1,60 @@
+from typing import Iterable, Optional
+import csv
+
+from chocula.util import clean_str, clean_issn
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class MagLoader(DirectoryLoader):
+    """
+    TSV Columns (from schema docs):
+
+    1 JournalId long PRIMARY KEY
+    2 Rank uint See FAQ
+    3 NormalizedName string
+    4 DisplayName string
+    5 Issn string
+    6 Publisher string
+    7 Webpage string
+    8 PaperCount long
+    9 PaperFamilyCount long See FAQ
+    10 CitationCount long
+    11 CreatedDate DateTime
+
+    """
+
+    source_slug = "mag"
+
+    def open_file(self) -> Iterable:
+        return csv.DictReader(
+            open(self.config.mag.filepath, "r"),
+            delimiter="\t",
+            fieldnames=[
+                "JournalId",
+                "Rank",
+                "NormalizedName",
+                "DisplayName",
+                "Issn",
+                "Publisher",
+                "Webpage",
+                "PaperCount",
+                "PaperFamilyCount",
+                "CitationCount",
+                "CreatedDate",
+            ],
+        )
+
+    def parse_record(self, record) -> Optional[DirectoryInfo]:
+        info = DirectoryInfo(
+            directory_slug=self.source_slug,
+            raw_issn=clean_issn(record["Issn"]),
+            custom_id=record["JournalId"],
+            name=clean_str(record["DisplayName"]),
+            publisher=clean_str(record["Publisher"]),
+        )
+        homepage = HomepageUrl.from_url(record["Webpage"] or "")
+        if homepage:
+            info.homepage_urls.append(homepage)
+
+        return info
```