diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-03 01:11:23 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-03 01:11:23 -0700 |
commit | 8f0aa515d4dff2537eb9e1ab557da0b067f42250 (patch) | |
tree | 08d38c42b269292136af26122543b459918dc9da /chocula | |
parent | 9418d1d15ca809b7796085cf23afa0948cf956c4 (diff) | |
download | chocula-8f0aa515d4dff2537eb9e1ab557da0b067f42250.tar.gz chocula-8f0aa515d4dff2537eb9e1ab557da0b067f42250.zip |
scielo metadata import
Diffstat (limited to 'chocula')
-rw-r--r-- | chocula/directories/__init__.py | 3 | ||||
-rw-r--r-- | chocula/directories/scielo.py | 48 |
2 files changed, 50 insertions, 1 deletions
```diff
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index 4bed696..a233a26 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -9,11 +9,12 @@ from chocula.directories.openapc import OpenAPCLoader
 from chocula.directories.road import RoadLoader
 from chocula.directories.sherpa_romeo import SherpaRomeoLoader
 from chocula.directories.sim import SimLoader
+from chocula.directories.scielo import ScieloLoader
 from chocula.directories.szczepanski import SzczepanskiLoader
 from chocula.directories.wikidata import WikidataLoader
 
 ALL_CHOCULA_DIR_CLASSES = [
     CrossrefLoader, DoajLoader, EntrezLoader,EzbLoader, GoldOALoader,
     NorwegianLoader, OpenAPCLoader, RoadLoader, SherpaRomeoLoader,
-    SzczepanskiLoader, WikidataLoader, SimLoader,
+    SzczepanskiLoader, WikidataLoader, SimLoader, ScieloLoader,
 ]
diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py
new file mode 100644
index 0000000..247866b
--- /dev/null
+++ b/chocula/directories/scielo.py
@@ -0,0 +1,48 @@
+
+from typing import Iterable, Optional
+import json
+
+from chocula.util import clean_str, clean_issn
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class ScieloLoader(DirectoryLoader):
+
+    source_slug = "scielo"
+
+    def open_file(self) -> Iterable:
+        return open(self.config.scielo.filepath)
+
+    def parse_record(self, line) -> Optional[DirectoryInfo]:
+        record = json.loads(line)
+        extra = dict(
+            status=clean_str(record.get('current_status')),
+            first_year=record.get('first_year'),
+            collection=record.get('collection_acronym'),
+        )
+        for k in list(extra.keys()):
+            if extra[k] is None:
+                extra.pop(k)
+        country: Optional[str] = None
+        if record['publisher_country'] and len(record['publisher_country'][0]) == 2:
+            country = record['publisher_country'][0].lower()
+        info = DirectoryInfo(
+            directory_slug=self.source_slug,
+            issne=clean_issn(record.get('electronic_issn') or ''),
+            issnp=clean_issn(record.get('print_issn') or ''),
+            custom_id=clean_str(record.get('scielo_issn')),
+            name=clean_str(record.get('fulltitle')),
+            publisher=clean_str((record.get('publisher_name') or [''])[0]),
+            abbrev=clean_str(record['abbreviated_iso_title']),
+            platform='scielo',
+            langs=list(filter(lambda s: len(s) == 2, record['languages'])),
+            country=country,
+            extra=extra,
+        )
+        if record['url']:
+            homepage = HomepageUrl.from_url(record['url'])
+            if homepage:
+                info.homepage_urls.append(homepage)
+        return info
+
```