about | summary | refs | log | tree | commit | diff | stats
path: root/chocula
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-03 01:11:23 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-03 01:11:23 -0700
commit: 8f0aa515d4dff2537eb9e1ab557da0b067f42250 (patch)
tree: 08d38c42b269292136af26122543b459918dc9da /chocula
parent: 9418d1d15ca809b7796085cf23afa0948cf956c4 (diff)
download: chocula-8f0aa515d4dff2537eb9e1ab557da0b067f42250.tar.gz
chocula-8f0aa515d4dff2537eb9e1ab557da0b067f42250.zip
scielo metadata import
Diffstat (limited to 'chocula')
-rw-r--r-- chocula/directories/__init__.py 3
-rw-r--r-- chocula/directories/scielo.py 48
2 files changed, 50 insertions, 1 deletions
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index 4bed696..a233a26 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -9,11 +9,12 @@ from chocula.directories.openapc import OpenAPCLoader
from chocula.directories.road import RoadLoader
from chocula.directories.sherpa_romeo import SherpaRomeoLoader
from chocula.directories.sim import SimLoader
+from chocula.directories.scielo import ScieloLoader
from chocula.directories.szczepanski import SzczepanskiLoader
from chocula.directories.wikidata import WikidataLoader
ALL_CHOCULA_DIR_CLASSES = [
CrossrefLoader, DoajLoader, EntrezLoader,EzbLoader, GoldOALoader,
NorwegianLoader, OpenAPCLoader, RoadLoader, SherpaRomeoLoader,
- SzczepanskiLoader, WikidataLoader, SimLoader,
+ SzczepanskiLoader, WikidataLoader, SimLoader, ScieloLoader,
]
diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py
new file mode 100644
index 0000000..247866b
--- /dev/null
+++ b/chocula/directories/scielo.py
@@ -0,0 +1,48 @@
+
+from typing import Iterable, Optional
+import json
+
+from chocula.util import clean_str, clean_issn
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class ScieloLoader(DirectoryLoader):
+    """Directory loader for the SciELO metadata file (one JSON record per line)."""
+    source_slug = "scielo"
+
+    def open_file(self) -> Iterable:
+        return open(self.config.scielo.filepath, encoding="utf-8")  # JSON text is UTF-8
+
+    def parse_record(self, line) -> Optional[DirectoryInfo]:
+        """Parse one JSON-encoded line into a DirectoryInfo; absent keys are tolerated."""
+        record = json.loads(line)
+        extra = dict(
+            status=clean_str(record.get('current_status')),
+            first_year=record.get('first_year'),
+            collection=record.get('collection_acronym'),
+        )
+        extra = {k: v for k, v in extra.items() if v is not None}  # drop unset fields
+        countries = record.get('publisher_country')
+        # Only a two-character first entry is accepted as the country code.
+        has_code = bool(countries) and len(countries[0]) == 2
+        country: Optional[str] = countries[0].lower() if has_code else None
+        info = DirectoryInfo(
+            directory_slug=self.source_slug,
+            issne=clean_issn(record.get('electronic_issn') or ''),
+            issnp=clean_issn(record.get('print_issn') or ''),
+            custom_id=clean_str(record.get('scielo_issn')),
+            name=clean_str(record.get('fulltitle')),
+            publisher=clean_str((record.get('publisher_name') or [''])[0]),
+            abbrev=clean_str(record.get('abbreviated_iso_title')),
+            platform='scielo',
+            langs=[s for s in (record.get('languages') or []) if len(s) == 2],
+            country=country,
+            extra=extra,
+        )
+        url = record.get('url')
+        homepage = HomepageUrl.from_url(url) if url else None
+        if homepage:
+            info.homepage_urls.append(homepage)
+        return info
+