about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-03 01:11:23 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-03 01:11:23 -0700
commit 8f0aa515d4dff2537eb9e1ab557da0b067f42250 (patch)
tree 08d38c42b269292136af26122543b459918dc9da
parent 9418d1d15ca809b7796085cf23afa0948cf956c4 (diff)
download chocula-8f0aa515d4dff2537eb9e1ab557da0b067f42250.tar.gz
chocula-8f0aa515d4dff2537eb9e1ab557da0b067f42250.zip
scielo metadata import
-rw-r--r-- chocula/directories/__init__.py 3
-rw-r--r-- chocula/directories/scielo.py 48
-rw-r--r-- sources.toml 7
-rw-r--r-- tests/files/ISSN-to-ISSN-L.txt 19
4 files changed, 76 insertions, 1 deletions
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index 4bed696..a233a26 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -9,11 +9,12 @@ from chocula.directories.openapc import OpenAPCLoader
from chocula.directories.road import RoadLoader
from chocula.directories.sherpa_romeo import SherpaRomeoLoader
from chocula.directories.sim import SimLoader
+from chocula.directories.scielo import ScieloLoader
from chocula.directories.szczepanski import SzczepanskiLoader
from chocula.directories.wikidata import WikidataLoader
ALL_CHOCULA_DIR_CLASSES = [
CrossrefLoader, DoajLoader, EntrezLoader,EzbLoader, GoldOALoader,
NorwegianLoader, OpenAPCLoader, RoadLoader, SherpaRomeoLoader,
- SzczepanskiLoader, WikidataLoader, SimLoader,
+ SzczepanskiLoader, WikidataLoader, SimLoader, ScieloLoader,
]
diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py
new file mode 100644
index 0000000..247866b
--- /dev/null
+++ b/chocula/directories/scielo.py
@@ -0,0 +1,48 @@
+
+from typing import Iterable, Optional
+import json
+
+from chocula.util import clean_str, clean_issn
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class ScieloLoader(DirectoryLoader):
+    """Directory loader for the SciELO journal metadata dump.
+
+    Each record handed to ``parse_record`` is decoded with ``json.loads``,
+    so the input file is presumably one JSON object per line (TODO confirm
+    against ``DirectoryLoader``, which drives the iteration).
+    """
+
+    source_slug = "scielo"
+
+    def open_file(self) -> Iterable:
+        """Open the configured SciELO dump for line-by-line reading."""
+        # JSON text is UTF-8; do not depend on the process locale, which
+        # would otherwise decide how non-ASCII titles are decoded.
+        return open(self.config.scielo.filepath, encoding="utf-8")
+
+    def parse_record(self, line) -> Optional[DirectoryInfo]:
+        """Convert one JSON-encoded journal record into a ``DirectoryInfo``.
+
+        Every field is read with ``.get()`` so that a record with a missing
+        or null field yields an incomplete ``DirectoryInfo`` instead of
+        raising ``KeyError``/``TypeError`` and aborting the import.
+
+        Raises whatever ``json.loads`` raises if ``line`` is not valid JSON.
+        """
+        record = json.loads(line)
+
+        # Only keep the "extra" fields that actually have a value.
+        candidates = dict(
+            status=clean_str(record.get('current_status')),
+            first_year=record.get('first_year'),
+            collection=record.get('collection_acronym'),
+        )
+        extra = {k: v for k, v in candidates.items() if v is not None}
+
+        # 'publisher_country' is a list; only its first entry is used, and
+        # only when it is a two-character code.
+        country: Optional[str] = None
+        countries = record.get('publisher_country') or []
+        if countries and countries[0] and len(countries[0]) == 2:
+            country = countries[0].lower()
+
+        # Keep two-character language codes only; skip null/empty entries.
+        langs = [
+            lang for lang in (record.get('languages') or [])
+            if lang and len(lang) == 2
+        ]
+
+        info = DirectoryInfo(
+            directory_slug=self.source_slug,
+            issne=clean_issn(record.get('electronic_issn') or ''),
+            issnp=clean_issn(record.get('print_issn') or ''),
+            custom_id=clean_str(record.get('scielo_issn')),
+            name=clean_str(record.get('fulltitle')),
+            publisher=clean_str((record.get('publisher_name') or [''])[0]),
+            abbrev=clean_str(record.get('abbreviated_iso_title')),
+            platform='scielo',
+            langs=langs,
+            country=country,
+            extra=extra,
+        )
+
+        url = record.get('url')
+        if url:
+            homepage = HomepageUrl.from_url(url)
+            # from_url may return a falsy value; only append a real result.
+            if homepage:
+                info.homepage_urls.append(homepage)
+        return info
+
diff --git a/sources.toml b/sources.toml
index efde2d8..a91d8ee 100644
--- a/sources.toml
+++ b/sources.toml
@@ -57,6 +57,13 @@ filename = "norwegian_register.csv"
original_url = "https://dbh.nsd.uib.no/publiseringskanaler/AlltidFerskListe"
mirror_url = "https://archive.org/download/norwegian_register_journals"
+[scielo]
+date = "2020-05-05"
+filename = "scielo.json"
+mirror_url = "https://archive.org/download/scielo-journal-metadata/journals.20200505.json"
+# not to be confused with scielo_journal_list.20200428.csv, which is also
+# floating around but has no ISSNs
+
[lockss]
filename = "kbart_LOCKSS.txt"
original_url = "https://reports.lockss.org/kbart/kbart_LOCKSS.txt"
diff --git a/tests/files/ISSN-to-ISSN-L.txt b/tests/files/ISSN-to-ISSN-L.txt
index f44ea24..9c7b339 100644
--- a/tests/files/ISSN-to-ISSN-L.txt
+++ b/tests/files/ISSN-to-ISSN-L.txt
@@ -258,3 +258,22 @@ ISSN ISSN-L
0009-5532 0009-5532
0888-8817 0888-8817
0001-1452 0001-1452
+0102-7182 0102-7182
+1679-074X 1679-074X
+1982-5471 1982-5471
+1516-1498 1516-1498
+1516-2567 1516-2567
+1413-0556 1413-0556
+0104-8023 0104-8023
+1413-0556 1413-0556
+1679-074X 1679-074X
+0103-166X 0103-166X
+0124-4906 0124-4906
+0104-3269 0104-3269
+1983-3288 1983-3288
+1516-8530 1516-8530
+1982-5471 1982-5471
+1809-8894 1809-8894
+0102-7182 0102-7182
+1806-6631 1806-6631
+1809-8894 1809-8894