aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-10-08 17:36:07 -0700
committerBryan Newbold <bnewbold@archive.org>2020-10-08 17:36:07 -0700
commita52e5f39346e28bdb2eb58f17a298177659dbbfe (patch)
tree551ea758efd05d90fbece22897222610dd054871
parent886c23d2e8ae3c0e05d8300e42e5934fae5a0496 (diff)
downloadchocula-a52e5f39346e28bdb2eb58f17a298177659dbbfe.tar.gz
chocula-a52e5f39346e28bdb2eb58f17a298177659dbbfe.zip
database support for scholarsportal and cariniana preservation holdings
-rw-r--r--chocula/common.py51
-rw-r--r--chocula/database.py2
-rw-r--r--chocula/kbart.py20
-rw-r--r--sources.toml13
-rw-r--r--tests/files/ISSN-to-ISSN-L.txt54
-rw-r--r--tests/files/cariniana.csv30
-rw-r--r--tests/files/onix_scholarsportal.csv29
7 files changed, 198 insertions, 1 deletions
diff --git a/chocula/common.py b/chocula/common.py
index edd48a3..94c4c7f 100644
--- a/chocula/common.py
+++ b/chocula/common.py
@@ -148,6 +148,9 @@ class KbartLoader:
else:
new_spans = [[record.start_year, record.end_year]]
record.year_spans = merge_spans(old_spans, new_spans)
+ elif record.year_spans:
+ old_spans = existing.year_spans or []
+ record.year_spans = merge_spans(old_spans, record.year_spans)
kbart_dict[record.issnl] = record
counts["unique-issnl"] = len(kbart_dict)
@@ -218,6 +221,54 @@ class OnixCsvLoader(KbartLoader):
return record
+class CarinianaCsvLoader(KbartLoader):
+    """
+    Similar to the KBART loader class, but for custom CSV files instead of
+    KBART formatted TSV.
+
+    CSV columns:
+    - Region
+    - Knowledge Area
+    - Publisher
+    - Title
+    - ISSN
+    - eISSN
+    - Preserved Volumes
+    - Preserved Years
+    - In Progress Volumes
+    - In Progress Years
+
+    Only the "ISSN", "eISSN", "Title", "Publisher" and "Preserved Years"
+    columns are read here; the remaining columns are ignored.
+
+    TODO: volumes
+    """
+
+    def open_file(self) -> Iterable:
+        """Return a ``csv.DictReader`` over the file at ``self.file_path()``."""
+        # newline="" lets the csv module do its own line-ending handling, so
+        # quoted fields that contain newlines are parsed correctly. The
+        # encoding is explicit because titles and publishers contain
+        # non-ASCII characters (assumes the CSV is UTF-8 encoded).
+        # The file handle stays open for the lifetime of the returned reader.
+        return csv.DictReader(
+            open(self.file_path(), "r", newline="", encoding="utf-8")
+        )
+
+    def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
+        """
+        Build a KbartRecord from a single CSV row.
+
+        The ISSN-L is looked up from the "ISSN" column, falling back to the
+        "eISSN" column. The semicolon-separated "Preserved Years" list is
+        collapsed into year spans via merge_spans().
+        """
+        raw_issn = clean_issn(row["ISSN"])
+        # The electronic ISSN has its own column; rows that only carry an
+        # eISSN depend on it for the ISSN-L lookup below.
+        issne = clean_issn(row["eISSN"])
+        issnl = issn_db.issn2issnl(raw_issn or issne or "")
+        # convert list of years to a set of year spans; strip each entry
+        # before filtering so blank entries (e.g. a trailing ";") are skipped
+        # instead of reaching int(""). A missing cell (None) counts as empty.
+        year_tokens = (y.strip() for y in (row["Preserved Years"] or "").split(";"))
+        years = [int(y) for y in year_tokens if y]
+        year_spans = merge_spans([], [[y, y] for y in years])
+        # NOTE(review): the "ISSN" column is presumably the print ISSN and
+        # could populate issnp -- confirm before changing.
+        record = KbartRecord(
+            issnl=issnl,
+            issne=issne,
+            issnp=None,
+            embargo=None,
+            title=clean_str(row["Title"]),
+            publisher=clean_str(row["Publisher"]),
+            url=None,
+            start_year=None,
+            end_year=None,
+            start_volume=None,
+            end_volume=None,
+            year_spans=year_spans,
+        )
+        return record
+
+
class HathifilesLoader(KbartLoader):
"""
Similar to the KBART loader class, but for Hathifiles bulk format.
diff --git a/chocula/database.py b/chocula/database.py
index 3e8db57..d4fbbed 100644
--- a/chocula/database.py
+++ b/chocula/database.py
@@ -837,6 +837,8 @@ class ChoculaDatabase:
"jstor",
"pkp_pln",
"hathitrust",
+ "scholarsportal",
+ "cariniana",
):
extra["kbart"] = extra.get("kbart", {})
extra["kbart"][drow["slug"]] = dict(year_spans=dextra["year_spans"])
diff --git a/chocula/kbart.py b/chocula/kbart.py
index 3944430..23c5533 100644
--- a/chocula/kbart.py
+++ b/chocula/kbart.py
@@ -1,4 +1,4 @@
-from chocula.common import KbartLoader, OnixCsvLoader, HathifilesLoader
+from chocula.common import KbartLoader, OnixCsvLoader, CarinianaCsvLoader, HathifilesLoader
class ClockssKbartLoader(KbartLoader):
@@ -41,6 +41,22 @@ class PkpPlnOnixLoader(OnixCsvLoader):
return self.config.pkp_pln.filepath
+class CarinianaLoader(CarinianaCsvLoader):
+    """CarinianaCsvLoader bound to the ``cariniana`` source."""
+
+    source_slug = "cariniana"
+
+    def file_path(self) -> str:
+        """Return the file path from the ``cariniana`` config section."""
+        cariniana_config = self.config.cariniana
+        return cariniana_config.filepath
+
+
+class ScholarsportalOnixLoader(OnixCsvLoader):
+    """OnixCsvLoader bound to the ``scholarsportal`` source."""
+
+    source_slug = "scholarsportal"
+
+    def file_path(self) -> str:
+        """Return the file path from the ``scholarsportal`` config section."""
+        scholarsportal_config = self.config.scholarsportal
+        return scholarsportal_config.filepath
+
+
class HathitrustLoader(HathifilesLoader):
source_slug = "hathitrust"
@@ -55,5 +71,7 @@ ALL_CHOCULA_KBART_CLASSES = [
PorticoKbartLoader,
JstorKbartLoader,
PkpPlnOnixLoader,
+ CarinianaLoader,
+ ScholarsportalOnixLoader,
HathitrustLoader,
]
diff --git a/sources.toml b/sources.toml
index e9544b4..0edbce6 100644
--- a/sources.toml
+++ b/sources.toml
@@ -93,6 +93,19 @@ date = "2020-09-01"
filename = "hathi_serials.tsv"
original_url = "https://www.hathitrust.org/hathifiles"
+[scholarsportal]
+# from correspondence
+date = "2020-08-24"
+filename = "onix_scholarsportal.csv"
+mirror_url = "https://archive.org/download/scholarsportal-keepers-onix-xml-20200824"
+
+[cariniana]
+# https://cariniana.ibict.br/index.php/preservacao-de-publicacoes-digitais/periodicos-eletronicos
+date = "2017-12-01"
+filename = "cariniana.csv"
+original_url = "https://cariniana.ibict.br/images/Periodicos_2016/Lista_Dezembro_2017.xls"
+mirror_url = "https://archive.org/download/cariniana-journal-holdings"
+
[szczepanski]
date = '2018'
# Jan-Szczepanski-Open-Access-Journals-2018_0.fixed.json
diff --git a/tests/files/ISSN-to-ISSN-L.txt b/tests/files/ISSN-to-ISSN-L.txt
index b91c3c1..b5a4788 100644
--- a/tests/files/ISSN-to-ISSN-L.txt
+++ b/tests/files/ISSN-to-ISSN-L.txt
@@ -387,3 +387,57 @@ ISSN ISSN-L
1941-1006 1941-1006
1858-1447 1858-1447
1579-699X 1579-699X
+1679-2483 1679-2483
+1982-6125 1982-6125
+1518-1243 1518-1243
+1516-2664 1516-2664
+1677-6402 1677-6402
+1981-9951 1981-9951
+1679-7361 1679-7361
+1980-6493 1980-6493
+2316-8307 2316-8307
+1809-4031 1809-4031
+2316-6541 2316-6541
+2316-1620 2316-1620
+1518-2797 1518-2797
+1983-7801 1983-7801
+1413-7038 1413-7038
+2237-8723 2237-8723
+1676-4439 1676-4439
+2237-4132 2237-4132
+1981-9854 1981-9854
+2176-9192 2176-9192
+2176-9176 2176-9176
+2176-9184 2176-9184
+2176-9168 2176-9168
+1807-8656 1807-8656
+2178-2172 2178-2172
+1983-0890 1983-0890
+2178-2687 2178-2687
+1809-4309 1809-4309
+2359-0017 2359-0017
+1809-6891 1809-6891
+1982-4017 1982-4017
+2238-3212 2238-3212
+0101-9570 0101-9570
+1984-1647 1984-1647
+0001-1037 0001-1037
+0374-4884 0374-4884
+0021-8790 0021-8790
+0168-9002 0168-9002
+0924-2716 0924-2716
+0168-0072 0168-0072
+0892-3647 0892-3647
+1052-5629 1052-5629
+1045-9219 1045-9219
+0261-3050 0261-3050
+1359-5997 1359-5997
+0920-5861 0920-5861
+0302-8933 0302-8933
+1365-8816 1365-8816
+0166-4328 0166-4328
+1068-798x 1068-798x
+0003-9993 0003-9993
+0005-0423 0005-0423
+0198-8859 0198-8859
+0959-6526 0959-6526
diff --git a/tests/files/cariniana.csv b/tests/files/cariniana.csv
new file mode 100644
index 0000000..5440a1c
--- /dev/null
+++ b/tests/files/cariniana.csv
@@ -0,0 +1,30 @@
+Region,Knowledge Area,Publisher,Title,ISSN,eISSN,Preserved Volumes,Preserved Years,In Progress Volumes,In Progress Years
+Sudeste,Ciências Humanas,Arquivo Nacional,Revista Acervo,,2237-8723,,,,
+Nordeste,Ciencias Sociais Aplicadas ,Associação Brasileira de Estudos do Trabalho,Revista da ABET,1679-2483,1676-4439,1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15,2001; 2002; 2003; 2004; 2005; 2006; 2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,16,2016
+Sudeste,Ciências Humanas,Associação Brasileira de História das Religiões,Anais dos Simpósios da ABHR,,2237-4132,12; 13; 16,2011; 2012; 2015,,
+Centro-Oeste,Ciencias Humanas ,Associação Brasileira de Pesquisadores em Jornalismo-SBPJor,Brazilian Journalism Research (BJR),,1981-9854,1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11,2005; 2006; 2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,,
+Sudeste,Ciencias Humanas ,Associação Nacional de Pesquisa e Pós-Graduação em Turismo,RBTur: Revista Brasileira de Pesquisa em Turismo,1982-6125,,1; 2; 3; 4; 5,2007; 2008; 2009; 2010; 2011,,
+Sul ,Ciências Humanas ,Centro Universitário Cesumar,Iniciação Científica CESUMAR,1518-1243,2176-9192,1; 1; 2; 2; 3; 3; 4; 4; 5; 5; 6; 6; 7; 7; 8; 8; 9; 9; 10; 10; 11; 11; 12; 12; 13; 13; 14; 15; 17,1999; 1999; 2000; 2000; 2001; 2001; 2002; 2002; 2003; 2003; 2004; 2004; 2005; 2005; 2006; 2006; 2007; 2007; 2008; 2008; 2009; 2009; 2010; 2010; 2011; 2011; 2012; 2013; 2015,14; 15; 16; 18,2012; 2013; 2014; 2016
+Sul,Ciências Sociais Aplicadas,Centro Universitário Cesumar,Revista Cesumar: Ciências Humanas e Sociais Aplicadas,1516-2664,2176-9176,1; 1; 2; 2; 3; 3; 5; 5; 6; 6; 7; 7; 8; 8; 9; 9; 10; 10; 11; 11; 12; 12; 13; 13; 14; 14; 15; 15; 16; 17; 19,1997; 1997; 1998; 1998; 1999; 1999; 2001; 2001; 2002; 2002; 2003; 2003; 2004; 2004; 2005; 2005; 2006; 2006; 2007; 2007; 2008; 2008; 2009; 2009; 2010; 2010; 2011; 2011; 2012; 2013; 2015,16; 17; 18; 20,2012; 2013; 2014; 2016
+Sul,Ciências Sociais Aplicadas ,Centro Universitário Cesumar,Revista Jurídica Cesumar: Mestrado,1677-6402,2176-9184,1; 1; 2; 2; 3; 3; 4; 4; 5; 5; 6; 6; 7; 7; 8; 8; 9; 9; 10; 10; 11; 11; 12; 13; 15,2001; 2001; 2002; 2002; 2003; 2003; 2004; 2004; 2005; 2005; 2006; 2006; 2007; 2007; 2008; 2008; 2009; 2009; 2010; 2010; 2011; 2011; 2012; 2013; 2015,12; 13; 14; 16,2012; 2013; 2014; 2016
+Sul,Ciências Agrária ,Centro Universitário Cesumar,Revista em Agronegócio e Meio Ambiente,1981-9951,2176-9168,1; 1; 2; 2; 3; 3; 4; 4; 5; 6; 8,2008; 2008; 2009; 2009; 2010; 2010; 2011; 2011; 2012; 2013; 2015,5; 6; 7; 9,2012; 2013; 2014; 2016
+Sul,Ciências Humanas,Universidade Estadual de Maringá,Acta Scientiarum. Human and Social Sciences,1679-7361,1807-8656,,,,
+Sul,"Linguística,Letras e Artes ",Universidade Federal do Rio Grande do Sul,Cena em Movimento,2178-2172,,,,,
+Sudeste,Ciências Humanas ,Universidade do Sul de Santa Catarina,Revista Crítica Cultural,1980-6493,,1; 2; 3; 4; 5; 6; 7; 8; 9; 10,2006; 2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,11,2016
+Sudeste,Ciências Humanas,Centro Universitário de Belo Horizonte,e-Com,,1983-0890,1; 2; 3; 4; 5; 6; 7; 8; 9,2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,10,2016
+Sul,Ciências Humanas ,Universidade de Caxias do Sul,Conexão: Comunicação e Cultura,,2178-2687,1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14,2002; 2003; 2004; 2005; 2006; 2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,15,2016
+"
+Sul,"Linguistica, Letras e Artes ",Universidade Federal do Rio Grande do Sul,Porto Arte,,,,,,
+Centro-Oeste,Ciências Humanas,Universidade Estadual de Goiás-UEG,Anais do Seminário sobre Políticas Públicas e Aspectos das Mudanças Institucionais no Brasil e em Goiás,2316-8307,,,,,
+Sul,Ciências Humanas ,Universidade Estadual de Ponta Grossa,Práxis Educativa,1809-4031,1809-4309,1; 2; 3; 4; 5; 6; 7; 8; 9; 10,2006; 2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,11; 12,2016; 2017
+Sul,Ciências Humanas,Sociedade Brasileira de Educação,Anais do Workshop de Informática na Escola,2316-6541,,,,,
+Sudeste,Ciências Sociais Aplicadas ,Universidade Federal do Espírito Santo,Simbiótica,2316-1620,,1; 2; 3; 4,2012; 2013; 2014; 2015,,
+Sudeste,Ciências Humanas ,Pontifícia Universidade Católica de Minas Gerais,Revista do Instituto de Ciências Humanas,,2359-0017,1; 2; 3; 4,2012; 2013; 2014; 2015,,
+Centro-Oeste,Ciências da Saúde,Universidade Federal de Goiás (UFG),Ciência Animal Brasileira,1518-2797,1809-6891,1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 13; 14; 15,2000; 2001; 2002; 2003; 2004; 2005; 2006; 2007; 2008; 2009; 2010; 2012; 2013; 2014,12; 16; 17,2011; 2015; 2016
+Sudeste,"Linguística, Letras e Artes ",Universidade do Sul de Santa Catarina,Linguagem em (Dis)curso,1982-4017,,1; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15,2000; 2001; 2002; 2003; 2004; 2005; 2006; 2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,16,2016
+Centro-Oeste,Ciências Humanas,Pontifícia Universidade Católica de Goiás-PUC Goiás,Mosaico,1983-7801,,,,,
+Sul,Ciências Sociais Aplicadas ,Universidade de Passo Fundo,Revista Justiça do Direito,1413-7038,2238-3212,1; 2; 3; 4; 5; 6; 7; 8; 9; 10,2006; 2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,11,2016
+Sul,"Linguística, Letras e Artes ",Universidade Federal de Santa Catarina-UFSC,Travessia,0101-9570,,1; 2; 3; 4; 5; 6; 7; 8; 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20,1980; 1981; 1982; 1983; 1984; 1985; 1986; 1987; 1989; 1990; 1991; 1992; 1993; 1994; 1995; 1996; 1997; 1998; 1999,,
+Sudeste,"Linguistica, Letras e Artes ",Universidade Estadual Paulista,Baleia na Rede: estudos em arte e sociedade,,1808-8473,,,,
+Sudeste,Ciências Humanas ,Universidade Estadual Paulista,GEO ATOS - Revista Geografia em Atos,,1984-1647,1; 2; 3; 4; 5; 6; 7; 8; 9; 10,2006; 2007; 2008; 2009; 2010; 2011; 2012; 2013; 2014; 2015,,
+Sul,"Linguística, Letras e Artes ",Universidade Regional de Blumenau,O Teatro Transcende,,2236-6644,15; 16; 17; 18; 19; 20,2009; 2011; 2012; 2013; 2014; 2015,,
diff --git a/tests/files/onix_scholarsportal.csv b/tests/files/onix_scholarsportal.csv
new file mode 100644
index 0000000..3f0e430
--- /dev/null
+++ b/tests/files/onix_scholarsportal.csv
@@ -0,0 +1,29 @@
+ISSN,Title,Publisher,Url,Vol,No,Published,Deposited
+0001-1037,AEDS Journal,Taylor and Francis,,11,3,1978-03,
+0001-1037,AEDS Journal,Taylor and Francis,,11,4,1978-06,
+0001-1037,AEDS Journal,Taylor and Francis,,12,1,1978-09,
+0001-1037,AEDS Journal,Taylor and Francis,,12,2,1979-01,
+0001-1037,AEDS Journal,Taylor and Francis,,12,3,1979-03,
+0001-1037,AEDS Journal,Taylor and Francis,,12,4,1979-06,
+0001-1037,AEDS Journal,Taylor and Francis,,13,1,1979-09,
+0001-1037,AEDS Journal,Taylor and Francis,,13,2,1980-01,
+0001-1037,AEDS Journal,Taylor and Francis,,13,3,1980-03,
+0374-4884,Journal of the Korean Physical Society,Springer,,75,10,2019-11,
+0021-8790,Journal of Animal Ecology,Wiley,,75,5,2006-09,
+0168-9002,"Nuclear Inst. and Methods in Physics Research, A",Elsevier,,490,1-2,2002-09,
+0924-2716,ISPRS Journal of Photogrammetry and Remote Sensing,Elsevier,,65,3,2010-05,
+0168-0072,Annals of Pure and Applied Logic,Elsevier,,158,3,2009-04,
+0892-3647,American Journal of Distance Education,Taylor and Francis,,17,3,2003-09,
+1052-5629,Journal of Management Education,Sage,,29,4,2005-08,
+1045-9219,"Parallel and Distributed Systems, IEEE Transactions on",IEEE,,11,8,2000-08,
+0261-3050,Bulletin of Latin American Research,Wiley,,14,3,1995-09,
+1359-5997,Materials and Structures,Springer,,24,2,1991-03,
+0920-5861,Catalysis Today,Elsevier,,97,2-3,2004-10,
+0302-8933,Archives of Microbiology,Springer,,183,3,2005-03,
+1365-8816,International Journal of Geographical Information Science,Taylor and Francis,,24,1,2010-01,
+0166-4328,Behavioural Brain Research,Elsevier,,46,2,1991-12,
+1068-798x,Russian Engineering Research,Springer,,27,7,2007-07,
+0003-9993,Archives of Physical Medicine and Rehabilitation,Elsevier,,92,1,2011-01,
+0005-0423,Australian Veterinary Journal,Wiley,,18,1,1942-02,
+0198-8859,Human Immunology,Elsevier,,39,1,1994-01,
+0959-6526,Journal of Cleaner Production,Elsevier,,88,Complete,2015-02,