diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-08-31 20:03:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-08-31 20:03:37 -0700 |
commit | 4cafb8dee8dac0816907088878a86e8ff1c7b320 (patch) | |
tree | 75a979cb7b31e8356cec8a167b5bd59c9c1e3794 | |
parent | 44548721f42eb71ee08cd8667cf129cddb2d3341 (diff) | |
download | chocula-4cafb8dee8dac0816907088878a86e8ff1c7b320.tar.gz chocula-4cafb8dee8dac0816907088878a86e8ff1c7b320.zip |
add support for PKP PLN (KBART-like)
-rw-r--r-- | chocula/__main__.py | 1 | ||||
-rw-r--r-- | chocula/common.py | 46 | ||||
-rw-r--r-- | chocula/kbart.py | 11 | ||||
-rw-r--r-- | tests/files/ISSN-to-ISSN-L.txt | 22 | ||||
-rw-r--r-- | tests/files/onix_pkp_pln.csv | 60 |
5 files changed, 139 insertions, 1 deletions
diff --git a/chocula/__main__.py b/chocula/__main__.py index 4eb1982..ebd4120 100644 --- a/chocula/__main__.py +++ b/chocula/__main__.py @@ -40,6 +40,7 @@ Commands: clockss lockss portico + pkp_pln See TODO.md for more work-in-progress """ diff --git a/chocula/common.py b/chocula/common.py index a87a736..763997b 100644 --- a/chocula/common.py +++ b/chocula/common.py @@ -170,3 +170,49 @@ class KbartLoader: cur.close() db.db.commit() return counts + + +class OnixCsvLoader(KbartLoader): + """ + Similar to the KBART loader class, but for ONIX CSV files instead of KBART + formatted TSV. + + CSV columns: + - ISSN + - Title + - Publisher + - Url + - Vol + - No + - Published + - Deposited + """ + + def open_file(self) -> Iterable: + f = open(self.file_path(), "r") + # skip first line of PKP PLN Onix file, which is a "generated date" header + if self.source_slug == "pkp_pln": + next(f) + return csv.DictReader(f) + + def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]: + + raw_issn = clean_issn(row["ISSN"]) + issnl = issn_db.issn2issnl(raw_issn or "") + start_year = int(row["Published"][:4]) + start_volume = clean_str(row["Vol"]) + record = KbartRecord( + issnl=issnl, + issne=None, + issnp=None, + embargo=None, + title=clean_str(row["Title"]), + publisher=clean_str(row["Publisher"]), + url=HomepageUrl.from_url(row["Url"]), + start_year=start_year, + end_year=start_year, + start_volume=start_volume, + end_volume=start_volume, + year_spans=[], + ) + return record diff --git a/chocula/kbart.py b/chocula/kbart.py index e74e19f..5fd0acc 100644 --- a/chocula/kbart.py +++ b/chocula/kbart.py @@ -1,4 +1,4 @@ -from chocula.common import KbartLoader +from chocula.common import KbartLoader, OnixCsvLoader class ClockssKbartLoader(KbartLoader): @@ -33,9 +33,18 @@ class JstorKbartLoader(KbartLoader): return self.config.jstor.filepath +class PkpPlnOnixLoader(OnixCsvLoader): + + source_slug = "pkp_pln" + + def file_path(self) -> str: + return 
self.config.pkp_pln.filepath + + ALL_CHOCULA_KBART_CLASSES = [ ClockssKbartLoader, LockssKbartLoader, PorticoKbartLoader, JstorKbartLoader, + PkpPlnOnixLoader, ] diff --git a/tests/files/ISSN-to-ISSN-L.txt b/tests/files/ISSN-to-ISSN-L.txt index 655570a..73f4629 100644 --- a/tests/files/ISSN-to-ISSN-L.txt +++ b/tests/files/ISSN-to-ISSN-L.txt @@ -344,3 +344,25 @@ ISSN ISSN-L 8756-4629 8756-4629 1648-6897 1648-6897 0030-9648 0030-9648 +1518-8787 1518-8787 +2346-2108 2346-2108 +1410-9166 1410-9166 +2177-6059 2177-6059 +1678-4634 1678-4634 +2037-416X 2037-416X +1988-320X 1988-320X +0048-749X 0048-749X +2378-1351 2378-1351 +2178-6011 2178-6011 +1920-0323 1920-0323 +2338-476X 2338-476X +1988-5253 1988-5253 +2027-5374 2027-5374 +1678-9946 1678-9946 +2346-2116 2346-2116 +1409-469X 1409-469X +1518-8787 1518-8787 +1941-0832 1941-0832 +1715-0868 1715-0868 +2215-2075 2215-2075 +1988-8325 1988-8325 diff --git a/tests/files/onix_pkp_pln.csv b/tests/files/onix_pkp_pln.csv new file mode 100644 index 0000000..262096f --- /dev/null +++ b/tests/files/onix_pkp_pln.csv @@ -0,0 +1,60 @@ +Generated,2020-08-30 +ISSN,Title,Publisher,Url,Vol,No,Published,Deposited +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,1,1,2012-08-07,2015-11-04 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,1,2,2012-12-14,2015-11-04 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,2,1,2013-06-14,2015-11-04 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,2,2,2013-12-13,2015-11-04 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,3,1,2014-06-10,2015-11-04 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,3,2,2014-12-01,2015-11-04 +1893-3211,"(unknown title)","Septentrio 
Academic Publishing",https://septentrio.uit.no/index.php/borealis,4,1,2015-06-03,2015-11-04 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,5,2,2016-12-01,2016-12-09 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,6,1,2017-05-30,2017-05-31 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,6,2,2017-12-01,2017-12-02 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,7,1,2018-05-18,2018-05-19 +1893-3211,"(unknown title)","Septentrio Academic Publishing",https://septentrio.uit.no/index.php/borealis,7,2,2018-12-03,2018-12-04 +1715-720X,"Evidence Based Library and Information Practice","University of Alberta Learning Services",https://journals.library.ualberta.ca/eblip/index.php/EBLIP,11,2,2016-06-20,2016-06-21 +1715-720X,"Evidence Based Library and Information Practice","University of Alberta Learning Services",https://journals.library.ualberta.ca/eblip/index.php/EBLIP,11,3,2016-09-26,2016-09-27 +1715-720X,"Evidence Based Library and Information Practice","University of Alberta Learning Services",https://journals.library.ualberta.ca/eblip/index.php/EBLIP,11,4,2016-12-15,2016-12-17 +1715-720X,"Evidence Based Library and Information Practice","University of Alberta Learning Services",https://journals.library.ualberta.ca/eblip/index.php/EBLIP,12,1,2017-03-15,2017-03-17 +1715-720X,"Evidence Based Library and Information Practice","University of Alberta Learning Services",https://journals.library.ualberta.ca/eblip/index.php/EBLIP,12,2,2017-06-29,2017-07-01 +1715-720X,"Evidence Based Library and Information Practice","University of Alberta Learning Services",https://journals.library.ualberta.ca/eblip/index.php/EBLIP,12,3,2017-09-18,2017-09-19 +1822-4288,"The Baltic Journal of Road and Bridge 
Engineering",,https://bjrbe-journals.rtu.lv/,8,2,2020-06-04,2020-06-05 +2509-0119,"International Journal of Progressive Sciences and Technologies","International Journals of Sciences and High Technologies",https://www.ijsht-journals.org/IJPSAT/index.php/ijpsat,8,1,2018-04-10,2019-05-23 +1678-9946,"Revista do Instituto de Medicina Tropical de São Paulo","Universidade de São Paulo. Instituto de Medicina Tropical de São Paulo",https://www.revistas.usp.br/rimtsp,50,6,2008-12-01,2016-09-25 +2316-8935,"Anais da Escola Superior de Agricultura Luiz de Queiroz","Universidade de São Paulo. Escola Superior de Agricultura Luiz de Queiroz",http://www.revistas.usp.br/aesalq,39,1,1982-01-01,2016-10-01 +1982-8837,"Pandaemonium Germanicum","Universidade de São Paulo. Faculdade de Filosofia, Letras e Ciências Humanas",http://www.revistas.usp.br/pg,0,12,2008-11-05,2016-08-11 +1988-5253,"Anuario de Psicología/The UB Journal of Psychology","Universitat de Barcelona",https://revistes.ub.edu/index.php/Anuario-psicologia,40,2,2009-01-01,2020-05-20 +-,"InForma - Macedonian Journal of Informatics","Id Design 2012/DOOEL Skopje, Republic of Macedonia",http://ojs2x.id-press.eu/informa,2,16,2016-03-18,2016-05-15 +2327-5596,"Linguistic Evidence in Security, Law and Intelligence",e-journals@mail.pitt.edu,http://www.lesli-journal.org/ojs/index.php/lesli,1,1,2013-12-06,2016-06-25 +2395-8235,"Acta Pediátrica de México","Instituto Nacional de Pediatría",http://ojs.actapediatrica.org.mx/index.php/APM,31,5,2010-09-01,2018-04-10 +2283-4044,"Pratica Medica & Aspetti Legali",SEEd,http://test-journals-3.1.2.seedmedicalpublishers.com/index.php/PMeAL,9,1,2015-02-28,2016-05-12 +1708-6892,"Journal of the Canadian Health Libraries Association / Journal de l'Association des bibliothèques de la santé du Canada","Canadian Health Libraries Association/Association des bibliothèques de la santé du Canada",https://journals.library.ualberta.ca/jchla/index.php/jchla,40,2,2019-08-01,2020-07-14 +1518-8787,"Revista de 
Saúde Pública","Universidade de São Paulo. Faculdade de Saúde Pública",https://www.revistas.usp.br/rsp,36,3,2002-06-01,2016-09-25 +2346-2108,"Derecho Penal y Criminología","Instituto de Ciencias Penales y Criminológicas",http://uexternado2.metarevistas.org/index.php/derpen,34,96,2013-06-24,2017-07-08 +1410-9166,"TASHWIRUL AFKAR: Journal of Reflection of Religious and Cultural Thought",,http://jurnalafkar.lakpesdam.or.id/index.php/afkar,33,0,2017-11-01,2017-11-05 +2177-6059,Roteiro,"Universidade do Oeste de Santa Catarina",https://portalperiodicos.unoesc.edu.br/roteiro,45,0,2019-10-22,2020-07-14 +1678-4634,"Educação e Pesquisa","Universidade de São Paulo. Faculdade de Educação",http://www.revistas.usp.br/ep,27,1,2001-06-01,2017-11-11 +2037-416X,"Annals of Geophysics","Istituto Nazionale di Geofisica e Vulcanologia, INGV",https://www.annalsofgeophysics.eu/index.php/annals,51,4,2008-06-12,2017-07-16 +1988-320X,Sefarad,"Consejo Superior de Investigaciones Científicas",http://sefarad.revistas.csic.es/index.php/sefarad,71,2,2019-03-27,2019-03-29 +0048-749X,"The Review of Regional Studies","Southern Regional Science Association",http://journal.srsa.org/ojs/index.php/RRS,13,3,1983-09-01,2017-08-12 +2378-1351,Abdomen,,http://www.old.smartscitech.com/index.php/Abdomen,3,0,2015-12-01,2019-08-22 +2178-6011,"Brazilian Dental Science","Institute of Science and Technology of São José dos Campos",http://unesp.homologacao.emnuvens.com.br/cob,11,1,2008-08-25,2019-02-01 +1920-0323,"TranscUlturAl: A Journal of Translation and Cultural Studies","University of Alberta",https://journals.library.ualberta.ca/tc/index.php/TC,8,2,2016-11-22,2017-05-26 +2338-476X,"Paediatrica Indonesiana","Indonesian Pediatric Society",http://localhost/pi2/index.php/paediatrica-indonesiana,47,2,2007-05-01,2018-02-06 +1988-5253,"Anuario de Psicología/The UB Journal of Psychology","Universitat de Barcelona",https://revistes.ub.edu/index.php/Anuario-psicologia,,23,1980-01-12,2020-05-20 
+2027-5374,Aquichan,"Universidad de La Sabana",https://aquichan.unisabana.edu.co/index.php/aquichan,9,1,2009-07-09,2020-02-06 +1678-9946,"Revista do Instituto de Medicina Tropical de São Paulo","Universidade de São Paulo. Instituto de Medicina Tropical de São Paulo",https://www.revistas.usp.br/rimtsp,57,0,2015-09-01,2016-09-25 +2346-2116,"Revista La Propiedad Inmaterial","Centro de Estudios de la Propiedad Intelectual",http://uexternado2.metarevistas.org/index.php/propin,0,19,2015-06-25,2017-07-08 +1409-469X,"Diálogos Revista Electrónica","Universidad de Costa Rica",https://163.178.170.219/index.php/dialogos,16,0,2015-11-24,2018-06-09 +1518-8787,"Revista de Saúde Pública","Universidade de São Paulo. Faculdade de Saúde Pública",https://www.revistas.usp.br/rsp,14,1,1980-03-01,2016-09-25 +1941-0832,"Radical Teacher","University Library System, University of Pittsburgh",http://radicalteacher.library.pitt.edu/ojs/index.php/radicalteacher,0,96,2013-05-07,2017-08-16 +1715-0868,"Contributions to Discrete Mathematics","Faculty of Science, University of Calgary",http://136.159.200.85/cdm/index.php/cdm,5,2,2010-09-29,2016-06-24 +2215-2075,"Revista de Biología Tropical","Universidad de Costa Rica",https://163.178.170.219/index.php/rbt,38,2,2016-07-18,2017-04-29 +1988-8325,"Anales Cervantinos","Consejo Superior de Investigaciones Científicas",http://analescervantinos.revistas.csic.es/index.php/analescervantinos,45,0,2019-02-15,2019-02-17 +2597-9388,"Indonesian Food and Nutrition Progress","Indonesian Association of Food Technologists",https://journal.ugm.ac.id/ifnp,6,2,2014-05-21,2018-10-17 +0034-3064,"Reformed Review","Western Theological Seminary",https://repository.westernsem.edu/pkp/index.php/rr,35,1,1981-10-01,2016-08-26 +1988-2955,Al-Qanṭara,"Consejo Superior de Investigaciones Científicas",http://al-qantara.revistas.csic.es/index.php/al-qantara,39,1,2019-02-15,2019-02-17 +2316-9117,"Fisioterapia e Pesquisa","Universidade de São Paulo. 
Faculdade de Medicina",http://www.revistas.usp.br/fpusp,4,2,1997-12-07,2017-12-09 +۲۴۲۳-۷۶۰,"Medical Ethics","مرکز تحقیقات اخلاق و حقوق پزشکی، دانشگاه علوم پزشکی شهید بهشتی",http://ojs2.sbmu.ac.ir/me,7,26,2014-05-14,2019-11-01 +2355-5777,"Jurnal Kawistara","Sekolah Pascasarjana UGM",https://dev.jurnal.ugm.ac.id/kawistara,3,1,2014-03-26,2018-10-17 +2215-3373,"Revista de Matemática: Teoría y Aplicaciones","Centro de Investigación en Matemática Pura y Aplicada (CIMPA)",https://163.178.170.219/index.php/matematica,13,2,2006-08-01,2018-06-14 |