diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 17:23:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 17:23:37 -0700 |
commit | 427f25fb9a362348df644afae2f56124634ca67d (patch) | |
tree | 1fc8ef796abdaac009121bca2093214e4ff77578 | |
parent | 571b1f77e9375c7bab5ccbe8ae41c60dd2c64779 (diff) | |
download | chocula-427f25fb9a362348df644afae2f56124634ca67d.tar.gz chocula-427f25fb9a362348df644afae2f56124634ca67d.zip |
vanished journal metadata importer
-rw-r--r-- | chocula/directories/vanished_disapeared.py | 63 | ||||
-rw-r--r-- | chocula/directories/vanished_inactive.py | 50 | ||||
-rw-r--r-- | tests/files/vanished_journals_disapeared.csv | 25 | ||||
-rw-r--r-- | tests/files/vanished_journals_inactive.csv | 25 |
4 files changed, 163 insertions, 0 deletions
# diff --git a/chocula/directories/vanished_disapeared.py b/chocula/directories/vanished_disapeared.py new file mode 100644 index 0000000..a5e4c38 --- /dev/null +++ b/chocula/directories/vanished_disapeared.py @@ -0,0 +1,63 @@
import csv
from typing import Iterable, Optional

from chocula.util import clean_str, clean_issn, parse_lang, parse_country
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


class VanishedDisapearedLoader(DirectoryLoader):
    """
    Journal-level metadata from the "Vanished Journals" project. This is the
    "disapeared" dataset (spelling follows the source slug and config key),
    with many homepage URLs in wayback (web.archive.org).

    CSV headers:
    - Source
    - If Identified by second source
    - Journal Name
    - ISSN
    - E-ISSN
    - URL
    - Publisher
    - blank
    - Language(s)
    - Country
    - society_affiliation
    - other_sci_affiliation
    - Discipline
    - Discipline Group
    - Start Year
    - End Year
    - Last Year Online
    - Actively Publishing
    - Internet Archive Link
    - Verified
    - Comments
    - The Keepers (archived)
    - Archive Link
    - Mikael (1 = agree with Lisa)
    """

    source_slug = "vanished_disapeared"

    def open_file(self) -> Iterable:
        """
        Yield each row of the configured CSV file as a dict keyed by header.

        Implemented as a generator so the file handle is closed as soon as
        iteration finishes (or the generator is discarded) instead of being
        left open. As a consequence the file is opened on first iteration,
        not at call time.
        """
        # newline="" is what the csv module requires so that quoted fields
        # containing line breaks are parsed correctly. The encoding is pinned
        # because the sample rows contain non-ASCII text and the platform
        # default is locale-dependent.
        # NOTE(review): assumes the full dataset is UTF-8 like the test
        # fixture -- confirm before relying on it.
        with open(
            self.config.vanished_disapeared.filepath,
            newline="",
            encoding="utf-8",
        ) as csv_file:
            yield from csv.DictReader(csv_file)

    def parse_record(self, record) -> Optional[DirectoryInfo]:
        """
        Convert one CSV row (a dict keyed by header) into a DirectoryInfo.

        Reads the "ISSN", "E-ISSN", "Journal Name", "Publisher",
        "Language(s)" and "Country" columns, then appends a homepage URL for
        each of "Internet Archive Link" and "Archive Link" for which
        HomepageUrl.from_url() returns a truthy value.
        """
        # Keep `langs` empty rather than [None] when no language is parsed.
        # NOTE(review): presumably parse_lang() returns None for cells it
        # cannot map (e.g. "English Japanese") -- confirm in chocula.util.
        lang = parse_lang(record["Language(s)"])

        info = DirectoryInfo(
            directory_slug=self.source_slug,
            raw_issn=clean_issn(record["ISSN"]),
            issne=clean_issn(record["E-ISSN"]),
            name=clean_str(record["Journal Name"]),
            publisher=clean_str(record["Publisher"]),
            langs=[lang] if lang else [],
            country=parse_country(record["Country"]),
        )

        # Both columns may hold an archived copy of the journal homepage;
        # column order matches the order URLs were appended originally.
        for column in ("Internet Archive Link", "Archive Link"):
            homepage = HomepageUrl.from_url(record[column])
            if homepage:
                info.homepage_urls.append(homepage)
        return info
# diff --git a/chocula/directories/vanished_inactive.py b/chocula/directories/vanished_inactive.py new
# file mode 100644 index 0000000..7996084 --- /dev/null +++ b/chocula/directories/vanished_inactive.py @@ -0,0 +1,50 @@
import csv
from typing import Iterable, Optional

from chocula.util import clean_str, clean_issn, parse_lang, parse_country
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo


class VanishedInactiveLoader(DirectoryLoader):
    """
    Journal-level metadata from the "Vanished Journals" project. This is the
    "inactive" dataset.

    CSV headers:

    - Source
    - Title
    - Identifier
    - Publisher
    - Comment
    - Language
    - ISSN
    - EISSN
    - Keyword
    - Start Year
    - End Year
    - Added on date
    - Subjects
    - Country
    - Publication fee
    - Further Information
    """

    source_slug = "vanished_inactive"

    def open_file(self) -> Iterable:
        """
        Yield each row of the configured CSV file as a dict keyed by header.

        Implemented as a generator so the file handle is closed as soon as
        iteration finishes (or the generator is discarded) instead of being
        left open. As a consequence the file is opened on first iteration,
        not at call time.
        """
        # newline="" is what the csv module requires so that quoted fields
        # containing line breaks are parsed correctly. The encoding is pinned
        # because the platform default is locale-dependent.
        # NOTE(review): assumes the dataset is UTF-8 -- confirm before
        # relying on it.
        with open(
            self.config.vanished_inactive.filepath,
            newline="",
            encoding="utf-8",
        ) as csv_file:
            yield from csv.DictReader(csv_file)

    def parse_record(self, record) -> Optional[DirectoryInfo]:
        """
        Convert one CSV row (a dict keyed by header) into a DirectoryInfo.

        Reads the "ISSN", "EISSN", "Title", "Publisher", "Language" and
        "Country" columns. No homepage URL is attached for this dataset.
        """
        # Several sample rows leave "Language" blank; keep `langs` empty
        # rather than [None] when no language is parsed.
        # NOTE(review): presumably parse_lang() returns None for blank or
        # unmappable cells -- confirm in chocula.util.
        lang = parse_lang(record["Language"])

        return DirectoryInfo(
            directory_slug=self.source_slug,
            raw_issn=clean_issn(record["ISSN"]),
            issne=clean_issn(record["EISSN"]),
            name=clean_str(record["Title"]),
            publisher=clean_str(record["Publisher"]),
            langs=[lang] if lang else [],
            country=parse_country(record["Country"]),
        )
# diff --git a/tests/files/vanished_journals_disapeared.csv b/tests/files/vanished_journals_disapeared.csv new file mode 100644 index 0000000..c3fbe1a --- /dev/null +++ b/tests/files/vanished_journals_disapeared.csv @@ -0,0 +1,25 @@
# +Source,If Identified by second source,Journal Name,ISSN,E-ISSN,URL,Publisher,,Language(s),Country,society_affiliation,other_sci_affiliation,Discipline,Discipline Group,Start Year,End Year,Last Year Online,Actively Publishing,Internet Archive Link,Verified,Comments,The Keepers (archived),Archive Link,Mikael (1 = agree with Lisa)
# +"Discovered During Earlier Studies (Laakso et al 2011 and Björk, Shen Laakso 2016)",,St. Francis Journal of Medicine,1097-413X,,http://journal.med.edu/,St.
Francis Medical Center,,English,United States,0,1,Medicine,Health,1996,2000,2002,4,https://web.archive.org/web/20020925003323/http://journal.med.edu/,Yes,,No,, +"Discovered During Earlier Studies (Laakso et al 2011 and Björk, Shen Laakso 2016)",,Shimane Ika Daigaku Kiyo,0387-9097,,http://lib.shimane-med.ac.jp/home/kiyo.htm,Shimane Medical University Library,,English Japanese,Japan,0,1,Medicine,Health,1999,2001,2003,2,https://web.archive.org/web/20030604181053/http://lib.shimane-med.ac.jp:80/home/kiyo.htm,Yes,,No,, +"Discovered During Earlier Studies (Laakso et al 2011 and Björk, Shen Laakso 2016)",,Lyon Pharmaceutique (Online),1625-9823,,http://www.univ-lyon1.fr/ispb/lyon-pharma,Association des Pharmaciens - Anciens Eleves - Amis de la Faculte de Pharmacie de Lyon,,French,France,1,0,Medicine,Health,1999,2001,2004,2,https://web.archive.org/web/20040414132312/http://ispb.univ-lyon1.fr/lyon-pharma/,Yes,,No,, +"Discovered During Earlier Studies (Laakso et al 2011 and Björk, Shen Laakso 2016)",,School of Advanced Technologies. 
Electronic Journal,1513-1432,,http://www.sat.ait.ac.th/ej-sat/,Asian Institute of Technology,,English,Thailand,0,1,Computers,PSM,1999,2001,2004,2,https://web.archive.org/web/20040812033245/http://www.sat.ait.ac.th/ej-sat/,Yes,,No,, +Ulrichsweb OA Journal Comparison,,Elore,,1456-3010,http://cc.joensuu.fi/~loristi/english.html,Suomen Kansantietouden Tutkijain Seura,,English | Text in Finnish | Text in Swedish,Finland,1,0,Folklore,SSH,1994,2001,2001,7,http://web.archive.org/web/20010518155429/http://cc.joensuu.fi/~loristi/,Yes,,No,, +"Discovered During Earlier Studies (Laakso et al 2011 and Björk, Shen Laakso 2016)",,Journal of Modern Business,,,http://www.dcpress.com/jmb/jmb1999.htm,DCPress,,English,United States,0,0,Economics,SSH,1998,2002,2006,4,http://web.archive.org/web/20060513234602/http://www.dcpress.com/jmb/jmb1999.htm,Yes,,No,, +"Discovered During Earlier Studies (Laakso et al 2011 and Björk, Shen Laakso 2016)",,Social Science Paper Publisher,1492-4064,,http://www.sspp.net,"University of Western Ontario, Department of Sociology",,English,Canada,0,1,Social Sciences,SSH,1997,2002,2003,5,http://web.archive.org/web/20030320125719/http://sspp.net/,Yes,"""SSPP.net is an informal electronic journal serving as an academic resource and opportunity for scholars who would like to find a readership for broadly sociological work""",No,, +"Discovered During Earlier Studies (Laakso et al 2011 and Björk, Shen Laakso 2016)",,Journal of Virtual Environments,1947-8518,1947-8526,http://www.brandeis.edu/pubs/jove/,Brandeis University,,English,United States,0,1,Virtual Environments,PSM,1996,2002,2006,6,https://web.archive.org/web/20090427023738/http://www.brandeis.edu/pubs/jove/,Yes,"Seems to have relaunched and reverse-flipped in 2010, https://www.igi-global.com/journal/international-journal-virtual-personal-learning/1134. couldn't find earlier editions though. 
New version in The Keepers https://thekeepers.org/journals?query=Journal+of+Virtual+Environments",No,, +"Discovered During Earlier Studies (Laakso et al 2011 and Björk, Shen Laakso 2016)",Scopus OA Journal Comparison,Complexity International,1320-0682,,http://www.csu.edu.au/ci/ci.html,"Johnston Center, Charles Sturt University",,English,Australia,0,1,Complex Systems,PSM,1994,2002,2003,8,https://web.archive.org/web/20030228153727/http://www.csu.edu.au/ci/idx-volume.html,Yes,,No,, +Scopus OA Journal Comparison,,Great Lakes Geographer,1195-3071,,http://geography.ssc.uwo.ca/research/great_lakes_geographer,"Dept. of Geography, University of Western Ontario",,English,Canada,0,1,Geography,SSH,1994,2006,2014,12,https://web.archive.org/web/20141021142251/http://geography.ssc.uwo.ca:80/research/great_lakes_geographer/EditorialBoard.asp,Yes,,No,, +DOAJ Removed journals 2012-2014,,Bolgarska Rusistika,0323-9160,1313-3713,http://journals.slavica.org/index.php/bulrus/index,Society of Russian Rusists in Bulgaria,,"Russian, Bulgarian",Bulgaria,1,0,Languages and Literatures,SSH,2003,2011,2012,8,https://web.archive.org/web/20120329124730/http://journals.slavica.org/index.php/bulrus/issue/archive,Yes,,No,, +Morrison 2016,,Journal of Pharmaceutical and Bioanalytical Science,,2278-828X,www.jpbscience.com,JPB Science,,English,India,0,0,Pharmacy,LS,2012,2014,2015,2,https://web.archive.org/web/20150823060851/http://jpbscience.com/currentissue.html,Yes,,No,, +DOAJ Removed journals 2012-2014,,Research Journal of International Studies,1453-212X,,http://www.eurojournals.com/international_studies.htm,"European Journals, Inc.",,English,United Kingdom,0,0,Social Sciences --- Political Science,SSH,2006,2012,2013,6,http://web.archive.org/web/20120112032409/http://www.eurojournals.com/international_studies.htm,Yes,,No,, +DOAJ Removed journals 2012-2014,,Revue UniRcoop,1705-2165,,http://www.unircoop.org/unircoop/index.php?q=en/revue,Universit de Sherbrooke,,"French, Spanish, Portuguese, 
English",Canada,0,1,Social Science,SSH,2003,2006,2010,3,http://web.archive.org/web/20100410052233/http://www.unircoop.org/unircoop/index.php?q=fr/revue,Yes,,No,, +DOAJ Removed journals 2012-2014,,Ulbra e Movimento : Revista de Educao Fsica,2178-4515,,http://revista.ulbrajp.edu.br/ojs/index.php/edufisica,Centro Universitrio Luterano de Ji-Paran,,Portuguese,Brazil,0,1,Sports Science,SSH,2010,2016,2016,6,https://web.archive.org/web/20160707143843/http://www.revista.ulbrajp.edu.br/ojs/index.php/actabrasileira/issue/archive,Yes,,No,, +DOAJ 2019,,Open source science journal,2066-740X,,http://opensourcejournal.ro,Academy of Economic Sciences,,English,Romania,1,0,Computer Science,PSM,2009,2011,2014,2,https://web.archive.org/web/20140103171830/http://opensourcejournal.ro/,Yes,,No,, +DOAJ Removed journals 2012-2014,,Lianes,1776-3150,,http://www.lianes.org,Lianes Association,,"French, English",France,0,0,Sociology,SSH,2005,2007,2014,2,https://web.archive.org/web/20140216232120/http://www.lianes.org/,Yes,,No,, +Ulrichsweb OA Journal Comparison,,E- Journal of Dentistry,,0976-2663,http://www.ejournalofdentistry.com,E - Journal of Dentistry,,Text and summaries in English,India,0,0,MEDICAL SCIENCES - DENTISTRY,Health,2011,2014,2016,3,https://web.archive.org/web/20160404072039/http://ejournalofdentistry.com/home.asp,Yes,,No,, +DOAJ Removed journals 2012-2014,,Educao Profissional : Cincia e Tecnologia,1980-5594,1981-0482,http://www.revista.facsenac.edu.br/index.php?journal=edupro&page=index,Servio Nacional de Aprendizagem Comercial do Distrito Federal,,Portuguese,Brazil,0,1,Science (General) --- Education,SSH,2006,2008,2009,2,https://web.archive.org/web/20090208175115/http://revista.facsenac.com.br/index.php?journal=edupro&page=issue&op=archive,Yes,,No,, +DOAJ Removed journals 2012-2014,,eJournal of Biological Sciences,2076-9946,2076-9954,http://www.ejarr.com/ejbs/index.aspx,EJARR 
Publishing,,English,Pakistan,0,0,Biology,LS,2009,2013,2017,4,https://web.archive.org/web/20170714182123/http://www.ejarr.com/ejbs/Volume7.aspx,Yes,,No,, +DOAJ Removed journals 2012-2014,,Ciencias Sociales Online,,0718-1671,http://www.uvm.cl/csonline,University of Via,,"Spanish, English, Portuguese",Chile,0,1,Social Sciences,SSH,2004,2008,2016,4,http://web.archive.org/web/20160208024403/http://www.uvm.cl/csonline/,Yes,,No,, +DOAJ Removed journals 2012-2014,,"Electronic Journal of Environmental, Agricultural and Food Chemistry",,1579-4377,http://ejeafche.uvigo.es/,Universidade de Vigo,,English,Spain,0,1,Nutrition and Food Sciences --- Agriculture (General),LS,2002,2011,2012,9,"https://web.archive.org/web/20120629231958/http://ejeafche.uvigo.es/component/option,com_docman/Itemid,33/",Yes,,No,, +Ulrichsweb OA Journal Comparison,,Accion Pedagogica,1315-401X,2244-8756,http://www.saber.ula.ve/accionpe/,"Universidad de los Andes, Tachira",,Spanish,"Venezuela, RB",0,1,Education,SSH,1999,2017,2018,18,https://web.archive.org/web/20181023220301/http://www.saber.ula.ve:80/handle/123456789/4113,Yes,,No,, +DOAJ Removed journals 2012-2014,,More than Thought : a Scholarly Literary Journal Devoted to Consciousness,2153-8565,,http://morethanthought.community.officelive.com,More than Thought,,"English, Portuguese, Spanish",United States,0,0,Languages and Literatures,SSH,2009,2011,2012,2,https://web.archive.org/web/20120516102935/http://morethanthought.community.officelive.com/default.aspx,Yes,"Website says: ""All articles for previous issues are in the process of being uploaded to Project Muse where they will continue being available in perpetuity.""",No,, diff --git a/tests/files/vanished_journals_inactive.csv b/tests/files/vanished_journals_inactive.csv new file mode 100644 index 0000000..cedc900 --- /dev/null +++ b/tests/files/vanished_journals_inactive.csv @@ -0,0 +1,25 @@ +Source,Title,Identifier,Publisher,Comment,Language,ISSN,EISSN,Keyword,Start Year,End Year,Added on 
date,Subjects,Country,Publication fee,Further Information +DOAJ Removed journals 2012-2014,Journal of Cardiovascular Disease Research,http://www.jcdronline.com,Medknow Publications,OA but Inactive,English,0975-3583,0976-2833,"cardiology, cardiovascular diseases",2010,,"40,862,531",Cardiovascular,United States,Yes,http://www.jcdronline.com/contributors.asp#pp1 +DOAJ Removed journals 2012-2015,International Journal of Advanced Computer and Mathematical Sciences,http://bipublication.com/IJCSA.html,BioIT InternationalsJournals,OA but Inactive,English,2230-9624,,mathematics,2010,,"408,615,589",Computer Science,India,Conditional,http://bipublication.com/auth-guid.html +DOAJ Removed journals 2012-2016,Medical Journal of the Islamic Republic of Iran,http://mjiri.tums.ac.ir,Tehran University of Medical Sciences,OA but Inactive,English,1016-1430,2251-6840,public health,2008,,"408,615,094",Medicine (General),Iran,Conditional,http://mjiri.tums.ac.ir/page.php?slct_pg_id=75&sid=1&slc_lang=en +DOAJ Removed journals 2012-2017,Perspectiva Interdisciplinaria de Msica,http://www.revistas.unam.mx/index.php/pim,Universidad Nacional Autnoma de Mxico,OA but Inactive,Spanish,1870-5758,,"musicology, ethnomusicology, music education",2006,,"408,575,429",Music,Mexico,No,http://www.revistas.unam.mx/index.php/pim/about/submissions#onlineSubmissions +DOAJ Removed journals 2012-2018,Gnosis,https://artsciweb.concordia.ca/ojs/index.php/gnosis/index,Concordia University,OA but Inactive,"English, French",1927-5277,,"ethics, logic",2000,,"408,574,468",Philosophy,Canada,No,https://artsciweb.concordia.ca/ojs/index.php/gnosis/about/submissions#authorGuidelines +DOAJ Removed journals 2012-2019,Journal of Pakistan Medical Students,http://jpmsonline.com/,Dow University of Health Sciences,OA but Inactive,English,2222-2332,,"medical sciences, public health",2011,,"408,505,432",Medicine (General),Pakistan,No,http://www.jpmsonline.com/authors +DOAJ Removed journals 2012-2020,International Journal of Pharmacy 
and Biomedical Sciences,http://www.pharmainterscience.com/,PharmaInterScience Publisher,OA but Inactive,English,0976-5263,,"pharmacognosy, pharmacology, immunology, pharmaceutical analysis",2010,,"408,156,376",Pharmacy and materia medica,India,Yes,http://www.pharmainterscience.com/IJPBS-Author-Instructions.html +DOAJ Removed journals 2012-2021,Revista de la Asociacin Argentina de Sedimentologa,http://www.scielo.org.ar/scielo.php?script=sci_serial&pid=1853-6360&lng=es&nrm=iso,Asociacin Argentina de Sedimentologa,OA but Inactive,Spanish,0328-1159,1853-6360,sedimentology,2003,2004,"408,094,778",Earth Sciences --- Geology,Argentina,No,http://www.scielo.org.ar/revistas/raas/einstruc.htm +DOAJ Removed journals 2012-2022,International Journal of Computer Information Systems,http://www.svpublishers.co.uk/#/ijcis/4547309847,Silicon Valley Publishers,OA but Inactive,English,2229-5208,,"computational mathematics, computer engineering",2010,,"408,076,788",Computer Science,India,No,http://www.svpublishers.co.uk/#/ijcis-authors-instruction/4547330633 +DOAJ Removed journals 2012-2166,International Journal of Multicultural Societies,http://www.unesco.org/shs/ijms,UNESCO,OA but Inactive,"English, French",1817-4574,,"multiculturalism, diversity, migration, minority rights, social sciences",1999,2009,"398,846,001",Social Sciences,France,No,http://portal.unesco.org/shs/en/ev.php-URL_ID=2553&URL_DO=DO_TOPIC&URL_SECTION=201.html +Scopus OA Journal Comparison,"African Journal of Traditional, Complementary and Alternative Medicines",,African Networks on Ethnomedicines,OA but Inactive,,0189-6016,-,,,,,,Nigeria,, +Scopus OA Journal Comparison,Western Criminology Review,,Western Society of Criminology,OA but Inactive,,1096-4886,-,,,,,,United States,, +Ulrichsweb OA Journal Comparison,Open Access Animal Physiology,http://www.dovepress.com/open-access-animal-physiology-journal,Dove Medical Press Ltd.,OA but Inactive,,1179-2779,,"BIOLOGY - PHYSIOLOGY, VETERINARY SCIENCE",2010,,,,United 
Kingdom,, +DOAJ Removed journals 2012-2240,Open Applied Mathematics Journal,http://www.bentham.org/open/toamj/,Bentham open,OA but Inactive,English,1874-1142,,"applied mathematics, equations",2007,,"395,345,632",Mathematics,United States,Yes,http://www.benthamscience.com/open/toamj/MSandI.htm +Ulrichsweb OA Journal Comparison,The Open Applied Physics Journal,http://www.bentham.org/open/toapj/EBM.htm,Bentham Open,OA but Inactive,,1874-1835,,PHYSICS,2008,,,,Netherlands,, +DOAJ Removed journals 2012-2360,European Journal of Scientific Research,http://www.eurojournals.com/EJSR.htm,"European Journals, Inc.",OA but Inactive,English,1450-216X,,"applied sciences, theoretical sciences",2005,,"38,569,465",Science (General),United Kingdom,Yes, +DOAJ Removed journals 2012-2025,"Revista Sapiens : Histria, Patrimnio e Arqueologia",http://www.revistasapiens.org/,Revista Sapiens,OA but Inactive,Portuguese,1647-1660,,"archaeology, heritage",2008,,"407,885,193",History,Portugal,No,http://www.revistasapiens.org/enviodeartigos.html +Ulrichsweb OA Journal Comparison,"Journal of Information, Information Technology, and Organizations",http://jiito.org,Informing Science Institute,OA but Inactive,,1557-1327,,LIBRARY AND INFORMATION SCIENCES,2006,,,,United States,, +DOAJ Removed journals 2012-2074,CORELL : Computer Resources for Language Learning,http://www.ucam.edu/corell,Universidad Catlica San Antonio de Murcia,OA but Inactive,English,1988-1746,,"language education, foreign language learning, computer resources, computer-mediated communication",2007,,"404,595,258",Education --- Languages and Literatures,Spain,No,http://www.ucam.edu/corell/submissions +Ulrichsweb OA Journal Comparison,Ars Disputandi,http://www.arsdisputandi.org/,"Igitur, Utrecht Publishing & Archiving Services",OA but Inactive,,1566-5399,,"PHILOSOPHY, RELIGIONS AND THEOLOGY",2000,,,,Netherlands,, +Ulrichsweb OA Journal Comparison,The Open Signal Processing Journal,http://www.bentham.org/open/tosigpj/index.htm,Bentham 
Open,OA but Inactive,,1876-8253,,ENGINEERING - ELECTRICAL ENGINEERING,2008,,,,Netherlands,, +DOAJ Removed journals 2012-2316,Ciencias,http://redalyc.uaemex.mx/src/inicio/HomRevRed.jsp?iCveEntRev=644,Universidad Nacional Autnoma de Mxico,OA but Inactive,Spanish,0187-6376,,"social sciences, natural sciences",2000,,"388,595,476",Science (General),Mexico,Yes,http://redalyc.uaemex.mx/redalyc/media/normas/normcol644.html +Ulrichsweb OA Journal Comparison,Journal of Social Sciences,http://www.krepublishers.com/02-Journals/JSS/JSS-00-0-000-000-1997-Web/JSS-00-0-000-000-1997-1-Cover.htm,Kamla-Raj Enterprises,OA but Inactive,,null,,SOCIAL SCIENCES: COMPREHENSIVE WORKS,2004,,,,India,, +DOAJ Removed journals 2012-2101,Open Arthritis Journal,http://www.bentham.org/open/toarthj/index.htm,Bentham open,OA but Inactive,English,1876-5394,,arthritis therapy,2010,,"403,405,405",Internal medicine,United States,Yes,http://www.benthamscience.com/open/toarthj/MSandI.htm |