From 4018c62adc32a88a76f7bd54f1003d58a29fe120 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 23 Jun 2020 23:15:17 -0700 Subject: updated report HTML --- reports/report.2020-06-24.html | 1172 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1172 insertions(+) create mode 100644 reports/report.2020-06-24.html diff --git a/reports/report.2020-06-24.html b/reports/report.2020-06-24.html new file mode 100644 index 0000000..4d9636e --- /dev/null +++ b/reports/report.2020-06-24.html @@ -0,0 +1,1172 @@ + + +

Chocula Journal Aggregate Stats

+
+ + + + + + +
datetime('now')
2020-06-24 06:07:59
QUERY: SELECT datetime('now');
+
+ + + + + + + + + + +
seqnamefile
0main/home/bnewbold/code/chocula/chocula.sqlite
QUERY: PRAGMA database_list;
+

Overview

+

Top publishers by journal count:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
publisherCOUNT(*)
50438
Elsevier4896
Springer3180
Taylor & Francis3048
John Wiley & Sons, Inc2322
SAGE Publications1441
J-STAGE1405
Peter Lang International Academic Publishers1356
SciELO1187
Informa UK (Taylor & Francis)737
Springer-Verlag706
Cambridge University Press598
Walter de Gruyter GmbH554
Georg Thieme Verlag KG515
OMICS Publishing Group497
IEEE, Inc482
JSTOR471
Medknow Publications463
Oxford University Press461
Hindawi456
Bentham Science445
De Gruyter Open Sp. z o.o.440
Wolters Kluwer Health427
CAIRN416
Inderscience396
QUERY: SELECT publisher, COUNT(*)
+FROM journal
+GROUP BY publisher
+ORDER BY COUNT(*) DESC
+LIMIT 25;
+

Top countries by number of journals:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
countryCOUNT(*)
us31063
11775
id10203
de8221
in7489
gb7357
fr6947
uk5989
nl5579
br4779
QUERY: SELECT  country,
+COUNT(*)
+FROM journal
+GROUP BY country
+ORDER BY COUNT(*) DESC
+LIMIT 10;
+

... by number of papers:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
countryCOUNT(*)SUM(release_count)
us3106332178939
gb735713514834
nl557910994203
de82217537722
jp39245534859
uk59894689013
fr69472205320
ch21841956771
ru33211437805
in74891193631
QUERY: SELECT  country,
+COUNT(*),
+SUM(release_count)
+FROM journal
+GROUP BY country
+ORDER BY SUM(release_count) DESC
+LIMIT 10;
+

Top languages by number of journals:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
langCOUNT(*)
119385
en33483
fr2545
es1980
pt1253
id807
fa705
de687
ja627
ru454
QUERY: SELECT  lang,
+COUNT(*)
+FROM journal
+GROUP BY lang
+ORDER BY COUNT(*) DESC
+LIMIT 10;
+

... by number of papers:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
langCOUNT(*)SUM(release_count)
en3348352106736
11938539633327
de6871080227
ja627701062
fr2545459624
es1980326190
pt1253259440
ru454216062
it365107777
id80763140
QUERY: SELECT  lang,
+COUNT(*),
+SUM(release_count)
+FROM journal
+GROUP BY lang
+ORDER BY SUM(release_count) DESC
+LIMIT 10;
+

Fatcat Fulltext Coverage

+

Fulltext coverage by publisher type:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
publisher_typeAVG(ia_frac)AVG(preserved_frac)journal_countpaper_count
big50.203831409887110960.74789233637749961434138968407
society0.382857165843233150.52559480974346241159417479369
0.28929333151877260.394896558685083947644917433767
unipress0.52915211592292480.715613385774320381565979807
commercial0.332234112662403660.67929442429664259005783703
longtail0.70211711691410040.7512778812541995428175541048
repository0.124114373972027970.241226554041370837651032655
scielo0.82340536986080260.85326575178344941588931427
other0.178193300039085340.6325501923587422961847590
archive0.32746602291231760.9870091603652298545733596
oa0.77034125081611280.80515629630222131854665337
QUERY: SELECT  publisher_type,
+AVG(ia_frac),
+AVG(preserved_frac),
+COUNT(*) AS journal_count,
+SUM(release_count) AS paper_count
+FROM journal
+GROUP BY publisher_type
+ORDER BY SUM(release_count) DESC;
+

Top publishers by number of journals with very little coverage (ia_frac below 0.05):

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
publisherjournal_countAVG(ia_frac)
96040.0017576670094484276
Elsevier18940.01719793308556602
Taylor & Francis10340.026333463256277043
J-STAGE10000.008786577145385244
John Wiley & Sons, Inc7580.021752551474386456
Informa UK (Taylor & Francis)5830.010076744435949143
SAGE Publications5660.018548379050446432
Springer-Verlag3810.015239356601104427
Springer3520.025469114384971014
JSTOR2710.01053436333975769
QUERY: SELECT  publisher,
+COUNT(*) AS journal_count,
+AVG(ia_frac)
+FROM journal
+WHERE ia_frac < 0.05
+GROUP BY publisher
+ORDER BY journal_count DESC
+LIMIT 10;
+

Amount of fulltext by SHERPA/ROMEO journal color:

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
sherpa_colorSUM(ia_count)
8179318
blue1068696
green10279336
white731881
yellow2489843
QUERY: SELECT  sherpa_color,
+SUM(ia_count)
+FROM journal
+GROUP BY sherpa_color;
+

Journal Homepages

+

Homepage URL counts:

+
+ + + + + + + + +
unique_urlsjournals_with_homepages
188481118838
QUERY: SELECT COUNT(DISTINCT surt) as unique_urls, COUNT(DISTINCT issnl) as journals_with_homepages FROM homepage;
+

Journals with the most homepage URLs:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
issnlCOUNT(*)
0036-643921
1487-061416
2375-038316
2374-403015
0097-632614
0749-405X13
1521-909713
0009-700412
0030-707612
0717-554X12
QUERY: SELECT  issnl,
+COUNT(*)
+FROM homepage
+GROUP BY issnl
+ORDER BY COUNT(*) DESC
+LIMIT 10;
+

Top/redundant URLs and SURTs:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
surtCOUNT(*)
com,indianjournals)/80
com,hindawi)/71
au,com,informit,search)/search;res=apaft64
com,umi)/pqdauto51
org,rsc,pubs)/en/ebooks50
com,umi)/proquest48
org,ieee,ieeexplore)/xplore/conferences.jsp40
org,omicsonline)/37
com,idealibrary)/36
com,wiley,interscience)/31
QUERY: SELECT  surt,
+COUNT(*)
+FROM homepage
+GROUP BY surt
+ORDER BY COUNT(*) DESC
+LIMIT 10;
+

What is the deal with all those "benjamins" URLs?

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
publishername
John Benjamins Publishing CompanyNOWELE
Studia Uralo-Altaica
John Benjamins Publishing CompanyLanguage Problems and Language Planning
John Benjamins Publishing CompanyLingvisticæ investigationes
John Benjamins Publishing CompanyLinguistics of the TIbeto-Burman Area
John Benjamins Publishing CompanyPragmatics & Cognition
John Benjamins Publishing CompanyTerminology
John Benjamins Publishing CompanyWritten Language & Literacy
FORUM: Revue internationale d’interprétation et de traduction / International Journal of Interpretation and Translation
John Benjamins Publishing CompanyEnglish Text Construction
John Benjamins Publishing CompanyConstructions and Frames
John Benjamins Publishing CompanyPragmatics and Society
John Benjamins Publishing CompanyTranslation and Interpreting Studies
John Benjamins Publishing CompanyLanguage and Dialogue
John Benjamins Publishing CompanyMetaphor in Language, Cognition, and Communication
Hamburg Studies on Linguistic Diversity
John Benjamins Publishing CompanyTranslation Spaces
Studies in Arabic Linguistics
John Benjamins Publishing CompanyJournal of Immersion and Content-Based Language Education (JICB)
Children's Literature, Culture, and Cognition
John Benjamins Publishing CompanyJournal of Language Aggression and Conflict
FILLM Studies in Languages and Literatures
Advances in Historical Sociolinguistics
John Benjamins Publishing CompanyLinguistic Landscape
John Benjamins Publishing CompanyInternational Journal of Learner Corpus Research
John Benjamins Publishing CompanyJournal of Second Language Pronunciation
ITL - International Journal of Applied Linguistics
John Benjamins Publishing CompanyCognitive Individual Differences in Second Language Processing and Acquisition
John Benjamins Publishing CompanyFORUM
John Benjamins Publishing CompanyStudies in Germanic Linguistics
QUERY: SELECT  publisher,
+name
+FROM journal
+LEFT JOIN homepage ON journal.issnl = homepage.issnl
+WHERE homepage.surt = 'com,benjamins)/';
+

Domains that block us:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
domainjournal_homepagesSUM(blocked)
jstor.org76696575
wiley.com4285229
brill.nl234161
bentham.org152150
annualreviews.org174103
euppublishing.com6254
hogrefe.com7251
icevirtuallibrary.com4637
uem.br4936
brepolsonline.net4734
futuremedicine.com4031
siam.org6723
vr-elibrary.de2417
liverpooluniversitypress.co.uk2615
rodopi.nl2015
nrcresearchpress.com1814
aiaa.org3410
guilfordjournals.com1410
ingentaconnect.com4179
univie.ac.at549
QUERY: SELECT  domain,
+COUNT(*) as journal_homepages,
+SUM(blocked)
+FROM homepage
+GROUP BY domain
+ORDER BY SUM(blocked) DESC
+LIMIT 20;
+

Top duplicated homepage URLs:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
urlCOUNT(*)
http://www.indianjournals.com/73
http://www.hindawi.com/70
http://search.informit.com.au/search;res=APAFT60
http://www.umi.com/proquest46
http://www.umi.com/pqdauto/45
http://ieeexplore.ieee.org/Xplore/conferences.jsp40
http://omicsonline.org/36
http://www.idealibrary.com/36
http://ieeexplore.ieee.org/xpl/conferences.jsp24
http://www.metapress.com/24
http://www.randspublications.org/22
http://www.studia.ubbcluj.ro/serii/index_en.html22
http://find.galegroup.com/ips/publicationSearch.do21
http://jurnal.unimed.ac.id/21
http://www.bioinfo.in/journals.php20
http://www.interscience.wiley.com/20
http://www.commongroundpublishing.com/19
http://www.haworthpress.com/19
http://www.heinonline.org/19
http://www.infosci-journals.com/19
QUERY: SELECT  url,
+COUNT(*)
+FROM homepage
+GROUP BY url
+ORDER BY COUNT(*) DESC
+LIMIT 20;
+

Number of journals with a homepage that points to web.archive.org or archive.org:

+
+ + + + + + +
COUNT(DISTINCT issnl)
1453
QUERY: SELECT COUNT(DISTINCT issnl)
+FROM homepage
+WHERE domain = 'archive.org';
+

Top publishers that have journal homepages pointing to the Wayback Machine (archive.org):

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
publisherCOUNT(*)
653
EDP Sciences23
CAIRN18
OpenEdition18
Elsevier6
Springer6
PERSEE Program5
Peer Community In5
Institut de recherche et d'histoire des textes (France)4
San Lucas Medical4
QUERY: SELECT  publisher,
+COUNT(*)
+FROM journal
+LEFT JOIN homepage ON journal.issnl = homepage.issnl
+WHERE homepage.domain = 'archive.org'
+GROUP BY journal.publisher
+ORDER BY COUNT(*) DESC
+LIMIT 10;
+

Top publishers by number of journals missing a homepage:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
publisherCOUNT(*)
21472
Peter Lang International Academic Publishers1270
Elsevier866
J-STAGE863
Egypts Presidential Specialized Council for Education and Scientific Research340
Georg Thieme Verlag KG288
Al Manhal FZ, LLC217
Informa UK (Taylor & Francis)202
Springer-Verlag156
ELSEVIER LTD145
Inderscience122
African Journals Online121
Diva Enterprises Private Limited119
PERSEE Program118
Sabinet109
SAGE Publications103
Brill99
Superintendent of Government Documents99
Taylor & Francis98
Bentham Science94
QUERY: SELECT  publisher,
+COUNT(*)
+FROM journal
+WHERE any_homepage=0
+GROUP BY publisher
+ORDER BY COUNT(*) DESC
+LIMIT 20;
+
-- cgit v1.2.3