datetime('now')
2020-06-24 06:07:59
QUERY: SELECT datetime('now');
seq
name
file
0
main
/home/bnewbold/code/chocula/chocula.sqlite
QUERY: PRAGMA database_list;
Top publishers by journal count:
publisher
COUNT(*)
50438
Elsevier
4896
Springer
3180
Taylor & Francis
3048
John Wiley & Sons, Inc
2322
SAGE Publications
1441
J-STAGE
1405
Peter Lang International Academic Publishers
1356
SciELO
1187
Informa UK (Taylor & Francis)
737
Springer-Verlag
706
Cambridge University Press
598
Walter de Gruyter GmbH
554
Georg Thieme Verlag KG
515
OMICS Publishing Group
497
IEEE, Inc
482
JSTOR
471
Medknow Publications
463
Oxford University Press
461
Hindawi
456
Bentham Science
445
De Gruyter Open Sp. z o.o.
440
Wolters Kluwer Health
427
CAIRN
416
Inderscience
396
QUERY: SELECT publisher, COUNT(*)
FROM journal
GROUP BY publisher
ORDER BY COUNT(*) DESC
LIMIT 25;
Top countries by number of journals:
country
COUNT(*)
us
31063
11775
id
10203
de
8221
in
7489
gb
7357
fr
6947
uk
5989
nl
5579
br
4779
QUERY: SELECT country,
COUNT(*)
FROM journal
GROUP BY country
ORDER BY COUNT(*) DESC
LIMIT 10;
... by number of papers:
country
COUNT(*)
SUM(release_count)
us
31063
32178939
gb
7357
13514834
nl
5579
10994203
de
8221
7537722
jp
3924
5534859
uk
5989
4689013
fr
6947
2205320
ch
2184
1956771
ru
3321
1437805
in
7489
1193631
QUERY: SELECT country,
COUNT(*),
SUM(release_count)
FROM journal
GROUP BY country
ORDER BY SUM(release_count) DESC
LIMIT 10;
Top languages by number of journals:
lang
COUNT(*)
119385
en
33483
fr
2545
es
1980
pt
1253
id
807
fa
705
de
687
ja
627
ru
454
QUERY: SELECT lang,
COUNT(*)
FROM journal
GROUP BY lang
ORDER BY COUNT(*) DESC
LIMIT 10;
... by number of papers:
lang
COUNT(*)
SUM(release_count)
en
33483
52106736
119385
39633327
de
687
1080227
ja
627
701062
fr
2545
459624
es
1980
326190
pt
1253
259440
ru
454
216062
it
365
107777
id
807
63140
QUERY: SELECT lang,
COUNT(*),
SUM(release_count)
FROM journal
GROUP BY lang
ORDER BY SUM(release_count) DESC
LIMIT 10;
Fulltext coverage by publisher type:
publisher_type
AVG(ia_frac)
AVG(preserved_frac)
journal_count
paper_count
big5
0.20383140988711096
0.7478923363774996
14341
38968407
society
0.38285716584323315
0.5255948097434624
11594
17479369
0.2892933315187726
0.39489655868508394
76449
17433767
unipress
0.5291521159229248
0.7156133857743203
8156
5979807
commercial
0.33223411266240366
0.679294424296642
5900
5783703
longtail
0.7021171169141004
0.7512778812541995
42817
5541048
repository
0.12411437397202797
0.24122655404137083
765
1032655
scielo
0.8234053698608026
0.8532657517834494
1588
931427
other
0.17819330003908534
0.6325501923587422
961
847590
archive
0.3274660229123176
0.9870091603652298
545
733596
oa
0.7703412508161128
0.8051562963022213
1854
665337
QUERY: SELECT publisher_type,
AVG(ia_frac),
AVG(preserved_frac),
COUNT(*) AS journal_count,
SUM(release_count) AS paper_count
FROM journal
GROUP BY publisher_type
ORDER BY SUM(release_count) DESC;
Top publishers with very little coverage:
publisher
journal_count
AVG(ia_frac)
9604
0.0017576670094484276
Elsevier
1894
0.01719793308556602
Taylor & Francis
1034
0.026333463256277043
J-STAGE
1000
0.008786577145385244
John Wiley & Sons, Inc
758
0.021752551474386456
Informa UK (Taylor & Francis)
583
0.010076744435949143
SAGE Publications
566
0.018548379050446432
Springer-Verlag
381
0.015239356601104427
Springer
352
0.025469114384971014
JSTOR
271
0.01053436333975769
QUERY: SELECT publisher,
COUNT(*) AS journal_count,
AVG(ia_frac)
FROM journal
WHERE ia_frac < 0.05
GROUP BY publisher
ORDER BY journal_count DESC
LIMIT 10;
Amount of fulltext by SHERPA/ROMEO journal color:
sherpa_color
SUM(ia_count)
8179318
blue
1068696
green
10279336
white
731881
yellow
2489843
QUERY: SELECT sherpa_color,
SUM(ia_count)
FROM journal
GROUP BY sherpa_color;
Homepage URL counts:
unique_urls
journals_with_homepages
188481
118838
QUERY: SELECT COUNT(DISTINCT surt) as unique_urls, COUNT(DISTINCT issnl) as journals_with_homepages FROM homepage;
Journals with the most homepage URLs:
issnl
COUNT(*)
0036-6439
21
1487-0614
16
2375-0383
16
2374-4030
15
0097-6326
14
0749-405X
13
1521-9097
13
0009-7004
12
0030-7076
12
0717-554X
12
QUERY: SELECT issnl,
COUNT(*)
FROM homepage
GROUP BY issnl
ORDER BY COUNT(*) DESC
LIMIT 10;
Top/redundant URLs and SURTs:
surt
COUNT(*)
com,indianjournals)/
80
com,hindawi)/
71
au,com,informit,search)/search;res=apaft
64
com,umi)/pqdauto
51
org,rsc,pubs)/en/ebooks
50
com,umi)/proquest
48
org,ieee,ieeexplore)/xplore/conferences.jsp
40
org,omicsonline)/
37
com,idealibrary)/
36
com,wiley,interscience)/
31
QUERY: SELECT surt,
COUNT(*)
FROM homepage
GROUP BY surt
ORDER BY COUNT(*) DESC
LIMIT 10;
What is the deal with all those "benjamins" URLs?
publisher
name
John Benjamins Publishing Company
NOWELE
Studia Uralo-Altaica
John Benjamins Publishing Company
Language Problems and Language Planning
John Benjamins Publishing Company
Lingvisticæ investigationes
John Benjamins Publishing Company
Linguistics of the TIbeto-Burman Area
John Benjamins Publishing Company
Pragmatics & Cognition
John Benjamins Publishing Company
Terminology
John Benjamins Publishing Company
Written Language & Literacy
FORUM: Revue internationale d?interprétation et de traduction / International Journal of Interpretation and Translation
John Benjamins Publishing Company
English Text Construction
John Benjamins Publishing Company
Constructions and Frames
John Benjamins Publishing Company
Pragmatics and Society
John Benjamins Publishing Company
Translation and Interpreting Studies
John Benjamins Publishing Company
Language and Dialogue
John Benjamins Publishing Company
Metaphor in Language, Cognition, and Communication
Hamburg Studies on Linguistic Diversity
John Benjamins Publishing Company
Translation Spaces
Studies in Arabic Linguistics
John Benjamins Publishing Company
Journal of Immersion and Content-Based Language Education (JICB)
Children's Literature, Culture, and Cognition
John Benjamins Publishing Company
Journal of Language Aggression and Conflict
FILLM Studies in Languages and Literatures
Advances in Historical Sociolinguistics
John Benjamins Publishing Company
Linguistic Landscape
John Benjamins Publishing Company
International Journal of Learner Corpus Research
John Benjamins Publishing Company
Journal of Second Language Pronunciation
ITL - International Journal of Applied Linguistics
John Benjamins Publishing Company
Cognitive Individual Differences in Second Language Processing and Acquisition
John Benjamins Publishing Company
FORUM
John Benjamins Publishing Company
Studies in Germanic Linguistics
QUERY: SELECT publisher,
name
FROM journal
LEFT JOIN homepage ON journal.issnl = homepage.issnl
WHERE homepage.surt = 'com,benjamins)/';
Domains that block us:
domain
journal_homepages
SUM(blocked)
jstor.org
7669
6575
wiley.com
4285
229
brill.nl
234
161
bentham.org
152
150
annualreviews.org
174
103
euppublishing.com
62
54
hogrefe.com
72
51
icevirtuallibrary.com
46
37
uem.br
49
36
brepolsonline.net
47
34
futuremedicine.com
40
31
siam.org
67
23
vr-elibrary.de
24
17
liverpooluniversitypress.co.uk
26
15
rodopi.nl
20
15
nrcresearchpress.com
18
14
aiaa.org
34
10
guilfordjournals.com
14
10
ingentaconnect.com
417
9
univie.ac.at
54
9
QUERY: SELECT domain,
COUNT(*) as journal_homepages,
SUM(blocked)
FROM homepage
GROUP BY domain
ORDER BY SUM(blocked) DESC
LIMIT 20;
Top duplicated homepage URLs:
QUERY: SELECT url,
COUNT(*)
FROM homepage
GROUP BY url
ORDER BY COUNT(*) DESC
LIMIT 20;
Number of journals with a homepage that points to web.archive.org or archive.org:
COUNT(DISTINCT issnl)
1453
QUERY: SELECT COUNT(DISTINCT issnl)
FROM homepage
WHERE domain = 'archive.org';
Top publishers that have journals in wayback:
publisher
COUNT(*)
653
EDP Sciences
23
CAIRN
18
OpenEdition
18
Elsevier
6
Springer
6
PERSEE Program
5
Peer Community In
5
Institut de recherche et d'histoire des textes (France)
4
San Lucas Medical
4
QUERY: SELECT publisher,
COUNT(*)
FROM journal
LEFT JOIN homepage ON journal.issnl = homepage.issnl
WHERE homepage.domain = 'archive.org'
GROUP BY journal.publisher
ORDER BY COUNT(*) DESC
LIMIT 10;
Top publishers by number of journals missing a homepage:
publisher
COUNT(*)
21472
Peter Lang International Academic Publishers
1270
Elsevier
866
J-STAGE
863
Egypts Presidential Specialized Council for Education and Scientific Research
340
Georg Thieme Verlag KG
288
Al Manhal FZ, LLC
217
Informa UK (Taylor & Francis)
202
Springer-Verlag
156
ELSEVIER LTD
145
Inderscience
122
African Journals Online
121
Diva Enterprises Private Limited
119
PERSEE Program
118
Sabinet
109
SAGE Publications
103
Brill
99
Superintendent of Government Documents
99
Taylor & Francis
98
Bentham Science
94
QUERY: SELECT publisher,
COUNT(*)
FROM journal
WHERE any_homepage=0
GROUP BY publisher
ORDER BY COUNT(*) DESC
LIMIT 20;