aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-06-23 23:15:17 -0700
committerBryan Newbold <bnewbold@archive.org>2020-06-23 23:15:17 -0700
commit4018c62adc32a88a76f7bd54f1003d58a29fe120 (patch)
treeaeb5a55f42c66e62150f31181fbb407e73914bfc
parent66ff1990d81bfc461c4cbbcc46278f785c4f273c (diff)
downloadchocula-4018c62adc32a88a76f7bd54f1003d58a29fe120.tar.gz
chocula-4018c62adc32a88a76f7bd54f1003d58a29fe120.zip
updated report HTML
-rw-r--r--reports/report.2020-06-24.html1172
1 files changed, 1172 insertions, 0 deletions
diff --git a/reports/report.2020-06-24.html b/reports/report.2020-06-24.html
new file mode 100644
index 0000000..4d9636e
--- /dev/null
+++ b/reports/report.2020-06-24.html
@@ -0,0 +1,1172 @@
+<!--
+This template can be "executed" to generate an HTML report page using the
+`sqlite-notebook` tool.
+-->
+
+<h1>Chocula Journal Aggregate Stats</h1>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>datetime('now')</th>
+</tr></thead>
+<tr>
+ <td>2020-06-24 06:07:59</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT datetime('now');</pre>
+<br></code></div><div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>seq</th>
+ <th>name</th>
+ <th>file</th>
+</tr></thead>
+<tr>
+ <td>0</td>
+ <td>main</td>
+ <td>/home/bnewbold/code/chocula/chocula.sqlite</td>
+</tr>
+</table><pre><b>QUERY:</b> PRAGMA database_list;</pre>
+<br></code></div><h2>Overview</h2>
+<p>Top publishers by journal count:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>publisher</th>
+ <th>COUNT(*)</th>
+</tr></thead>
+<tr>
+ <td></td>
+ <td>50438</td>
+</tr>
+<tr>
+ <td>Elsevier</td>
+ <td>4896</td>
+</tr>
+<tr>
+ <td>Springer</td>
+ <td>3180</td>
+</tr>
+<tr>
+ <td>Taylor & Francis</td>
+ <td>3048</td>
+</tr>
+<tr>
+ <td>John Wiley & Sons, Inc</td>
+ <td>2322</td>
+</tr>
+<tr>
+ <td>SAGE Publications</td>
+ <td>1441</td>
+</tr>
+<tr>
+ <td>J-STAGE</td>
+ <td>1405</td>
+</tr>
+<tr>
+ <td>Peter Lang International Academic Publishers</td>
+ <td>1356</td>
+</tr>
+<tr>
+ <td>SciELO</td>
+ <td>1187</td>
+</tr>
+<tr>
+ <td>Informa UK (Taylor & Francis)</td>
+ <td>737</td>
+</tr>
+<tr>
+ <td>Springer-Verlag</td>
+ <td>706</td>
+</tr>
+<tr>
+ <td>Cambridge University Press</td>
+ <td>598</td>
+</tr>
+<tr>
+ <td>Walter de Gruyter GmbH</td>
+ <td>554</td>
+</tr>
+<tr>
+ <td>Georg Thieme Verlag KG</td>
+ <td>515</td>
+</tr>
+<tr>
+ <td>OMICS Publishing Group</td>
+ <td>497</td>
+</tr>
+<tr>
+ <td>IEEE, Inc</td>
+ <td>482</td>
+</tr>
+<tr>
+ <td>JSTOR</td>
+ <td>471</td>
+</tr>
+<tr>
+ <td>Medknow Publications</td>
+ <td>463</td>
+</tr>
+<tr>
+ <td>Oxford University Press</td>
+ <td>461</td>
+</tr>
+<tr>
+ <td>Hindawi</td>
+ <td>456</td>
+</tr>
+<tr>
+ <td>Bentham Science</td>
+ <td>445</td>
+</tr>
+<tr>
+ <td>De Gruyter Open Sp. z o.o.</td>
+ <td>440</td>
+</tr>
+<tr>
+ <td>Wolters Kluwer Health</td>
+ <td>427</td>
+</tr>
+<tr>
+ <td>CAIRN</td>
+ <td>416</td>
+</tr>
+<tr>
+ <td>Inderscience</td>
+ <td>396</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT publisher, COUNT(*)
+FROM journal
+GROUP BY publisher
+ORDER BY COUNT(*) DESC
+LIMIT 25;</pre>
+<br></code></div><p>Top countries by number of journals:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>country</th>
+ <th>COUNT(*)</th>
+</tr></thead>
+<tr>
+ <td>us</td>
+ <td>31063</td>
+</tr>
+<tr>
+ <td></td>
+ <td>11775</td>
+</tr>
+<tr>
+ <td>id</td>
+ <td>10203</td>
+</tr>
+<tr>
+ <td>de</td>
+ <td>8221</td>
+</tr>
+<tr>
+ <td>in</td>
+ <td>7489</td>
+</tr>
+<tr>
+ <td>gb</td>
+ <td>7357</td>
+</tr>
+<tr>
+ <td>fr</td>
+ <td>6947</td>
+</tr>
+<tr>
+ <td>uk</td>
+ <td>5989</td>
+</tr>
+<tr>
+ <td>nl</td>
+ <td>5579</td>
+</tr>
+<tr>
+ <td>br</td>
+ <td>4779</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT country,
+COUNT(*)
+FROM journal
+GROUP BY country
+ORDER BY COUNT(*) DESC
+LIMIT 10;</pre>
+<br></code></div><p>... by number of papers:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>country</th>
+ <th>COUNT(*)</th>
+ <th>SUM(release_count)</th>
+</tr></thead>
+<tr>
+ <td>us</td>
+ <td>31063</td>
+ <td>32178939</td>
+</tr>
+<tr>
+ <td>gb</td>
+ <td>7357</td>
+ <td>13514834</td>
+</tr>
+<tr>
+ <td>nl</td>
+ <td>5579</td>
+ <td>10994203</td>
+</tr>
+<tr>
+ <td>de</td>
+ <td>8221</td>
+ <td>7537722</td>
+</tr>
+<tr>
+ <td>jp</td>
+ <td>3924</td>
+ <td>5534859</td>
+</tr>
+<tr>
+ <td>uk</td>
+ <td>5989</td>
+ <td>4689013</td>
+</tr>
+<tr>
+ <td>fr</td>
+ <td>6947</td>
+ <td>2205320</td>
+</tr>
+<tr>
+ <td>ch</td>
+ <td>2184</td>
+ <td>1956771</td>
+</tr>
+<tr>
+ <td>ru</td>
+ <td>3321</td>
+ <td>1437805</td>
+</tr>
+<tr>
+ <td>in</td>
+ <td>7489</td>
+ <td>1193631</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT country,
+COUNT(*),
+SUM(release_count)
+FROM journal
+GROUP BY country
+ORDER BY SUM(release_count) DESC
+LIMIT 10;</pre>
+<br></code></div><p>Top languages by number of journals:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>lang</th>
+ <th>COUNT(*)</th>
+</tr></thead>
+<tr>
+ <td></td>
+ <td>119385</td>
+</tr>
+<tr>
+ <td>en</td>
+ <td>33483</td>
+</tr>
+<tr>
+ <td>fr</td>
+ <td>2545</td>
+</tr>
+<tr>
+ <td>es</td>
+ <td>1980</td>
+</tr>
+<tr>
+ <td>pt</td>
+ <td>1253</td>
+</tr>
+<tr>
+ <td>id</td>
+ <td>807</td>
+</tr>
+<tr>
+ <td>fa</td>
+ <td>705</td>
+</tr>
+<tr>
+ <td>de</td>
+ <td>687</td>
+</tr>
+<tr>
+ <td>ja</td>
+ <td>627</td>
+</tr>
+<tr>
+ <td>ru</td>
+ <td>454</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT lang,
+COUNT(*)
+FROM journal
+GROUP BY lang
+ORDER BY COUNT(*) DESC
+LIMIT 10;</pre>
+<br></code></div><p>... by number of papers:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>lang</th>
+ <th>COUNT(*)</th>
+ <th>SUM(release_count)</th>
+</tr></thead>
+<tr>
+ <td>en</td>
+ <td>33483</td>
+ <td>52106736</td>
+</tr>
+<tr>
+ <td></td>
+ <td>119385</td>
+ <td>39633327</td>
+</tr>
+<tr>
+ <td>de</td>
+ <td>687</td>
+ <td>1080227</td>
+</tr>
+<tr>
+ <td>ja</td>
+ <td>627</td>
+ <td>701062</td>
+</tr>
+<tr>
+ <td>fr</td>
+ <td>2545</td>
+ <td>459624</td>
+</tr>
+<tr>
+ <td>es</td>
+ <td>1980</td>
+ <td>326190</td>
+</tr>
+<tr>
+ <td>pt</td>
+ <td>1253</td>
+ <td>259440</td>
+</tr>
+<tr>
+ <td>ru</td>
+ <td>454</td>
+ <td>216062</td>
+</tr>
+<tr>
+ <td>it</td>
+ <td>365</td>
+ <td>107777</td>
+</tr>
+<tr>
+ <td>id</td>
+ <td>807</td>
+ <td>63140</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT lang,
+COUNT(*),
+SUM(release_count)
+FROM journal
+GROUP BY lang
+ORDER BY SUM(release_count) DESC
+LIMIT 10;</pre>
+<br></code></div><h2>Fatcat Fulltext Coverage</h2>
+<p>Fulltext coverage by publisher type:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>publisher_type</th>
+ <th>AVG(ia_frac)</th>
+ <th>AVG(preserved_frac)</th>
+ <th>journal_count</th>
+ <th>paper_count</th>
+</tr></thead>
+<tr>
+ <td>big5</td>
+ <td>0.20383140988711096</td>
+ <td>0.7478923363774996</td>
+ <td>14341</td>
+ <td>38968407</td>
+</tr>
+<tr>
+ <td>society</td>
+ <td>0.38285716584323315</td>
+ <td>0.5255948097434624</td>
+ <td>11594</td>
+ <td>17479369</td>
+</tr>
+<tr>
+ <td></td>
+ <td>0.2892933315187726</td>
+ <td>0.39489655868508394</td>
+ <td>76449</td>
+ <td>17433767</td>
+</tr>
+<tr>
+ <td>unipress</td>
+ <td>0.5291521159229248</td>
+ <td>0.7156133857743203</td>
+ <td>8156</td>
+ <td>5979807</td>
+</tr>
+<tr>
+ <td>commercial</td>
+ <td>0.33223411266240366</td>
+ <td>0.679294424296642</td>
+ <td>5900</td>
+ <td>5783703</td>
+</tr>
+<tr>
+ <td>longtail</td>
+ <td>0.7021171169141004</td>
+ <td>0.7512778812541995</td>
+ <td>42817</td>
+ <td>5541048</td>
+</tr>
+<tr>
+ <td>repository</td>
+ <td>0.12411437397202797</td>
+ <td>0.24122655404137083</td>
+ <td>765</td>
+ <td>1032655</td>
+</tr>
+<tr>
+ <td>scielo</td>
+ <td>0.8234053698608026</td>
+ <td>0.8532657517834494</td>
+ <td>1588</td>
+ <td>931427</td>
+</tr>
+<tr>
+ <td>other</td>
+ <td>0.17819330003908534</td>
+ <td>0.6325501923587422</td>
+ <td>961</td>
+ <td>847590</td>
+</tr>
+<tr>
+ <td>archive</td>
+ <td>0.3274660229123176</td>
+ <td>0.9870091603652298</td>
+ <td>545</td>
+ <td>733596</td>
+</tr>
+<tr>
+ <td>oa</td>
+ <td>0.7703412508161128</td>
+ <td>0.8051562963022213</td>
+ <td>1854</td>
+ <td>665337</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT publisher_type,
+AVG(ia_frac),
+AVG(preserved_frac),
+COUNT(*) AS journal_count,
+SUM(release_count) AS paper_count
+FROM journal
+GROUP BY publisher_type
+ORDER BY SUM(release_count) DESC;</pre>
+<br></code></div><p>Top publishers with very little coverage:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>publisher</th>
+ <th>journal_count</th>
+ <th>AVG(ia_frac)</th>
+</tr></thead>
+<tr>
+ <td></td>
+ <td>9604</td>
+ <td>0.0017576670094484276</td>
+</tr>
+<tr>
+ <td>Elsevier</td>
+ <td>1894</td>
+ <td>0.01719793308556602</td>
+</tr>
+<tr>
+ <td>Taylor & Francis</td>
+ <td>1034</td>
+ <td>0.026333463256277043</td>
+</tr>
+<tr>
+ <td>J-STAGE</td>
+ <td>1000</td>
+ <td>0.008786577145385244</td>
+</tr>
+<tr>
+ <td>John Wiley & Sons, Inc</td>
+ <td>758</td>
+ <td>0.021752551474386456</td>
+</tr>
+<tr>
+ <td>Informa UK (Taylor & Francis)</td>
+ <td>583</td>
+ <td>0.010076744435949143</td>
+</tr>
+<tr>
+ <td>SAGE Publications</td>
+ <td>566</td>
+ <td>0.018548379050446432</td>
+</tr>
+<tr>
+ <td>Springer-Verlag</td>
+ <td>381</td>
+ <td>0.015239356601104427</td>
+</tr>
+<tr>
+ <td>Springer</td>
+ <td>352</td>
+ <td>0.025469114384971014</td>
+</tr>
+<tr>
+ <td>JSTOR</td>
+ <td>271</td>
+ <td>0.01053436333975769</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT publisher,
+COUNT(*) AS journal_count,
+AVG(ia_frac)
+FROM journal
+WHERE ia_frac &lt; 0.05
+GROUP BY publisher
+ORDER BY journal_count DESC
+LIMIT 10;</pre>
+<br></code></div><p>Amount of fulltext by SHERPA/ROMEO journal color:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>sherpa_color</th>
+ <th>SUM(ia_count)</th>
+</tr></thead>
+<tr>
+ <td></td>
+ <td>8179318</td>
+</tr>
+<tr>
+ <td>blue</td>
+ <td>1068696</td>
+</tr>
+<tr>
+ <td>green</td>
+ <td>10279336</td>
+</tr>
+<tr>
+ <td>white</td>
+ <td>731881</td>
+</tr>
+<tr>
+ <td>yellow</td>
+ <td>2489843</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT sherpa_color,
+SUM(ia_count)
+FROM journal
+GROUP BY sherpa_color;</pre>
+<br></code></div><h2>Journal Homepages</h2>
+<p>Homepage URL counts:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>unique_urls</th>
+ <th>journals_with_homepages</th>
+</tr></thead>
+<tr>
+ <td>188481</td>
+ <td>118838</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT COUNT(DISTINCT surt) as unique_urls, COUNT(DISTINCT issnl) as journals_with_homepages FROM homepage;</pre>
+<br></code></div><p>Journals with the most homepage URLs:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>issnl</th>
+ <th>COUNT(*)</th>
+</tr></thead>
+<tr>
+ <td>0036-6439</td>
+ <td>21</td>
+</tr>
+<tr>
+ <td>1487-0614</td>
+ <td>16</td>
+</tr>
+<tr>
+ <td>2375-0383</td>
+ <td>16</td>
+</tr>
+<tr>
+ <td>2374-4030</td>
+ <td>15</td>
+</tr>
+<tr>
+ <td>0097-6326</td>
+ <td>14</td>
+</tr>
+<tr>
+ <td>0749-405X</td>
+ <td>13</td>
+</tr>
+<tr>
+ <td>1521-9097</td>
+ <td>13</td>
+</tr>
+<tr>
+ <td>0009-7004</td>
+ <td>12</td>
+</tr>
+<tr>
+ <td>0030-7076</td>
+ <td>12</td>
+</tr>
+<tr>
+ <td>0717-554X</td>
+ <td>12</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT issnl,
+COUNT(*)
+FROM homepage
+GROUP BY issnl
+ORDER BY COUNT(*) DESC
+LIMIT 10;</pre>
+<br></code></div><p>Top/redundant URLs and SURTs:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>surt</th>
+ <th>COUNT(*)</th>
+</tr></thead>
+<tr>
+ <td>com,indianjournals)/</td>
+ <td>80</td>
+</tr>
+<tr>
+ <td>com,hindawi)/</td>
+ <td>71</td>
+</tr>
+<tr>
+ <td>au,com,informit,search)/search;res=apaft</td>
+ <td>64</td>
+</tr>
+<tr>
+ <td>com,umi)/pqdauto</td>
+ <td>51</td>
+</tr>
+<tr>
+ <td>org,rsc,pubs)/en/ebooks</td>
+ <td>50</td>
+</tr>
+<tr>
+ <td>com,umi)/proquest</td>
+ <td>48</td>
+</tr>
+<tr>
+ <td>org,ieee,ieeexplore)/xplore/conferences.jsp</td>
+ <td>40</td>
+</tr>
+<tr>
+ <td>org,omicsonline)/</td>
+ <td>37</td>
+</tr>
+<tr>
+ <td>com,idealibrary)/</td>
+ <td>36</td>
+</tr>
+<tr>
+ <td>com,wiley,interscience)/</td>
+ <td>31</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT surt,
+COUNT(*)
+FROM homepage
+GROUP BY surt
+ORDER BY COUNT(*) DESC
+LIMIT 10;</pre>
+<br></code></div><p>What is the deal with all those "benjamins" URLs?</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>publisher</th>
+ <th>name</th>
+</tr></thead>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>NOWELE</td>
+</tr>
+<tr>
+ <td></td>
+ <td>Studia Uralo-Altaica</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Language Problems and Language Planning</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Lingvisticæ investigationes</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Linguistics of the Tibeto-Burman Area</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Pragmatics & Cognition</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Terminology</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Written Language & Literacy</td>
+</tr>
+<tr>
+ <td></td>
+ <td>FORUM: Revue internationale d’interprétation et de traduction / International Journal of Interpretation and Translation</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>English Text Construction</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Constructions and Frames</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Pragmatics and Society</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Translation and Interpreting Studies</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Language and Dialogue</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Metaphor in Language, Cognition, and Communication</td>
+</tr>
+<tr>
+ <td></td>
+ <td>Hamburg Studies on Linguistic Diversity</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Translation Spaces</td>
+</tr>
+<tr>
+ <td></td>
+ <td>Studies in Arabic Linguistics</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Journal of Immersion and Content-Based Language Education (JICB)</td>
+</tr>
+<tr>
+ <td></td>
+ <td>Children's Literature, Culture, and Cognition</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Journal of Language Aggression and Conflict</td>
+</tr>
+<tr>
+ <td></td>
+ <td>FILLM Studies in Languages and Literatures</td>
+</tr>
+<tr>
+ <td></td>
+ <td>Advances in Historical Sociolinguistics</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Linguistic Landscape</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>International Journal of Learner Corpus Research</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Journal of Second Language Pronunciation</td>
+</tr>
+<tr>
+ <td></td>
+ <td>ITL - International Journal of Applied Linguistics</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Cognitive Individual Differences in Second Language Processing and Acquisition</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>FORUM</td>
+</tr>
+<tr>
+ <td>John Benjamins Publishing Company</td>
+ <td>Studies in Germanic Linguistics</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT publisher,
+name
+FROM journal
+LEFT JOIN homepage ON journal.issnl = homepage.issnl
+WHERE homepage.surt = 'com,benjamins)/';</pre>
+<br></code></div><p>Domains that block us:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>domain</th>
+ <th>journal_homepages</th>
+ <th>SUM(blocked)</th>
+</tr></thead>
+<tr>
+ <td>jstor.org</td>
+ <td>7669</td>
+ <td>6575</td>
+</tr>
+<tr>
+ <td>wiley.com</td>
+ <td>4285</td>
+ <td>229</td>
+</tr>
+<tr>
+ <td>brill.nl</td>
+ <td>234</td>
+ <td>161</td>
+</tr>
+<tr>
+ <td>bentham.org</td>
+ <td>152</td>
+ <td>150</td>
+</tr>
+<tr>
+ <td>annualreviews.org</td>
+ <td>174</td>
+ <td>103</td>
+</tr>
+<tr>
+ <td>euppublishing.com</td>
+ <td>62</td>
+ <td>54</td>
+</tr>
+<tr>
+ <td>hogrefe.com</td>
+ <td>72</td>
+ <td>51</td>
+</tr>
+<tr>
+ <td>icevirtuallibrary.com</td>
+ <td>46</td>
+ <td>37</td>
+</tr>
+<tr>
+ <td>uem.br</td>
+ <td>49</td>
+ <td>36</td>
+</tr>
+<tr>
+ <td>brepolsonline.net</td>
+ <td>47</td>
+ <td>34</td>
+</tr>
+<tr>
+ <td>futuremedicine.com</td>
+ <td>40</td>
+ <td>31</td>
+</tr>
+<tr>
+ <td>siam.org</td>
+ <td>67</td>
+ <td>23</td>
+</tr>
+<tr>
+ <td>vr-elibrary.de</td>
+ <td>24</td>
+ <td>17</td>
+</tr>
+<tr>
+ <td>liverpooluniversitypress.co.uk</td>
+ <td>26</td>
+ <td>15</td>
+</tr>
+<tr>
+ <td>rodopi.nl</td>
+ <td>20</td>
+ <td>15</td>
+</tr>
+<tr>
+ <td>nrcresearchpress.com</td>
+ <td>18</td>
+ <td>14</td>
+</tr>
+<tr>
+ <td>aiaa.org</td>
+ <td>34</td>
+ <td>10</td>
+</tr>
+<tr>
+ <td>guilfordjournals.com</td>
+ <td>14</td>
+ <td>10</td>
+</tr>
+<tr>
+ <td>ingentaconnect.com</td>
+ <td>417</td>
+ <td>9</td>
+</tr>
+<tr>
+ <td>univie.ac.at</td>
+ <td>54</td>
+ <td>9</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT domain,
+COUNT(*) as journal_homepages,
+SUM(blocked)
+FROM homepage
+GROUP BY domain
+ORDER BY SUM(blocked) DESC
+LIMIT 20;</pre>
+<br></code></div><p>Top duplicated URLs:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>url</th>
+ <th>COUNT(*)</th>
+</tr></thead>
+<tr>
+ <td><a href="http://www.indianjournals.com/">http://www.indianjournals.com/</a></td>
+ <td>73</td>
+</tr>
+<tr>
+ <td><a href="http://www.hindawi.com/">http://www.hindawi.com/</a></td>
+ <td>70</td>
+</tr>
+<tr>
+ <td><a href="http://search.informit.com.au/search;res=APAFT">http://search.informit.com.au/search;res=APAFT</a></td>
+ <td>60</td>
+</tr>
+<tr>
+ <td><a href="http://www.umi.com/proquest">http://www.umi.com/proquest</a></td>
+ <td>46</td>
+</tr>
+<tr>
+ <td><a href="http://www.umi.com/pqdauto/">http://www.umi.com/pqdauto/</a></td>
+ <td>45</td>
+</tr>
+<tr>
+ <td><a href="http://ieeexplore.ieee.org/Xplore/conferences.jsp">http://ieeexplore.ieee.org/Xplore/conferences.jsp</a></td>
+ <td>40</td>
+</tr>
+<tr>
+ <td><a href="http://omicsonline.org/">http://omicsonline.org/</a></td>
+ <td>36</td>
+</tr>
+<tr>
+ <td><a href="http://www.idealibrary.com/">http://www.idealibrary.com/</a></td>
+ <td>36</td>
+</tr>
+<tr>
+ <td><a href="http://ieeexplore.ieee.org/xpl/conferences.jsp">http://ieeexplore.ieee.org/xpl/conferences.jsp</a></td>
+ <td>24</td>
+</tr>
+<tr>
+ <td><a href="http://www.metapress.com/">http://www.metapress.com/</a></td>
+ <td>24</td>
+</tr>
+<tr>
+ <td><a href="http://www.randspublications.org/">http://www.randspublications.org/</a></td>
+ <td>22</td>
+</tr>
+<tr>
+ <td><a href="http://www.studia.ubbcluj.ro/serii/index_en.html">http://www.studia.ubbcluj.ro/serii/index_en.html</a></td>
+ <td>22</td>
+</tr>
+<tr>
+ <td><a href="http://find.galegroup.com/ips/publicationSearch.do">http://find.galegroup.com/ips/publicationSearch.do</a></td>
+ <td>21</td>
+</tr>
+<tr>
+ <td><a href="http://jurnal.unimed.ac.id/">http://jurnal.unimed.ac.id/</a></td>
+ <td>21</td>
+</tr>
+<tr>
+ <td><a href="http://www.bioinfo.in/journals.php">http://www.bioinfo.in/journals.php</a></td>
+ <td>20</td>
+</tr>
+<tr>
+ <td><a href="http://www.interscience.wiley.com/">http://www.interscience.wiley.com/</a></td>
+ <td>20</td>
+</tr>
+<tr>
+ <td><a href="http://www.commongroundpublishing.com/">http://www.commongroundpublishing.com/</a></td>
+ <td>19</td>
+</tr>
+<tr>
+ <td><a href="http://www.haworthpress.com/">http://www.haworthpress.com/</a></td>
+ <td>19</td>
+</tr>
+<tr>
+ <td><a href="http://www.heinonline.org/">http://www.heinonline.org/</a></td>
+ <td>19</td>
+</tr>
+<tr>
+ <td><a href="http://www.infosci-journals.com/">http://www.infosci-journals.com/</a></td>
+ <td>19</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT url,
+COUNT(*)
+FROM homepage
+GROUP BY url
+ORDER BY COUNT(*) DESC
+LIMIT 20;</pre>
+<br></code></div><p>Number of journals with a homepage that points to web.archive.org or archive.org:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>COUNT(DISTINCT issnl)</th>
+</tr></thead>
+<tr>
+ <td>1453</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT COUNT(DISTINCT issnl)
+FROM homepage
+WHERE domain = 'archive.org';</pre>
+<br></code></div><p>Top publishers that have journals in wayback:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>publisher</th>
+ <th>COUNT(*)</th>
+</tr></thead>
+<tr>
+ <td></td>
+ <td>653</td>
+</tr>
+<tr>
+ <td>EDP Sciences</td>
+ <td>23</td>
+</tr>
+<tr>
+ <td>CAIRN</td>
+ <td>18</td>
+</tr>
+<tr>
+ <td>OpenEdition</td>
+ <td>18</td>
+</tr>
+<tr>
+ <td>Elsevier</td>
+ <td>6</td>
+</tr>
+<tr>
+ <td>Springer</td>
+ <td>6</td>
+</tr>
+<tr>
+ <td>PERSEE Program</td>
+ <td>5</td>
+</tr>
+<tr>
+ <td>Peer Community In</td>
+ <td>5</td>
+</tr>
+<tr>
+ <td>Institut de recherche et d'histoire des textes (France)</td>
+ <td>4</td>
+</tr>
+<tr>
+ <td>San Lucas Medical</td>
+ <td>4</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT publisher,
+COUNT(*)
+FROM journal
+LEFT JOIN homepage ON journal.issnl = homepage.issnl
+WHERE homepage.domain = 'archive.org'
+GROUP BY journal.publisher
+ORDER BY COUNT(*) DESC
+LIMIT 10;</pre>
+<br></code></div><p>Top publishers by number of journals missing a homepage:</p>
+<div style="margin: 1em 3em 1em 3em; "><code><table>
+ <thead><tr>
+ <th>publisher</th>
+ <th>COUNT(*)</th>
+</tr></thead>
+<tr>
+ <td></td>
+ <td>21472</td>
+</tr>
+<tr>
+ <td>Peter Lang International Academic Publishers</td>
+ <td>1270</td>
+</tr>
+<tr>
+ <td>Elsevier</td>
+ <td>866</td>
+</tr>
+<tr>
+ <td>J-STAGE</td>
+ <td>863</td>
+</tr>
+<tr>
+ <td>Egypts Presidential Specialized Council for Education and Scientific Research</td>
+ <td>340</td>
+</tr>
+<tr>
+ <td>Georg Thieme Verlag KG</td>
+ <td>288</td>
+</tr>
+<tr>
+ <td>Al Manhal FZ, LLC</td>
+ <td>217</td>
+</tr>
+<tr>
+ <td>Informa UK (Taylor & Francis)</td>
+ <td>202</td>
+</tr>
+<tr>
+ <td>Springer-Verlag</td>
+ <td>156</td>
+</tr>
+<tr>
+ <td>ELSEVIER LTD</td>
+ <td>145</td>
+</tr>
+<tr>
+ <td>Inderscience</td>
+ <td>122</td>
+</tr>
+<tr>
+ <td>African Journals Online</td>
+ <td>121</td>
+</tr>
+<tr>
+ <td>Diva Enterprises Private Limited</td>
+ <td>119</td>
+</tr>
+<tr>
+ <td>PERSEE Program</td>
+ <td>118</td>
+</tr>
+<tr>
+ <td>Sabinet</td>
+ <td>109</td>
+</tr>
+<tr>
+ <td>SAGE Publications</td>
+ <td>103</td>
+</tr>
+<tr>
+ <td>Brill</td>
+ <td>99</td>
+</tr>
+<tr>
+ <td>Superintendent of Government Documents</td>
+ <td>99</td>
+</tr>
+<tr>
+ <td>Taylor & Francis</td>
+ <td>98</td>
+</tr>
+<tr>
+ <td>Bentham Science</td>
+ <td>94</td>
+</tr>
+</table><pre><b>QUERY:</b> SELECT publisher,
+COUNT(*)
+FROM journal
+WHERE any_homepage=0
+GROUP BY publisher
+ORDER BY COUNT(*) DESC
+LIMIT 20;</pre>
+<br></code></div>