From 9a558d1a8fd4021908c6195de31237a714a41b9d Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 8 Jul 2020 19:53:19 -0700 Subject: update reports --- reports/report_template.md | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'reports/report_template.md') diff --git a/reports/report_template.md b/reports/report_template.md index ac98649..ad64c5d 100644 --- a/reports/report_template.md +++ b/reports/report_template.md @@ -1,16 +1,17 @@ - - -# Chocula Journal Aggregate Stats +# Fatcat "Chocula" Journal Metadata Summary +This report is auto-generated from a SQLite database file, which should be available/included. ```sql SELECT datetime('now'); ``` +Note that nearly all of the fatcat release stats are on a *release*, not +*work*, basis, so there may be over-counting. Also, as of July 2019 there were +over 1.5 million OA longtail releases which are *not* linked to a container +(journal). + ```sql PRAGMA database_list; ``` @@ -118,6 +119,18 @@ Homepage URL counts: SELECT COUNT(DISTINCT surt) as unique_urls, COUNT(DISTINCT issnl) as journals_with_hompages FROM homepage; ``` +Journal counts by homepage status: + +```sql +SELECT any_homepage, any_live_homepage, any_gwb_homepage, COUNT(*), ROUND(1.0 * COUNT(*) / (SELECT COUNT(*) FROM journal), 2) AS frac FROM journal GROUP BY any_homepage, any_live_homepage, any_gwb_homepage; +``` + +Number of unique journals that have a homepage pointing to the Wayback Machine or archive.org: + +```sql +SELECT COUNT(DISTINCT issnl) FROM homepage WHERE domain = 'archive.org'; +``` + Journals with the most homepage URLs: ```sql -- cgit v1.2.3