aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-03-10 22:43:15 -0700
committerBryan Newbold <bnewbold@archive.org>2020-03-10 23:01:20 -0700
commit7188d0ee2c0e10415a86f2eac04e7a5dee82a7ad (patch)
treebfc088774b6aa619484e9d99a805ae486a5ad16b
parent37ca92cc8923254bb579eb986bb924438bd5e9e8 (diff)
downloadsandcrawler-7188d0ee2c0e10415a86f2eac04e7a5dee82a7ad.tar.gz
sandcrawler-7188d0ee2c0e10415a86f2eac04e7a5dee82a7ad.zip
DOI prefix example queries (SQL)
-rw-r--r--sql/random_queries.md20
1 files changed, 17 insertions, 3 deletions
diff --git a/sql/random_queries.md b/sql/random_queries.md
index d88f45b..572b4f9 100644
--- a/sql/random_queries.md
+++ b/sql/random_queries.md
@@ -117,9 +117,23 @@ Or:
Can also do some quick lookups for a specific domain and protocol like:
SELECT *
- FROM ingest_file_result
- WHERE terminal_url LIKE 'https://insights.ovid.com/%'
- LIMIT 10;
+ FROM ingest_file_result
+ WHERE terminal_url LIKE 'https://insights.ovid.com/%'
+ LIMIT 10;
+
+For a given DOI prefix:
+
+ SELECT *
+ FROM ingest_file_result
+ WHERE base_url LIKE 'https://doi.org/10.17223/a%'
+ AND status = 'no-pdf-link'
+ LIMIT 10;
+
+ SELECT status, count(*)
+ FROM ingest_file_result
+ WHERE base_url LIKE 'https://doi.org/10.17223/%'
+ GROUP BY status
+ ORDER BY count(*) DESC;
## Bulk Ingest