summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-10-03 15:31:14 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2019-10-03 15:31:14 -0700
commit 2e2bbc2ab9642675abdec6696b8862e67593323a (patch)
tree d733b2c1e8308f80fbe779766083cb85e507e22a
parent 0e5ca287570de54657f11723b8749ea688a2c11f (diff)
download fatcat-2e2bbc2ab9642675abdec6696b8862e67593323a.tar.gz
fatcat-2e2bbc2ab9642675abdec6696b8862e67593323a.zip
export raw affiliation strings for analysis
-rw-r--r-- extra/sql_dumps/dump_affiliations.sql 17
1 files changed, 17 insertions, 0 deletions
diff --git a/extra/sql_dumps/dump_affiliations.sql b/extra/sql_dumps/dump_affiliations.sql
new file mode 100644
index 00000000..3371b35c
--- /dev/null
+++ b/extra/sql_dumps/dump_affiliations.sql
@@ -0,0 +1,17 @@
+
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE;
+
+-- Export (release ident id, raw affiliation) for each contrib row with an affiliation whose release revision belongs to a live, non-redirected ident.
+COPY (SELECT ri.id, rc.raw_affiliation
+ FROM release_contrib AS rc
+ INNER JOIN release_ident AS ri ON ri.rev_id = rc.release_rev
+ WHERE rc.raw_affiliation IS NOT NULL
+ AND ri.redirect_id IS NULL AND ri.is_live = 't')
+ TO '/tmp/fatcat_affiliations.tsv' WITH NULL '';
+
+ROLLBACK;
+
+-- Post-processing (run from /tmp, where the dump above is written):
+--
+-- cut -f2 fatcat_affiliations.tsv | sort -S 4G | uniq -c | sort -nr | gzip > fatcat_affiliations.counts.txt.gz
+-- gzip fatcat_affiliations.tsv