diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-10-03 15:31:14 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-10-03 15:31:14 -0700 |
commit | 2e2bbc2ab9642675abdec6696b8862e67593323a (patch) | |
tree | d733b2c1e8308f80fbe779766083cb85e507e22a /extra | |
parent | 0e5ca287570de54657f11723b8749ea688a2c11f (diff) | |
download | fatcat-2e2bbc2ab9642675abdec6696b8862e67593323a.tar.gz fatcat-2e2bbc2ab9642675abdec6696b8862e67593323a.zip |
export raw affiliation strings for analysis
Diffstat (limited to 'extra')
-rw-r--r-- | extra/sql_dumps/dump_affiliations.sql | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/extra/sql_dumps/dump_affiliations.sql b/extra/sql_dumps/dump_affiliations.sql
new file mode 100644
index 00000000..3371b35c
--- /dev/null
+++ b/extra/sql_dumps/dump_affiliations.sql
@@ -0,0 +1,17 @@
+-- Dump (release ident id, raw affiliation) pairs for live, non-redirect idents from one snapshot.
+BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE;
+
+COPY (
+    SELECT ri.id, rc.raw_affiliation
+    FROM release_contrib AS rc
+    INNER JOIN release_ident AS ri ON ri.rev_id = rc.release_rev
+    WHERE ri.is_live = 't'
+      AND ri.redirect_id IS NULL
+      AND rc.raw_affiliation IS NOT NULL
+) TO '/tmp/fatcat_affiliations.tsv' WITH NULL '';
+
+ROLLBACK;  -- the transaction is read-only, so there is nothing to commit
+
+-- Post-processing (shell, run where the TSV was written): count each string, most frequent first:
+--   cut -f2 fatcat_affiliations.tsv | sort -S 4G | uniq -c | sort -nr | gzip > fatcat_affiliations.counts.txt.gz
+--   gzip fatcat_affiliations.tsv