diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-15 17:00:10 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-15 17:00:12 -0700 |
commit | be3a4333c5880f3af1a4e6adbba31a030a5ebc72 (patch) | |
tree | 68bfbab3a56a61b5370aebb640ab807db9da8163 /rust | |
parent | a11ee8d8c0fd1f924d7ba7feec58cd3fcf28752b (diff) | |
download | fatcat-be3a4333c5880f3af1a4e6adbba31a030a5ebc72.tar.gz fatcat-be3a4333c5880f3af1a4e6adbba31a030a5ebc72.zip |
update extid columns: varchar size limits, no char columns
In PostgreSQL, there is no query or size differences between CHAR,
VARCHAR, and TEXT: they use the same backend. CHAR will pad out with
spaces and VARCHAR enforces a size limit.
The enforced VARCHAR size limit seems like ok defensive
programming/schema here, preventing large values from getting stored for
some columns. I have some FUD concern about fixed-sized identifiers and
indexes, so switching old CHARs (eg, sha1 hex encoded columns) to
VARCHAR instead.
Diffstat (limited to 'rust')
-rw-r--r-- | rust/migrations/2018-05-12-001226_init/up.sql | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/rust/migrations/2018-05-12-001226_init/up.sql b/rust/migrations/2018-05-12-001226_init/up.sql index 2134b455..d297f787 100644 --- a/rust/migrations/2018-05-12-001226_init/up.sql +++ b/rust/migrations/2018-05-12-001226_init/up.sql @@ -40,7 +40,7 @@ CREATE TABLE changelog ( CREATE INDEX changelog_editgroup_idx ON changelog(editgroup_id); CREATE TABLE abstracts ( - sha1 CHAR(40) PRIMARY KEY, + sha1 VARCHAR(40) PRIMARY KEY, -- fixed size hash (in hex) content TEXT NOT NULL ); @@ -52,8 +52,8 @@ CREATE TABLE creator_rev ( display_name TEXT NOT NULL, given_name TEXT, surname TEXT, - orcid CHAR(19), - wikidata_qid TEXT + orcid VARCHAR(19), -- fixed size identifier + wikidata_qid VARCHAR(12) -- limited size for data quality -- TODO: viaf TEXT, -- TODO: aliases/alternatives -- TODO: sortable name? @@ -93,10 +93,10 @@ CREATE TABLE container_rev ( name TEXT NOT NULL, publisher TEXT, - issnl CHAR(9), - wikidata_qid TEXT, + issnl VARCHAR(9), -- fixed size identifier + wikidata_qid VARCHAR(12), -- limited size for data quality abbrev TEXT, - coden TEXT + coden VARCHAR(12) -- limited size for data quality ); CREATE INDEX container_rev_issnl_idx ON container_rev(issnl); @@ -130,9 +130,9 @@ CREATE TABLE file_rev ( extra_json JSONB, size BIGINT, - sha1 CHAR(40), - sha256 CHAR(64), - md5 CHAR(32), + sha1 VARCHAR(40), -- fixed size hash (in hex) + sha256 VARCHAR(64), -- fixed size hash (in hex) + md5 VARCHAR(32), -- fixed size hash (in hex) mimetype TEXT ); @@ -183,11 +183,11 @@ CREATE TABLE release_rev ( release_status TEXT, -- TODO: enum release_date DATE, doi TEXT, - pmid TEXT, -- TODO: BIGINT? - pmcid TEXT, -- TODO: BIGINT? - wikidata_qid TEXT, -- TODO: BIGINT? - isbn13 TEXT, -- TODO: fixed-length - core_id TEXT, -- TODO: BIGINT? + pmid VARCHAR(12), -- limited for data quality + pmcid VARCHAR(12), -- limited for data quality + wikidata_qid VARCHAR(12), -- limited for data quality + isbn13 VARCHAR(13), -- fixed size identifier + core_id VARCHAR(12), -- limited for data quality volume TEXT, issue TEXT, pages TEXT, @@ -208,7 +208,7 @@ CREATE INDEX release_rev_work_idx ON release_rev(work_ident_id); CREATE TABLE release_rev_abstract ( id BIGSERIAL PRIMARY KEY, release_rev UUID REFERENCES release_rev(id) NOT NULL, - abstract_sha1 CHAR(40) REFERENCES abstracts(sha1) NOT NULL, + abstract_sha1 VARCHAR(40) REFERENCES abstracts(sha1) NOT NULL, -- fixed size hash (in hex) mimetype TEXT, lang TEXT ); |