aboutsummaryrefslogtreecommitdiffstats
path: root/rust
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2018-09-15 17:00:10 -0700
committerBryan Newbold <bnewbold@robocracy.org>2018-09-15 17:00:12 -0700
commitbe3a4333c5880f3af1a4e6adbba31a030a5ebc72 (patch)
tree68bfbab3a56a61b5370aebb640ab807db9da8163 /rust
parenta11ee8d8c0fd1f924d7ba7feec58cd3fcf28752b (diff)
downloadfatcat-be3a4333c5880f3af1a4e6adbba31a030a5ebc72.tar.gz
fatcat-be3a4333c5880f3af1a4e6adbba31a030a5ebc72.zip
update extid columns: varchar size limits, no char columns
In PostgreSQL, there is no query or size differences between CHAR, VARCHAR, and TEXT: they use the same backend. CHAR will pad out with spaces and VARCHAR enforces a size limit. The enforced VARCHAR size limit seems like ok defensive programming/schema here, preventing large values from getting stored for some columns. I have some FUD concern about fixed-sized identifiers and indexes, so switching old CHARs (eg, sha1 hex encoded columns) to VARCHAR instead.
Diffstat (limited to 'rust')
-rw-r--r--rust/migrations/2018-05-12-001226_init/up.sql30
1 files changed, 15 insertions, 15 deletions
diff --git a/rust/migrations/2018-05-12-001226_init/up.sql b/rust/migrations/2018-05-12-001226_init/up.sql
index 2134b455..d297f787 100644
--- a/rust/migrations/2018-05-12-001226_init/up.sql
+++ b/rust/migrations/2018-05-12-001226_init/up.sql
@@ -40,7 +40,7 @@ CREATE TABLE changelog (
CREATE INDEX changelog_editgroup_idx ON changelog(editgroup_id);
CREATE TABLE abstracts (
- sha1 CHAR(40) PRIMARY KEY,
+ sha1 VARCHAR(40) PRIMARY KEY, -- fixed size hash (in hex)
content TEXT NOT NULL
);
@@ -52,8 +52,8 @@ CREATE TABLE creator_rev (
display_name TEXT NOT NULL,
given_name TEXT,
surname TEXT,
- orcid CHAR(19),
- wikidata_qid TEXT
+ orcid VARCHAR(19), -- fixed size identifier
+ wikidata_qid VARCHAR(12) -- limited size for data quality
-- TODO: viaf TEXT,
-- TODO: aliases/alternatives
-- TODO: sortable name?
@@ -93,10 +93,10 @@ CREATE TABLE container_rev (
name TEXT NOT NULL,
publisher TEXT,
- issnl CHAR(9),
- wikidata_qid TEXT,
+ issnl VARCHAR(9), -- fixed size identifier
+ wikidata_qid VARCHAR(12), -- limited size for data quality
abbrev TEXT,
- coden TEXT
+ coden VARCHAR(12) -- limited size for data quality
);
CREATE INDEX container_rev_issnl_idx ON container_rev(issnl);
@@ -130,9 +130,9 @@ CREATE TABLE file_rev (
extra_json JSONB,
size BIGINT,
- sha1 CHAR(40),
- sha256 CHAR(64),
- md5 CHAR(32),
+ sha1 VARCHAR(40), -- fixed size hash (in hex)
+ sha256 VARCHAR(64), -- fixed size hash (in hex)
+ md5 VARCHAR(32), -- fixed size hash (in hex)
mimetype TEXT
);
@@ -183,11 +183,11 @@ CREATE TABLE release_rev (
release_status TEXT, -- TODO: enum
release_date DATE,
doi TEXT,
- pmid TEXT, -- TODO: BIGINT?
- pmcid TEXT, -- TODO: BIGINT?
- wikidata_qid TEXT, -- TODO: BIGINT?
- isbn13 TEXT, -- TODO: fixed-length
- core_id TEXT, -- TODO: BIGINT?
+ pmid VARCHAR(12), -- limited for data quality
+ pmcid VARCHAR(12), -- limited for data quality
+ wikidata_qid VARCHAR(12), -- limited for data quality
+ isbn13 VARCHAR(13), -- fixed size identifier
+ core_id VARCHAR(12), -- limited for data quality
volume TEXT,
issue TEXT,
pages TEXT,
@@ -208,7 +208,7 @@ CREATE INDEX release_rev_work_idx ON release_rev(work_ident_id);
CREATE TABLE release_rev_abstract (
id BIGSERIAL PRIMARY KEY,
release_rev UUID REFERENCES release_rev(id) NOT NULL,
- abstract_sha1 CHAR(40) REFERENCES abstracts(sha1) NOT NULL,
+ abstract_sha1 VARCHAR(40) REFERENCES abstracts(sha1) NOT NULL, -- fixed size hash (in hex)
mimetype TEXT,
lang TEXT
);