From 4315b44a93ca31725b9b0a2a55c310725ac55efe Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 1 Nov 2021 20:05:16 -0700 Subject: sql: grobid_refs table JSON as 'JSON' not 'JSONB' I keep flip-flopping on this, but our disk usage is really large, and if 'JSON' is smaller than 'JSONB' in postgresql at all it is worth it. --- proposals/2021-10-28_grobid_refs.md | 4 ++-- sql/migrations/2019-12-19-060141_init/up.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/proposals/2021-10-28_grobid_refs.md b/proposals/2021-10-28_grobid_refs.md index ff835d4..3f87968 100644 --- a/proposals/2021-10-28_grobid_refs.md +++ b/proposals/2021-10-28_grobid_refs.md @@ -27,7 +27,7 @@ The overall output schema matches that of the `grobid_refs` SQL table: source_id: string, eg '10.1145/3366650.3366668' source_ts: optional timestamp (full ISO datetime with timezone (eg, `Z` suffix), which identifies version of upstream metadata - refs_json: JSONB, list of `GrobidBiblio` JSON objects + refs_json: JSON, list of `GrobidBiblio` JSON objects References are re-processed on a per-article (or per-release) basis. All the references for an article are handled as a batch and output as a batch. If @@ -74,7 +74,7 @@ comparing, etc. source_id TEXT NOT NULL CHECK (octet_length(source_id) >= 1), source_ts TIMESTAMP WITH TIME ZONE, updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL, - refs_json JSONB NOT NULL, + refs_json JSON NOT NULL, PRIMARY KEY(source, source_id) ); diff --git a/sql/migrations/2019-12-19-060141_init/up.sql b/sql/migrations/2019-12-19-060141_init/up.sql index 254c08a..18972cb 100644 --- a/sql/migrations/2019-12-19-060141_init/up.sql +++ b/sql/migrations/2019-12-19-060141_init/up.sql @@ -225,7 +225,7 @@ CREATE TABLE IF NOT EXISTS grobid_refs ( source_id TEXT NOT NULL CHECK (octet_length(source_id) >= 1), source_ts TIMESTAMP WITH TIME ZONE, updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL, - refs_json JSONB NOT NULL, + refs_json JSON NOT NULL, PRIMARY KEY(source, source_id) ); -- cgit v1.2.3