aboutsummaryrefslogtreecommitdiffstats
path: root/sql
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-02-12 19:01:44 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-02-12 19:01:44 -0800
commit: c61cb13ae42e3a170c29d4710ea2fc484081ee96 (patch)
tree: 7db2a372a72b69126341d04cc010a732b4cec46c /sql
parent: c32d64f7a7b9e01ceb4c3dc161e0ab267cf63654 (diff)
download: sandcrawler-c61cb13ae42e3a170c29d4710ea2fc484081ee96.tar.gz
download: sandcrawler-c61cb13ae42e3a170c29d4710ea2fc484081ee96.zip
pdftrio proposal and start on schema+kafka
Diffstat (limited to 'sql')
-rw-r--r-- sql/migrations/2019-12-19-060141_init/up.sql 13
1 files changed, 13 insertions, 0 deletions
diff --git a/sql/migrations/2019-12-19-060141_init/up.sql b/sql/migrations/2019-12-19-060141_init/up.sql
index 0b2b19c..a27796b 100644
--- a/sql/migrations/2019-12-19-060141_init/up.sql
+++ b/sql/migrations/2019-12-19-060141_init/up.sql
@@ -74,6 +74,19 @@ CREATE TABLE IF NOT EXISTS grobid (
);
-- CREATE INDEX grobid_fatcat_release_idx ON grobid(fatcat_release);
+CREATE TABLE IF NOT EXISTS pdftrio ( -- one row per sha1hex (primary key); IF NOT EXISTS leaves an existing table untouched
+ sha1hex TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40), -- must be exactly 40 bytes long (the length of a hex-encoded SHA-1 digest); the characters themselves are not validated
+ updated TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL, -- filled with now() when omitted on insert; nothing in this definition refreshes it on UPDATE
+ status_code INT NOT NULL, -- required; NOTE(review): presumably a numeric response code from the pdftrio service -- confirm against the writer
+ status TEXT CHECK (octet_length(status) >= 1) NOT NULL, -- required and non-empty
+ pdftrio_version TEXT CHECK (octet_length(pdftrio_version) >= 1), -- nullable; the CHECK only rejects an empty string
+ models_date DATE, -- nullable; NOTE(review): presumably the date of the models that produced the scores -- confirm
+ ensemble_score REAL, -- nullable; none of the *_score columns has a range CHECK
+ bert_score REAL, -- nullable
+ linear_score REAL, -- nullable
+ image_score REAL -- nullable
+);
+
CREATE TABLE IF NOT EXISTS ingest_request (
link_source TEXT NOT NULL CHECK (octet_length(link_source) >= 1),
link_source_id TEXT NOT NULL CHECK (octet_length(link_source_id) >= 1),