diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-02-12 19:01:44 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-02-12 19:01:44 -0800 | 
| commit | c61cb13ae42e3a170c29d4710ea2fc484081ee96 (patch) | |
| tree | 7db2a372a72b69126341d04cc010a732b4cec46c /sql | |
| parent | c32d64f7a7b9e01ceb4c3dc161e0ab267cf63654 (diff) | |
| download | sandcrawler-c61cb13ae42e3a170c29d4710ea2fc484081ee96.tar.gz sandcrawler-c61cb13ae42e3a170c29d4710ea2fc484081ee96.zip | |
pdftrio proposal and start on schema+kafka
Diffstat (limited to 'sql')
| -rw-r--r-- | sql/migrations/2019-12-19-060141_init/up.sql | 13 | 
1 files changed, 13 insertions, 0 deletions
```diff
diff --git a/sql/migrations/2019-12-19-060141_init/up.sql b/sql/migrations/2019-12-19-060141_init/up.sql
index 0b2b19c..a27796b 100644
--- a/sql/migrations/2019-12-19-060141_init/up.sql
+++ b/sql/migrations/2019-12-19-060141_init/up.sql
@@ -74,6 +74,19 @@ CREATE TABLE IF NOT EXISTS grobid (
 );
 -- CREATE INDEX grobid_fatcat_release_idx ON grobid(fatcat_release);
 
+CREATE TABLE IF NOT EXISTS pdftrio (
+    sha1hex             TEXT PRIMARY KEY CHECK (octet_length(sha1hex) = 40),
+    updated             TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+    status_code         INT NOT NULL,
+    status              TEXT CHECK (octet_length(status) >= 1) NOT NULL,
+    pdftrio_version     TEXT CHECK (octet_length(pdftrio_version) >= 1),
+    models_date         DATE,
+    ensemble_score      REAL,
+    bert_score          REAL,
+    linear_score        REAL,
+    image_score         REAL
+);
+
 CREATE TABLE IF NOT EXISTS ingest_request (
     link_source             TEXT NOT NULL CHECK (octet_length(link_source) >= 1),
     link_source_id          TEXT NOT NULL CHECK (octet_length(link_source_id) >= 1),
```
