aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-12-11 18:20:13 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2019-12-11 18:20:13 -0800
commit: 91f5f53c90742c80890e3bd44fdc9044555b8209 (patch)
tree: 1be3a374d90c71d767c8c0117b8a53c389cfaecd
parent: e6983247ee6f3b02a8c2fa74d5f09a4440d7511f (diff)
download: sandcrawler-91f5f53c90742c80890e3bd44fdc9044555b8209.tar.gz
download: sandcrawler-91f5f53c90742c80890e3bd44fdc9044555b8209.zip
add some GROBID metadata schema docs to SQL schema
-rw-r--r-- sql/sandcrawler_schema.sql 11
1 files changed, 11 insertions, 0 deletions
diff --git a/sql/sandcrawler_schema.sql b/sql/sandcrawler_schema.sql
index fd921ed..b6bc201 100644
--- a/sql/sandcrawler_schema.sql
+++ b/sql/sandcrawler_schema.sql
@@ -43,6 +43,17 @@ CREATE TABLE IF NOT EXISTS grobid (
status_code INT NOT NULL,
status TEXT CHECK (octet_length(status) >= 1),
fatcat_release TEXT CHECK (octet_length(fatcat_release) = 26),
+ -- extracted basic biblio metadata:
+ -- title
+ -- authors[]
+ -- full/display
+ -- given_name
+ -- surname
+ -- affiliation
+ -- year
+ -- journal_issn
+ -- journal_name
+ -- refs_count
metadata JSONB
);
-- CREATE INDEX grobid_fatcat_release_idx ON grobid(fatcat_release);