From 34ffc73485e871868ec73ff9bde4339f4bc4c753 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 2 Dec 2021 17:54:48 -0800 Subject: issue-db: fixes to schema The primary key on release_counts was resulting in only one row per pubid. Also the 'year_in_sim' column was never being used. --- fatcat_scholar/issue_db.py | 5 +---- schema/issue_db.sql | 10 +++++----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py index 4d6fe04..d6bc3f9 100644 --- a/fatcat_scholar/issue_db.py +++ b/fatcat_scholar/issue_db.py @@ -99,7 +99,6 @@ class SimIssueRow: @dataclass class ReleaseCountsRow: sim_pubid: str - year_in_sim: bool release_count: int year: Optional[int] volume: Optional[str] @@ -109,7 +108,6 @@ class ReleaseCountsRow: self.sim_pubid, self.year, self.volume, - self.year_in_sim, self.release_count, ) @@ -190,7 +188,7 @@ class IssueDB: if not cur: cur = self.db.cursor() cur.execute( - "INSERT OR REPLACE INTO release_counts VALUES (?,?,?,?,?)", counts.tuple() + "INSERT OR REPLACE INTO release_counts VALUES (?,?,?,?)", counts.tuple() ) def pubid2container(self, sim_pubid: str) -> Optional[str]: @@ -368,7 +366,6 @@ class IssueDB: for agg in aggs: row = ReleaseCountsRow( sim_pubid=sim_pubid, - year_in_sim=False, # TODO release_count=agg["count"], year=agg["year"], volume=agg["volume"], diff --git a/schema/issue_db.sql b/schema/issue_db.sql index e07e97e..422717e 100644 --- a/schema/issue_db.sql +++ b/schema/issue_db.sql @@ -26,10 +26,10 @@ CREATE TABLE IF NOT EXISTS sim_issue ( -- intent here is to capture how many releases are just not getting matched due -- to missing issue metadata CREATE TABLE IF NOT EXISTS release_counts ( - sim_pubid TEXT NOT NULL PRIMARY KEY, - year TEXT, - volume TEXT, - year_in_sim BOOLEAN, - release_count INTEGER, + sim_pubid TEXT NOT NULL, + year TEXT NOT NULL, + volume TEXT NOT NULL, + release_count INTEGER NOT NULL, + PRIMARY KEY(sim_pubid, year, volume), FOREIGN KEY(sim_pubid) REFERENCES sim_pub(sim_pubid) ); -- cgit v1.2.3