diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-12-02 17:54:48 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-12-02 17:54:50 -0800 |
commit | 34ffc73485e871868ec73ff9bde4339f4bc4c753 (patch) | |
tree | 05383713db93b416f051c97b12bd475b1b93add9 | |
parent | 25d1afe6b22eccbf86de867ec86a294f2771faff (diff) | |
download | fatcat-scholar-34ffc73485e871868ec73ff9bde4339f4bc4c753.tar.gz fatcat-scholar-34ffc73485e871868ec73ff9bde4339f4bc4c753.zip |
issue-db: fixes to schema
The primary key on release_counts covered only sim_pubid, which resulted in
only one row per pubid; it is now the composite (sim_pubid, year, volume).
Also, the 'year_in_sim' column was never used, so it has been removed.
-rw-r--r-- | fatcat_scholar/issue_db.py | 5 | ||||
-rw-r--r-- | schema/issue_db.sql | 10 |
2 files changed, 6 insertions, 9 deletions
diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py index 4d6fe04..d6bc3f9 100644 --- a/fatcat_scholar/issue_db.py +++ b/fatcat_scholar/issue_db.py @@ -99,7 +99,6 @@ class SimIssueRow: @dataclass class ReleaseCountsRow: sim_pubid: str - year_in_sim: bool release_count: int year: Optional[int] volume: Optional[str] @@ -109,7 +108,6 @@ class ReleaseCountsRow: self.sim_pubid, self.year, self.volume, - self.year_in_sim, self.release_count, ) @@ -190,7 +188,7 @@ class IssueDB: if not cur: cur = self.db.cursor() cur.execute( - "INSERT OR REPLACE INTO release_counts VALUES (?,?,?,?,?)", counts.tuple() + "INSERT OR REPLACE INTO release_counts VALUES (?,?,?,?)", counts.tuple() ) def pubid2container(self, sim_pubid: str) -> Optional[str]: @@ -368,7 +366,6 @@ class IssueDB: for agg in aggs: row = ReleaseCountsRow( sim_pubid=sim_pubid, - year_in_sim=False, # TODO release_count=agg["count"], year=agg["year"], volume=agg["volume"], diff --git a/schema/issue_db.sql b/schema/issue_db.sql index e07e97e..422717e 100644 --- a/schema/issue_db.sql +++ b/schema/issue_db.sql @@ -26,10 +26,10 @@ CREATE TABLE IF NOT EXISTS sim_issue ( -- intent here is to capture how many releases are just not getting matched due -- to missing issue metadata CREATE TABLE IF NOT EXISTS release_counts ( - sim_pubid TEXT NOT NULL PRIMARY KEY, - year TEXT, - volume TEXT, - year_in_sim BOOLEAN, - release_count INTEGER, + sim_pubid TEXT NOT NULL, + year TEXT NOT NULL, + volume TEXT NOT NULL, + release_count INTEGER NOT NULL, + PRIMARY KEY(sim_pubid, year, volume), FOREIGN KEY(sim_pubid) REFERENCES sim_pub(sim_pubid) ); |