aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-12-02 17:54:48 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2021-12-02 17:54:50 -0800
commit: 34ffc73485e871868ec73ff9bde4339f4bc4c753 (patch)
tree: 05383713db93b416f051c97b12bd475b1b93add9
parent: 25d1afe6b22eccbf86de867ec86a294f2771faff (diff)
download: fatcat-scholar-34ffc73485e871868ec73ff9bde4339f4bc4c753.tar.gz
download: fatcat-scholar-34ffc73485e871868ec73ff9bde4339f4bc4c753.zip
issue-db: fixes to schema
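The primary key on release_counts was sim_pubid alone, which resulted in only one row per pubid; it is now the composite key (sim_pubid, year, volume). Also, the 'year_in_sim' column was never used, so it has been removed from both the schema and ReleaseCountsRow.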
-rw-r--r-- fatcat_scholar/issue_db.py | 5
-rw-r--r-- schema/issue_db.sql | 10
2 files changed, 6 insertions, 9 deletions
diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py
index 4d6fe04..d6bc3f9 100644
--- a/fatcat_scholar/issue_db.py
+++ b/fatcat_scholar/issue_db.py
@@ -99,7 +99,6 @@ class SimIssueRow:
@dataclass
class ReleaseCountsRow:
sim_pubid: str
- year_in_sim: bool
release_count: int
year: Optional[int]
volume: Optional[str]
@@ -109,7 +108,6 @@ class ReleaseCountsRow:
self.sim_pubid,
self.year,
self.volume,
- self.year_in_sim,
self.release_count,
)
@@ -190,7 +188,7 @@ class IssueDB:
if not cur:
cur = self.db.cursor()
cur.execute(
- "INSERT OR REPLACE INTO release_counts VALUES (?,?,?,?,?)", counts.tuple()
+ "INSERT OR REPLACE INTO release_counts VALUES (?,?,?,?)", counts.tuple()
)
def pubid2container(self, sim_pubid: str) -> Optional[str]:
@@ -368,7 +366,6 @@ class IssueDB:
for agg in aggs:
row = ReleaseCountsRow(
sim_pubid=sim_pubid,
- year_in_sim=False, # TODO
release_count=agg["count"],
year=agg["year"],
volume=agg["volume"],
diff --git a/schema/issue_db.sql b/schema/issue_db.sql
index e07e97e..422717e 100644
--- a/schema/issue_db.sql
+++ b/schema/issue_db.sql
@@ -26,10 +26,10 @@ CREATE TABLE IF NOT EXISTS sim_issue (
-- intent here is to capture how many releases are just not getting matched due
-- to missing issue metadata
CREATE TABLE IF NOT EXISTS release_counts (
- sim_pubid TEXT NOT NULL PRIMARY KEY,
- year TEXT,
- volume TEXT,
- year_in_sim BOOLEAN,
- release_count INTEGER,
+ sim_pubid TEXT NOT NULL,
+ year TEXT NOT NULL,
+ volume TEXT NOT NULL,
+ release_count INTEGER NOT NULL,
+ PRIMARY KEY(sim_pubid, year, volume),
FOREIGN KEY(sim_pubid) REFERENCES sim_pub(sim_pubid)
);