From 0abb779be2cd6fc913f3c57d891b040b40baf6c3 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Sat, 16 May 2020 19:52:17 -0700
Subject: initial progress on work pipeline

---
 fatcat_scholar/issue_db.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'fatcat_scholar/issue_db.py')

diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py
index 0d33e17..5278750 100644
--- a/fatcat_scholar/issue_db.py
+++ b/fatcat_scholar/issue_db.py
@@ -44,6 +44,23 @@ class SimIssueRow:
     def tuple(self):
         return (self.issue_item, self.sim_pubid, self.year, self.volume, self.issue, self.first_page, self.last_page, self.release_count)
 
+    @classmethod
+    def from_tuple(cls, row: Any):
+        """
+        Builds a SimIssueRow from a positional row, with columns in the same
+        order as returned by tuple().
+        """
+        return cls(
+            issue_item=row[0],
+            sim_pubid=row[1],
+            year=row[2],
+            volume=row[3],
+            issue=row[4],
+            first_page=row[5],
+            last_page=row[6],
+            release_count=row[7],
+        )
+
 @dataclass
 class ReleaseCountsRow:
     sim_pubid: str
@@ -95,6 +112,7 @@ class IssueDB():
         """
         self.db = sqlite3.connect(db_file, isolation_level='EXCLUSIVE')
         self._pubid2container_map: Dict[str, Optional[str]] = dict()
+        self._container2pubid_map: Dict[str, Optional[str]] = dict()
 
     def init_db(self):
         self.db.executescript("""
@@ -135,6 +153,33 @@ class IssueDB():
             self._pubid2container_map[sim_pubid] = None
             return None
 
+    def container2pubid(self, container_ident: str) -> Optional[str]:
+        """
+        Looks up the sim_pubid for a container ident in the sim_pub table.
+        Both hits and misses (None) are cached in memory.
+        """
+        if container_ident in self._container2pubid_map:
+            return self._container2pubid_map[container_ident]
+        row = list(self.db.execute("SELECT sim_pubid FROM sim_pub WHERE container_ident = ?;", [container_ident]))
+        if row:
+            self._container2pubid_map[container_ident] = row[0][0]
+            return row[0][0]
+        else:
+            # cache the miss in the container->pubid map (not the reverse map)
+            self._container2pubid_map[container_ident] = None
+            return None
+
+    def lookup_issue(self, sim_pubid: str, volume: str, issue: str) -> Optional[SimIssueRow]:
+        """
+        Fetches the first sim_issue row matching pubid/volume/issue, or None
+        if there is no match. NOTE: relies on the table's column order
+        matching SimIssueRow.from_tuple(); confirm against the schema.
+        """
+        row = list(self.db.execute("SELECT * FROM sim_issue WHERE sim_pubid = ? AND volume = ? AND issue = ?;", [sim_pubid, volume, issue]))
+        if not row:
+            return None
+        return SimIssueRow.from_tuple(row[0])
+
     def load_pubs(self, json_lines: Sequence[str], api: Any):
         """
         Reads a file (or some other iterator) of JSON lines, parses them into a
-- 
cgit v1.2.3