diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-05-16 19:52:17 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-05-16 19:55:17 -0700 |
commit | 0abb779be2cd6fc913f3c57d891b040b40baf6c3 (patch) | |
tree | 7fe8f572963bbfd19e34fde1270b2d2aa82ed49f /fatcat_scholar/issue_db.py | |
parent | 0d9c230bd74a94006a6ff9e9e32be7ea8a6b51ac (diff) | |
download | fatcat-scholar-0abb779be2cd6fc913f3c57d891b040b40baf6c3.tar.gz fatcat-scholar-0abb779be2cd6fc913f3c57d891b040b40baf6c3.zip |
initial progress on work pipeline
Diffstat (limited to 'fatcat_scholar/issue_db.py')
-rw-r--r-- | fatcat_scholar/issue_db.py | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py
index 0d33e17..5278750 100644
--- a/fatcat_scholar/issue_db.py
+++ b/fatcat_scholar/issue_db.py
@@ -44,6 +44,19 @@ class SimIssueRow:
     def tuple(self):
         return (self.issue_item, self.sim_pubid, self.year, self.volume, self.issue, self.first_page, self.last_page, self.release_count)
 
+    @classmethod
+    def from_tuple(cls, row: Any):  # inverse of tuple(): fields are read in the same order
+        return cls(
+            issue_item=row[0],
+            sim_pubid=row[1],
+            year=row[2],
+            volume=row[3],
+            issue=row[4],
+            first_page=row[5],
+            last_page=row[6],
+            release_count=row[7],
+        )
+
 @dataclass
 class ReleaseCountsRow:
     sim_pubid: str
@@ -95,6 +108,7 @@ class IssueDB():
         """
         self.db = sqlite3.connect(db_file, isolation_level='EXCLUSIVE')
         self._pubid2container_map: Dict[str, Optional[str]] = dict()
+        self._container2pubid_map: Dict[str, Optional[str]] = dict()
 
     def init_db(self):
         self.db.executescript("""
@@ -135,6 +149,23 @@ class IssueDB():
             self._pubid2container_map[sim_pubid] = None
             return None
 
+    def container2pubid(self, container_ident: str) -> Optional[str]:
+        if container_ident in self._container2pubid_map:  # memoized hit (may be a cached None)
+            return self._container2pubid_map[container_ident]
+        row = list(self.db.execute("SELECT sim_pubid FROM sim_pub WHERE container_ident = ?;", [container_ident]))
+        if row:
+            self._container2pubid_map[container_ident] = row[0][0]
+            return row[0][0]
+        else:
+            self._container2pubid_map[container_ident] = None  # cache the miss in the map this method reads
+            return None
+
+    def lookup_issue(self, sim_pubid: str, volume: str, issue: str) -> Optional[SimIssueRow]:
+        row = list(self.db.execute("SELECT * FROM sim_issue WHERE sim_pubid = ? AND volume = ? AND issue = ?;", [sim_pubid, volume, issue]))
+        if not row:
+            return None
+        return SimIssueRow.from_tuple(row[0])  # first matching row only
+
     def load_pubs(self, json_lines: Sequence[str], api: Any):
         """
         Reads a file (or some other iterator) of JSON lines, parses them into a