diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-01-26 00:53:17 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-01-26 00:53:30 -0800 |
commit | 2995379f558e8f5c2712bb17467586644d2d2fb4 (patch) | |
tree | 74c2aee2064d30f2d3f6b4d08371576e08965d8f /fatcat_scholar/issue_db.py | |
parent | 401e133df4411d343a6f7a47ea1087f2dfbb2ae4 (diff) | |
download | fatcat-scholar-2995379f558e8f5c2712bb17467586644d2d2fb4.tar.gz fatcat-scholar-2995379f558e8f5c2712bb17467586644d2d2fb4.zip |
issue-db: bug fixes; refactors
Diffstat (limited to 'fatcat_scholar/issue_db.py')
-rw-r--r-- | fatcat_scholar/issue_db.py | 15 |
1 files changed, 13 insertions, 2 deletions
```diff
diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py
index e01c6d9..3c55b51 100644
--- a/fatcat_scholar/issue_db.py
+++ b/fatcat_scholar/issue_db.py
@@ -124,7 +124,7 @@ def es_issue_count(
         .filter("term", issue=issue)
         .extra(request_cache=True)
     )
-    search = search.params(request_cache="true")
+    search = search.params()
     return search.count()
 
 
@@ -173,6 +173,7 @@ class IssueDB:
     def insert_sim_pub(self, pub: SimPubRow, cur: Any = None) -> None:
         if not cur:
             cur = self.db.cursor()
+        # print(pub.tuple(), file=sys.stderr)
         cur.execute(
             "INSERT OR REPLACE INTO sim_pub VALUES (?,?,?,?,?,?,?,?,?)", pub.tuple()
         )
@@ -258,7 +259,7 @@ class IssueDB:
             meta = obj["metadata"]
             assert "periodicals" in meta["collection"]
             container: Optional[fatcat_openapi_client.ContainerEntity] = None
-            if meta.get("issn"):
+            if meta.get("issn") and len(meta["issn"]) == 9:
                 try:
                     container = api.lookup_container(issnl=meta["issn"])
                 except fatcat_openapi_client.ApiException as ae:
@@ -275,6 +276,8 @@ class IssueDB:
                 container_ident=container and container.ident,
                 wikidata_qid=container and container.wikidata_qid,
             )
+            if isinstance(row.publisher, list):
+                row.publisher = row.publisher[0]
             self.insert_sim_pub(row, cur)
         cur.close()
         self.db.commit()
@@ -350,8 +353,16 @@ class IssueDB:
                 "SELECT sim_pubid, container_ident FROM sim_pub WHERE container_ident IS NOT NULL;"
             )
         )
+        print(
+            f"Loading fatcat container counts for {len(all_pub_containers)} entities...",
+            file=sys.stderr,
+        )
         cur: Any = self.db.cursor()
+        count = 0
         for (sim_pubid, container_ident) in all_pub_containers:
+            count += 1
+            if count % 500 == 0:
+                print(f" {count}...", file=sys.stderr)
             aggs = es_container_aggs(es_client, container_ident)
             for agg in aggs:
                 row = ReleaseCountsRow(
```