diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-06-01 02:20:13 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-06-02 00:38:51 -0700 |
commit | a8d75f256d92da3a05a4f8d080f3d518f4a2b12e (patch) | |
tree | a1d49985149a0f09e2d91e9c98d4e852d454273f /fatcat_scholar | |
parent | 4a40c62f6616825342bb23d03b9c4b9eebfe809c (diff) | |
download | fatcat-scholar-a8d75f256d92da3a05a4f8d080f3d518f4a2b12e.tar.gz fatcat-scholar-a8d75f256d92da3a05a4f8d080f3d518f4a2b12e.zip |
lint fixes, and run fmt
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- | fatcat_scholar/sandcrawler.py | 4 | ||||
-rw-r--r-- | fatcat_scholar/work_pipeline.py | 14 | ||||
-rw-r--r-- | fatcat_scholar/worker.py | 10 |
3 files changed, 10 insertions, 18 deletions
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py
index 9b033b8..0501f8e 100644
--- a/fatcat_scholar/sandcrawler.py
+++ b/fatcat_scholar/sandcrawler.py
@@ -39,9 +39,7 @@ class SandcrawlerPostgrestClient:
             return None
 
     def get_crossref(self, doi: str) -> Optional[Dict[str, Any]]:
-        resp = requests.get(
-            self.api_url + "/crossref", params=dict(doi="eq." + doi)
-        )
+        resp = requests.get(self.api_url + "/crossref", params=dict(doi="eq." + doi))
         resp.raise_for_status()
         resp_json = resp.json()
         if resp_json:
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index b90b747..2b09821 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -218,9 +218,7 @@ class WorkPipeline:
             webcapture_ident=wc.ident,
         )
 
-    def fetch_crossref(
-        self, re: ReleaseEntity
-    ) -> Optional[Dict[str, Any]]:
+    def fetch_crossref(self, re: ReleaseEntity) -> Optional[Dict[str, Any]]:
         """
         Fetches (cached) crossref metadata JSON from sandcrawler-db via
         postgrest HTTP interface.
@@ -234,16 +232,18 @@ class WorkPipeline:
         if not re.ext_ids.doi:
             # can't do lookup without a DOI
             return None
-        if re.extra and (not re.extra.get('crossref')) and (re.extra.get('datacite') or re.extra.get('jalc')):
+        if (
+            re.extra
+            and (not re.extra.get("crossref"))
+            and (re.extra.get("datacite") or re.extra.get("jalc"))
+        ):
             # if this is definitely a Datacite or JALC DOI, can skip the Crossref cache lookup
             return None
         doi = re.ext_ids.doi.lower()
         crossref_meta = self.sandcrawler_db_client.get_crossref(doi)
         if not crossref_meta or not crossref_meta.get("record"):
             return None
-        return dict(
-            release_ident=re.ident, doi=doi, record=crossref_meta["record"],
-        )
+        return dict(release_ident=re.ident, doi=doi, record=crossref_meta["record"],)
 
     def lookup_sim(self, release: ReleaseEntity) -> Optional[SimIssueRow]:
         """
diff --git a/fatcat_scholar/worker.py b/fatcat_scholar/worker.py
index 7787d42..7d2b3d6 100644
--- a/fatcat_scholar/worker.py
+++ b/fatcat_scholar/worker.py
@@ -1,6 +1,5 @@
 import os
 import sys
-import json
 import argparse
 import datetime
 from typing import List, Any
@@ -10,7 +9,6 @@ import sentry_sdk
 import elasticsearch
 import elasticsearch.helpers
 import fatcat_openapi_client
-from fatcat_openapi_client import ReleaseEntity
 
 from fatcat_scholar.config import settings, GIT_REVISION
 from fatcat_scholar.issue_db import IssueDB
@@ -18,12 +16,8 @@ from fatcat_scholar.sandcrawler import (
     SandcrawlerPostgrestClient,
     SandcrawlerMinioClient,
 )
-from fatcat_scholar.schema import (
-    DocType,
-    IntermediateBundle,
-)
+from fatcat_scholar.schema import IntermediateBundle
 from fatcat_scholar.transform import transform_heavy
-from fatcat_scholar.api_entities import entity_from_json
 from fatcat_scholar.work_pipeline import WorkPipeline
 from fatcat_scholar.sim_pipeline import SimPipeline
 from fatcat_scholar.kafka import KafkaWorker
@@ -124,7 +118,7 @@ class IndexDocsWorker(KafkaWorker):
         bulk_actions = []
         for obj in batch:
             bundle = IntermediateBundle.from_json(obj)
-            assert bundle.get('doc_type')
+            assert bundle.doc_type
             es_doc = transform_heavy(bundle)
             if not es_doc:
                 continue