about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-06-01 02:20:13 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-06-02 00:38:51 -0700
commit a8d75f256d92da3a05a4f8d080f3d518f4a2b12e (patch)
tree a1d49985149a0f09e2d91e9c98d4e852d454273f
parent 4a40c62f6616825342bb23d03b9c4b9eebfe809c (diff)
download fatcat-scholar-a8d75f256d92da3a05a4f8d080f3d518f4a2b12e.tar.gz
fatcat-scholar-a8d75f256d92da3a05a4f8d080f3d518f4a2b12e.zip
lint fixes, and run fmt
-rw-r--r-- fatcat_scholar/sandcrawler.py 4
-rw-r--r-- fatcat_scholar/work_pipeline.py 14
-rw-r--r-- fatcat_scholar/worker.py 10
-rw-r--r-- tests/test_work_pipeline.py 5
4 files changed, 11 insertions, 22 deletions
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py
index 9b033b8..0501f8e 100644
--- a/fatcat_scholar/sandcrawler.py
+++ b/fatcat_scholar/sandcrawler.py
@@ -39,9 +39,7 @@ class SandcrawlerPostgrestClient:
return None
def get_crossref(self, doi: str) -> Optional[Dict[str, Any]]:
- resp = requests.get(
- self.api_url + "/crossref", params=dict(doi="eq." + doi)
- )
+ resp = requests.get(self.api_url + "/crossref", params=dict(doi="eq." + doi))
resp.raise_for_status()
resp_json = resp.json()
if resp_json:
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index b90b747..2b09821 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -218,9 +218,7 @@ class WorkPipeline:
webcapture_ident=wc.ident,
)
- def fetch_crossref(
- self, re: ReleaseEntity
- ) -> Optional[Dict[str, Any]]:
+ def fetch_crossref(self, re: ReleaseEntity) -> Optional[Dict[str, Any]]:
"""
Fetches (cached) crossref metadata JSON from sandcrawler-db via
postgrest HTTP interface.
@@ -234,16 +232,18 @@ class WorkPipeline:
if not re.ext_ids.doi:
# can't do lookup without a DOI
return None
- if re.extra and (not re.extra.get('crossref')) and (re.extra.get('datacite') or re.extra.get('jalc')):
+ if (
+ re.extra
+ and (not re.extra.get("crossref"))
+ and (re.extra.get("datacite") or re.extra.get("jalc"))
+ ):
# if this is definitely a Datacite or JALC DOI, can skip the Crossref cache lookup
return None
doi = re.ext_ids.doi.lower()
crossref_meta = self.sandcrawler_db_client.get_crossref(doi)
if not crossref_meta or not crossref_meta.get("record"):
return None
- return dict(
- release_ident=re.ident, doi=doi, record=crossref_meta["record"],
- )
+ return dict(release_ident=re.ident, doi=doi, record=crossref_meta["record"],)
def lookup_sim(self, release: ReleaseEntity) -> Optional[SimIssueRow]:
"""
diff --git a/fatcat_scholar/worker.py b/fatcat_scholar/worker.py
index 7787d42..7d2b3d6 100644
--- a/fatcat_scholar/worker.py
+++ b/fatcat_scholar/worker.py
@@ -1,6 +1,5 @@
import os
import sys
-import json
import argparse
import datetime
from typing import List, Any
@@ -10,7 +9,6 @@ import sentry_sdk
import elasticsearch
import elasticsearch.helpers
import fatcat_openapi_client
-from fatcat_openapi_client import ReleaseEntity
from fatcat_scholar.config import settings, GIT_REVISION
from fatcat_scholar.issue_db import IssueDB
@@ -18,12 +16,8 @@ from fatcat_scholar.sandcrawler import (
SandcrawlerPostgrestClient,
SandcrawlerMinioClient,
)
-from fatcat_scholar.schema import (
- DocType,
- IntermediateBundle,
-)
+from fatcat_scholar.schema import IntermediateBundle
from fatcat_scholar.transform import transform_heavy
-from fatcat_scholar.api_entities import entity_from_json
from fatcat_scholar.work_pipeline import WorkPipeline
from fatcat_scholar.sim_pipeline import SimPipeline
from fatcat_scholar.kafka import KafkaWorker
@@ -124,7 +118,7 @@ class IndexDocsWorker(KafkaWorker):
bulk_actions = []
for obj in batch:
bundle = IntermediateBundle.from_json(obj)
- assert bundle.get('doc_type')
+ assert bundle.doc_type
es_doc = transform_heavy(bundle)
if not es_doc:
continue
diff --git a/tests/test_work_pipeline.py b/tests/test_work_pipeline.py
index bf423b7..977a708 100644
--- a/tests/test_work_pipeline.py
+++ b/tests/test_work_pipeline.py
@@ -77,10 +77,7 @@ def test_run_transform(mocker: Any) -> None:
{
"doi": "10.7717/peerj.4375",
"indexed": "2020-07-07T02:15:52.98309+00:00",
- "record": {
- "title": "something",
- "TODO_better_object": 3,
- },
+ "record": {"title": "something", "TODO_better_object": 3,},
}
],
)