about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/api_auth.py 15
-rw-r--r-- python/fatcat_tools/fcid.py 14
-rw-r--r-- python/fatcat_tools/kafka.py 10
-rw-r--r-- python/fatcat_tools/normal.py 34
-rw-r--r-- python/fatcat_tools/references.py 8
-rw-r--r-- python/fatcat_tools/reviewers/review_common.py 87
-rw-r--r-- python/fatcat_tools/transforms/csl.py 14
-rw-r--r-- python/fatcat_tools/transforms/entities.py 4
-rw-r--r-- python/fatcat_tools/transforms/ingest.py 12
9 files changed, 129 insertions, 69 deletions
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index d8f0c46d..5eba583e 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -1,27 +1,28 @@
import os
import sys
+from typing import Optional
-import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, Configuration, DefaultApi
-def public_api(host_uri):
+def public_api(host_uri: str) -> DefaultApi:
"""
Note: unlike the authenticated variant, this helper might get called even
if the API isn't going to be used, so it's important that it doesn't try to
actually connect to the API host or something.
"""
- conf = fatcat_openapi_client.Configuration()
+ conf = Configuration()
conf.host = host_uri
- return fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(conf))
+ return DefaultApi(ApiClient(conf))
-def authenticated_api(host_uri, token=None):
+def authenticated_api(host_uri: str, token: Optional[str] = None) -> DefaultApi:
"""
Note: if this helper is called, it's implied that an actual API connection
is needed, so it does try to connect and verify credentials.
"""
- conf = fatcat_openapi_client.Configuration()
+ conf = Configuration()
conf.host = host_uri
if not token:
token = os.environ["FATCAT_API_AUTH_TOKEN"]
@@ -33,7 +34,7 @@ def authenticated_api(host_uri, token=None):
conf.api_key["Authorization"] = token
conf.api_key_prefix["Authorization"] = "Bearer"
- api = fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(conf))
+ api = DefaultApi(ApiClient(conf))
# verify up front that auth is working
api.auth_check()
diff --git a/python/fatcat_tools/fcid.py b/python/fatcat_tools/fcid.py
index 53891e5a..07463f62 100644
--- a/python/fatcat_tools/fcid.py
+++ b/python/fatcat_tools/fcid.py
@@ -2,17 +2,17 @@ import base64
import uuid
-def fcid2uuid(s):
+def fcid2uuid(fcid: str) -> str:
"""
Converts a fatcat identifier (base32 encoded string) to a uuid.UUID object
"""
- s = s.split("_")[-1].upper().encode("utf-8")
- assert len(s) == 26
- raw = base64.b32decode(s + b"======")
- return str(uuid.UUID(bytes=raw)).lower()
+ b = fcid.split("_")[-1].upper().encode("utf-8")
+ assert len(b) == 26
+ raw_bytes = base64.b32decode(b + b"======")
+ return str(uuid.UUID(bytes=raw_bytes)).lower()
-def uuid2fcid(s):
+def uuid2fcid(s: str) -> str:
"""
Converts a uuid.UUID object to a fatcat identifier (base32 encoded string)
"""
@@ -20,6 +20,6 @@ def uuid2fcid(s):
return base64.b32encode(raw)[:26].lower().decode("utf-8")
-def test_fcid():
+def test_fcid() -> None:
test_uuid = "00000000-0000-0000-3333-000000000001"
assert test_uuid == fcid2uuid(uuid2fcid(test_uuid))
diff --git a/python/fatcat_tools/kafka.py b/python/fatcat_tools/kafka.py
index 2a4451ad..fe9f36e9 100644
--- a/python/fatcat_tools/kafka.py
+++ b/python/fatcat_tools/kafka.py
@@ -1,7 +1,9 @@
+from typing import Any, Optional
+
from confluent_kafka import KafkaException, Producer
-def kafka_fail_fast(err, msg):
+def kafka_fail_fast(err: Optional[Any], _msg: Any) -> None:
if err is not None:
print("Kafka producer delivery error: {}".format(err))
print("Bailing out...")
@@ -9,7 +11,11 @@ def kafka_fail_fast(err, msg):
raise KafkaException(err)
-def simple_kafka_producer(kafka_hosts):
+def simple_kafka_producer(kafka_hosts: str) -> Producer:
+ """
+ kafka_hosts should be a string with hostnames separated by ',', not a list
+ of hostnames
+ """
kafka_config = {
"bootstrap.servers": kafka_hosts,
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index daf47ded..34e5c3d1 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -70,7 +70,7 @@ def clean_doi(raw: Optional[str]) -> Optional[str]:
return raw
-def test_clean_doi():
+def test_clean_doi() -> None:
assert clean_doi("10.1234/asdf ") == "10.1234/asdf"
assert clean_doi("10.1037//0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
assert clean_doi("10.1037/0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
@@ -117,7 +117,7 @@ def clean_arxiv_id(raw: Optional[str]) -> Optional[str]:
return raw
-def test_clean_arxiv_id():
+def test_clean_arxiv_id() -> None:
assert clean_arxiv_id("0806.2878v1") == "0806.2878v1"
assert clean_arxiv_id("0806.2878") == "0806.2878"
assert clean_arxiv_id("1501.00001v1") == "1501.00001v1"
@@ -146,7 +146,7 @@ def test_clean_arxiv_id():
assert clean_arxiv_id("08062878v1") is None
-def clean_wikidata_qid(raw):
+def clean_wikidata_qid(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
raw = raw.strip()
@@ -157,7 +157,7 @@ def clean_wikidata_qid(raw):
return None
-def test_clean_wikidata_qid():
+def test_clean_wikidata_qid() -> None:
assert clean_wikidata_qid("Q1234") == "Q1234"
assert clean_wikidata_qid("Q1") == "Q1"
assert clean_wikidata_qid(" Q1234 ") == "Q1234"
@@ -181,7 +181,7 @@ def clean_pmid(raw: Optional[str]) -> Optional[str]:
return None
-def test_clean_pmid():
+def test_clean_pmid() -> None:
assert clean_pmid("1234") == "1234"
assert clean_pmid("1234 ") == "1234"
assert clean_pmid("PMC123") is None
@@ -214,7 +214,7 @@ def clean_sha1(raw: Optional[str]) -> Optional[str]:
return raw
-def test_clean_sha1():
+def test_clean_sha1() -> None:
assert (
clean_sha1("0fba3fba0e1937aa0297de3836b768b5dfb23d7b")
== "0fba3fba0e1937aa0297de3836b768b5dfb23d7b"
@@ -242,7 +242,7 @@ def clean_sha256(raw: Optional[str]) -> Optional[str]:
return raw
-def test_clean_sha256():
+def test_clean_sha256() -> None:
assert (
clean_sha256("6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f")
== "6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f"
@@ -264,7 +264,7 @@ def clean_issn(raw: Optional[str]) -> Optional[str]:
return raw
-def test_clean_issn():
+def test_clean_issn() -> None:
assert clean_issn("1234-4567") == "1234-4567"
assert clean_issn("1234-456X") == "1234-456X"
assert clean_issn("134-4567") is None
@@ -283,7 +283,7 @@ def clean_isbn13(raw: Optional[str]) -> Optional[str]:
return raw
-def test_clean_isbn13():
+def test_clean_isbn13() -> None:
assert clean_isbn13("978-1-56619-909-4") == "978-1-56619-909-4"
assert clean_isbn13("978-1-4028-9462-6") == "978-1-4028-9462-6"
assert clean_isbn13("978-1-56619-909-4 ") == "978-1-56619-909-4"
@@ -302,7 +302,7 @@ def clean_orcid(raw: Optional[str]) -> Optional[str]:
return raw
-def test_clean_orcid():
+def test_clean_orcid() -> None:
assert clean_orcid("0123-4567-3456-6789") == "0123-4567-3456-6789"
assert clean_orcid("0123-4567-3456-678X") == "0123-4567-3456-678X"
assert clean_orcid("0123-4567-3456-6789 ") == "0123-4567-3456-6789"
@@ -313,7 +313,7 @@ def test_clean_orcid():
HDL_REGEX = re.compile(r"^\d+(\.\d+)*/\S+$")
-def clean_hdl(raw):
+def clean_hdl(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
raw = raw.strip().lower()
@@ -332,7 +332,7 @@ def clean_hdl(raw):
return raw
-def test_clean_hdl():
+def test_clean_hdl() -> None:
assert clean_hdl("20.500.23456/ABC/DUMMY") == "20.500.23456/abc/dummy"
assert clean_hdl("hdl:20.500.23456/ABC/DUMMY") == "20.500.23456/abc/dummy"
assert (
@@ -373,7 +373,7 @@ def clean_str(thing: Optional[str], force_xml: bool = False) -> Optional[str]:
return fixed
-def test_clean_str():
+def test_clean_str() -> None:
assert clean_str(None) is None
assert clean_str("") is None
@@ -384,7 +384,7 @@ def test_clean_str():
assert clean_str("<b>a&amp;b</b>", force_xml=True) == "<b>a&b</b>"
-def b32_hex(s):
+def b32_hex(s: str) -> str:
s = s.strip().split()[0].lower()
if s.startswith("sha1:"):
s = s[5:]
@@ -393,7 +393,7 @@ def b32_hex(s):
return base64.b16encode(base64.b32decode(s.upper())).lower().decode("utf-8")
-def is_cjk(s):
+def is_cjk(s: Optional[str]) -> bool:
if not s:
return False
for c in s:
@@ -403,7 +403,7 @@ def is_cjk(s):
return False
-def test_is_cjk():
+def test_is_cjk() -> None:
assert is_cjk(None) is False
assert is_cjk("") is False
assert is_cjk("blah") is False
@@ -593,7 +593,7 @@ def parse_country_name(s: Optional[str]) -> Optional[str]:
return None
-def test_parse_country_name():
+def test_parse_country_name() -> None:
assert parse_country_name("") is None
assert parse_country_name("asdf blah") is None
assert parse_country_name("us") == "us"
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py
index 624020b5..f41b5973 100644
--- a/python/fatcat_tools/references.py
+++ b/python/fatcat_tools/references.py
@@ -8,7 +8,7 @@ See bulk citation and citation API proposals for design documentation.
import argparse
import datetime
import sys
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional
import elasticsearch
from elasticsearch_dsl import Search
@@ -65,7 +65,7 @@ class BiblioRef(BaseModel):
target_unstructured: Optional[str]
target_csl: Optional[Dict[str, Any]]
- def hacks(self):
+ def hacks(self) -> "BiblioRef":
"""
Temporary (?) hacks to work around schema/data issues
"""
@@ -105,7 +105,7 @@ class EnrichedBiblioRef(BaseModel):
@validator("release")
@classmethod
- def check_release(cls, v):
+ def check_release(cls: Any, v: ReleaseEntity) -> ReleaseEntity:
if v is not None and not isinstance(v, ReleaseEntity):
raise ValueError("expected a ReleaseEntity")
return v
@@ -399,7 +399,7 @@ def enrich_outbound_refs(
return enriched
-def run_ref_query(args) -> None:
+def run_ref_query(args: argparse.Namespace) -> None:
"""
CLI helper/debug tool (prints to stdout)
"""
diff --git a/python/fatcat_tools/reviewers/review_common.py b/python/fatcat_tools/reviewers/review_common.py
index 59ff1c4e..d599b31f 100644
--- a/python/fatcat_tools/reviewers/review_common.py
+++ b/python/fatcat_tools/reviewers/review_common.py
@@ -5,6 +5,7 @@ from collections import Counter
from typing import Any, List, Optional
import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, Editgroup, EditgroupAnnotation, EntityEdit
"""
checks should return:
@@ -29,7 +30,13 @@ class CheckResult:
rev = None
check_type = None
- def __init__(self, status, check_type=None, description=None, **kwargs):
+ def __init__(
+ self,
+ status: str,
+ check_type: Optional[str] = None,
+ description: Optional[str] = None,
+ **kwargs
+ ):
self.status = status
self.check_type = check_type
self.description = description
@@ -45,36 +52,64 @@ class EditCheck:
scope: List[Any] = []
name: Optional[str] = None
- def check_editgroup(self, editgroup):
+ def check_editgroup(self, editgroup: fatcat_openapi_client.Editgroup) -> CheckResult:
raise NotImplementedError
- def check_container(self, edit, entity):
+ def check_container(
+ self,
+ edit: EntityEdit,
+ entity: fatcat_openapi_client.ContainerEntity,
+ ) -> CheckResult:
raise NotImplementedError
- def check_creator(self, edit, entity):
+ def check_creator(
+ self,
+ edit: EntityEdit,
+ entity: fatcat_openapi_client.CreatorEntity,
+ ) -> CheckResult:
raise NotImplementedError
- def check_file(self, edit, entity):
+ def check_file(
+ self,
+ edit: EntityEdit,
+ entity: fatcat_openapi_client.FileEntity,
+ ) -> CheckResult:
raise NotImplementedError
- def check_fileset(self, edit, entity):
+ def check_fileset(
+ self,
+ edit: EntityEdit,
+ entity: fatcat_openapi_client.FilesetEntity,
+ ) -> CheckResult:
raise NotImplementedError
- def check_webcapture(self, edit, entity):
+ def check_webcapture(
+ self,
+ edit: EntityEdit,
+ entity: fatcat_openapi_client.WebcaptureEntity,
+ ) -> CheckResult:
raise NotImplementedError
- def check_release(self, edit, entity):
+ def check_release(
+ self,
+ edit: EntityEdit,
+ entity: fatcat_openapi_client.ReleaseEntity,
+ ) -> CheckResult:
raise NotImplementedError
- def check_work(self, edit, work):
+ def check_work(
+ self,
+ edit: EntityEdit,
+ work: fatcat_openapi_client.WorkEntity,
+ ) -> CheckResult:
raise NotImplementedError
class ReviewBot:
- def __init__(self, api, verbose=False, **kwargs):
+ def __init__(self, api: fatcat_openapi_client.ApiClient, verbose: bool = False, **kwargs):
self.api = api
- self.checks = []
+ self.checks: List[EditCheck] = []
self.verbose = verbose
self.extra = kwargs.get("extra", dict())
self.extra["git_rev"] = self.extra.get(
@@ -83,16 +118,18 @@ class ReviewBot:
self.extra["agent"] = self.extra.get("agent", "fatcat_tools.ReviewBot")
self.poll_interval = kwargs.get("poll_interval", 10.0)
- def run_single(self, editgroup_id, annotate=True):
+ def run_single(self, editgroup_id: str, annotate: bool = True) -> CheckResult:
eg = self.api.get_editgroup(editgroup_id)
annotation = self.review_editgroup(eg)
if annotate:
self.api.create_editgroup_annotation(eg.editgroup_id, annotation)
return annotation
- def run(self, since=None):
- if since is None:
+ def run(self, start_since: Optional[datetime.datetime] = None) -> None:
+ if start_since is None:
since = datetime.datetime.utcnow()
+ else:
+ since = start_since
while True:
# XXX: better isoformat conversion?
eg_list = self.api.get_editgroups_reviewable(
@@ -116,7 +153,7 @@ class ReviewBot:
# editgroups in the same second)
since = since + datetime.timedelta(seconds=1)
- def review_editgroup(self, editgroup):
+ def review_editgroup(self, editgroup: Editgroup) -> EditgroupAnnotation:
results = self.run_checks(editgroup)
result_counts = self.result_counts(results)
disposition = self.disposition(results)
@@ -159,20 +196,20 @@ class ReviewBot:
)
return annotation
- def result_counts(self, results):
- counts = Counter()
+ def result_counts(self, results: List[CheckResult]) -> Counter:
+ counts: Counter = Counter()
for result in results:
counts["total"] += 1
counts[result.status] += 1
return counts
- def disposition(self, results):
+ def disposition(self, results: List[CheckResult]) -> str:
"""
Returns one of: accept, revise, reject
"""
raise NotImplementedError
- def run_checks(self, editgroup):
+ def run_checks(self, editgroup: Editgroup) -> List[CheckResult]:
results = []
@@ -222,7 +259,7 @@ class DummyCheck(EditCheck):
scope = ["editgroup", "work"]
name = "DummyCheck"
- def check_editgroup(self, editgroup):
+ def check_editgroup(self, editgroup: Editgroup) -> CheckResult:
return CheckResult(
"pass",
"editgroup",
@@ -231,7 +268,11 @@ class DummyCheck(EditCheck):
),
)
- def check_work(self, entity, edit):
+ def check_work(
+ self,
+ edit: EntityEdit,
+ work: fatcat_openapi_client.WorkEntity,
+ ) -> CheckResult:
return CheckResult("pass", "work", "this work edit is beautiful")
@@ -240,9 +281,9 @@ class DummyReviewBot(ReviewBot):
This bot reviews everything and always passes.
"""
- def __init__(self, api, **kwargs):
+ def __init__(self, api: ApiClient, **kwargs):
super().__init__(api, **kwargs)
self.checks = [DummyCheck()]
- def disposition(self, results):
+ def disposition(self, results: List[CheckResult]) -> str:
return "accept"
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index 2b39068a..03410ffb 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -1,4 +1,5 @@
import json
+from typing import Any, Dict, List
from citeproc import (
Citation,
@@ -9,20 +10,21 @@ from citeproc import (
)
from citeproc.source.json import CiteProcJSON
from citeproc_styles import get_style_filepath
+from fatcat_openapi_client import ReleaseContrib, ReleaseEntity
-def contribs_by_role(contribs, role):
+def contribs_by_role(contribs: List[ReleaseContrib], role: str) -> List[ReleaseContrib]:
ret = [c.copy() for c in contribs if c["role"] == role]
[c.pop("role") for c in ret]
# TODO: some note to self here
[c.pop("literal") for c in ret if "literal" in c]
if not ret:
- return None
+ return []
else:
return ret
-def release_to_csl(entity):
+def release_to_csl(entity: ReleaseEntity) -> Dict[str, Any]:
"""
Returns a python dict which can be json.dumps() to get a CSL-JSON (aka,
citeproc-JSON, aka Citation Style Language JSON)
@@ -188,9 +190,9 @@ def release_to_csl(entity):
return csl
-def refs_to_csl(entity):
+def refs_to_csl(entity: ReleaseEntity) -> List[Dict[str, Any]]:
ret = []
- for ref in entity.refs:
+ for ref in entity.refs or []:
if ref.release_id and False:
# TODO: fetch full entity from API and convert with release_to_csl
raise NotImplementedError
@@ -207,7 +209,7 @@ def refs_to_csl(entity):
return ret
-def citeproc_csl(csl_json, style, html=False):
+def citeproc_csl(csl_json: Dict[str, Any], style: str, html: bool = False) -> str:
"""
Renders a release entity to a styled citation.
diff --git a/python/fatcat_tools/transforms/entities.py b/python/fatcat_tools/transforms/entities.py
index ee4017d8..e5da633f 100644
--- a/python/fatcat_tools/transforms/entities.py
+++ b/python/fatcat_tools/transforms/entities.py
@@ -36,7 +36,9 @@ def entity_from_json(
return api_client.deserialize(thing, entity_type)
-def entity_from_dict(obj: Mapping[str, Any], entity_type, api_client=None):
+def entity_from_dict(
+ obj: Mapping[str, Any], entity_type: Any, api_client: Optional[ApiClient] = None
+) -> Any:
json_str = json.dumps(obj)
return entity_from_json(json_str, entity_type, api_client=api_client)
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 30b5b190..cbf9e9bf 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -1,4 +1,8 @@
-INGEST_TYPE_CONTAINER_MAP = {
+from typing import Any, Dict, Optional
+
+from fatcat_openapi_client import ReleaseEntity
+
+INGEST_TYPE_CONTAINER_MAP: Dict[str, str] = {
# Optica
"twtpsm6ytje3nhuqfu3pa7ca7u": "html",
# Optics Express
@@ -14,7 +18,11 @@ INGEST_TYPE_CONTAINER_MAP = {
}
-def release_ingest_request(release, ingest_request_source="fatcat", ingest_type=None):
+def release_ingest_request(
+ release: ReleaseEntity,
+ ingest_request_source: str = "fatcat",
+ ingest_type: Optional[str] = None,
+) -> Optional[Dict[str, Any]]:
"""
Takes a full release entity object and returns an ingest request (as dict),
or None if it seems like this release shouldn't be ingested.