about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  fatcat_scholar/api_entities.py  2
-rw-r--r--  fatcat_scholar/config.py  1
-rw-r--r--  fatcat_scholar/djvu.py  4
-rwxr-xr-x  fatcat_scholar/grobid2json.py  4
-rw-r--r--  fatcat_scholar/hacks.py  2
-rw-r--r--  fatcat_scholar/issue_db.py  9
-rw-r--r--  fatcat_scholar/kafka.py  6
-rw-r--r--  fatcat_scholar/query_citation.py  11
-rw-r--r--  fatcat_scholar/query_fatcat.py  10
-rw-r--r--  fatcat_scholar/sandcrawler.py  3
-rw-r--r--  fatcat_scholar/schema.py  13
-rw-r--r--  fatcat_scholar/search.py  19
-rw-r--r--  fatcat_scholar/sim_pipeline.py  17
-rw-r--r--  fatcat_scholar/transform.py  10
-rw-r--r--  fatcat_scholar/web.py  31
-rw-r--r--  fatcat_scholar/work_pipeline.py  25
-rw-r--r--  fatcat_scholar/worker.py  18
-rw-r--r--  tests/test_djvu_parse.py  1
-rw-r--r--  tests/test_issue_db.py  2
-rw-r--r--  tests/test_refs_transform.py  3
-rw-r--r--  tests/test_scrub.py  2
-rw-r--r--  tests/test_transform.py  2
-rw-r--r--  tests/test_web.py  2
-rw-r--r--  tests/test_work_pipeline.py  4
24 files changed, 101 insertions, 100 deletions
diff --git a/fatcat_scholar/api_entities.py b/fatcat_scholar/api_entities.py
index 605a7ac..0664718 100644
--- a/fatcat_scholar/api_entities.py
+++ b/fatcat_scholar/api_entities.py
@@ -1,5 +1,5 @@
-import json
import collections
+import json
from typing import Any, Optional
from fatcat_openapi_client import ApiClient
diff --git a/fatcat_scholar/config.py b/fatcat_scholar/config.py
index 2619df0..ddb2844 100644
--- a/fatcat_scholar/config.py
+++ b/fatcat_scholar/config.py
@@ -1,4 +1,5 @@
import subprocess
+
from dynaconf import Dynaconf
settings = Dynaconf(settings_file="settings.toml", environments=True,)
diff --git a/fatcat_scholar/djvu.py b/fatcat_scholar/djvu.py
index 3df61dd..58d6761 100644
--- a/fatcat_scholar/djvu.py
+++ b/fatcat_scholar/djvu.py
@@ -1,6 +1,6 @@
-from io import StringIO
-from typing import List, Dict, Optional
import xml.etree.ElementTree as ET
+from io import StringIO
+from typing import Dict, List, Optional
def djvu_extract_leaf_texts(
diff --git a/fatcat_scholar/grobid2json.py b/fatcat_scholar/grobid2json.py
index 5c44953..c99b9ed 100755
--- a/fatcat_scholar/grobid2json.py
+++ b/fatcat_scholar/grobid2json.py
@@ -25,11 +25,11 @@ Prints JSON to stdout, errors to stderr
This file copied from the sandcrawler repository.
"""
+import argparse
import io
import json
-import argparse
import xml.etree.ElementTree as ET
-from typing import List, Any, Dict, AnyStr, Optional
+from typing import Any, AnyStr, Dict, List, Optional
xml_ns = "http://www.w3.org/XML/1998/namespace"
ns = "http://www.tei-c.org/ns/1.0"
diff --git a/fatcat_scholar/hacks.py b/fatcat_scholar/hacks.py
index 29274c9..541edda 100644
--- a/fatcat_scholar/hacks.py
+++ b/fatcat_scholar/hacks.py
@@ -1,6 +1,6 @@
import typing
-import jinja2
+import jinja2
from starlette.background import BackgroundTask
from starlette.templating import _TemplateResponse
diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py
index 3c55b51..3ca6b79 100644
--- a/fatcat_scholar/issue_db.py
+++ b/fatcat_scholar/issue_db.py
@@ -1,11 +1,12 @@
-import sys
+import argparse
import json
import sqlite3
-import argparse
+import sys
from dataclasses import dataclass
-from typing import List, Dict, Optional, Any, Sequence, Tuple
-import fatcat_openapi_client
+from typing import Any, Dict, List, Optional, Sequence, Tuple
+
import elasticsearch
+import fatcat_openapi_client
from elasticsearch_dsl import Search
from fatcat_scholar.config import settings
diff --git a/fatcat_scholar/kafka.py b/fatcat_scholar/kafka.py
index f412f8a..71067c1 100644
--- a/fatcat_scholar/kafka.py
+++ b/fatcat_scholar/kafka.py
@@ -1,10 +1,10 @@
-import sys
import json
import signal
+import sys
from collections import Counter
-from typing import List, Any
+from typing import Any, List
-from confluent_kafka import Consumer, Producer, KafkaException
+from confluent_kafka import Consumer, KafkaException, Producer
class KafkaWorker:
diff --git a/fatcat_scholar/query_citation.py b/fatcat_scholar/query_citation.py
index 4f40c73..3f741f0 100644
--- a/fatcat_scholar/query_citation.py
+++ b/fatcat_scholar/query_citation.py
@@ -12,16 +12,16 @@ parallel with "regular" query?
import io
import sys
-from typing import Optional, Any, Tuple
import xml.etree.ElementTree as ET
+from typing import Any, Optional, Tuple
-import requests
-from fatcat_openapi_client import ReleaseEntity, ReleaseExtIds, ReleaseContrib
-from fatcat_scholar.api_entities import entity_to_dict
-from fuzzycat.matching import match_release_fuzzy
import fuzzycat.common
import fuzzycat.verify
+import requests
+from fatcat_openapi_client import ReleaseContrib, ReleaseEntity, ReleaseExtIds
+from fuzzycat.matching import match_release_fuzzy
+from fatcat_scholar.api_entities import entity_to_dict
from fatcat_scholar.grobid2json import biblio_info
@@ -172,6 +172,7 @@ if __name__ == "__main__":
Demo showing how to integrate the above functions together.
"""
import os
+
import elasticsearch
import fatcat_openapi_client
diff --git a/fatcat_scholar/query_fatcat.py b/fatcat_scholar/query_fatcat.py
index edac35d..45c7e47 100644
--- a/fatcat_scholar/query_fatcat.py
+++ b/fatcat_scholar/query_fatcat.py
@@ -1,14 +1,14 @@
+import argparse
+import json
import os
import sys
-import json
-import argparse
-from typing import List, Any
+from typing import Any, List
+import elasticsearch
import requests
+from elasticsearch_dsl import Q, Search
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error
-import elasticsearch
-from elasticsearch_dsl import Search, Q
def requests_retry_session(
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py
index 0501f8e..9f9f7e4 100644
--- a/fatcat_scholar/sandcrawler.py
+++ b/fatcat_scholar/sandcrawler.py
@@ -1,6 +1,7 @@
+from typing import Any, Dict, Optional
+
import minio
import requests
-from typing import Dict, Optional, Any
class SandcrawlerPostgrestClient:
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 0fcf56e..4230b7e 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -4,23 +4,24 @@ get serialization for free with those. This is useful for things like
auto-conversion of datetime objects.
"""
-import re
-import json
import datetime
+import json
+import re
from enum import Enum
-from typing import Optional, List, Any, Dict
+from typing import Any, Dict, List, Optional
import ftfy
from bs4 import BeautifulSoup
+from fatcat_openapi_client import ReleaseContrib, ReleaseEntity
# pytype: disable=import-error
from pydantic import BaseModel
+from fatcat_scholar.api_entities import entity_from_json, entity_to_dict
+from fatcat_scholar.biblio_hacks import doi_link_domain
+
# pytype: enable=import-error
-from fatcat_openapi_client import ReleaseEntity, ReleaseContrib
-from fatcat_scholar.api_entities import entity_to_dict, entity_from_json
-from fatcat_scholar.biblio_hacks import doi_link_domain
class DocType(str, Enum):
diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py
index dccaf07..6522fe3 100644
--- a/fatcat_scholar/search.py
+++ b/fatcat_scholar/search.py
@@ -3,27 +3,28 @@ Helpers to make elasticsearch queries.
"""
import copy
-import logging
import datetime
+import logging
from gettext import gettext
-from typing import List, Optional, Any
+from typing import Any, List, Optional
-import sentry_sdk
import elasticsearch
-from elasticsearch_dsl import Search, Q
-from elasticsearch_dsl.response import Response
import fatcat_openapi_client
+import sentry_sdk
+from elasticsearch_dsl import Q, Search
+from elasticsearch_dsl.response import Response
# pytype: disable=import-error
from pydantic import BaseModel
-# pytype: enable=import-error
-
from fatcat_scholar.config import settings
from fatcat_scholar.identifiers import *
-from fatcat_scholar.schema import ScholarDoc
-from fatcat_scholar.query_parse import sniff_citation_query, pre_parse_query
from fatcat_scholar.query_citation import try_fuzzy_match
+from fatcat_scholar.query_parse import pre_parse_query, sniff_citation_query
+from fatcat_scholar.schema import ScholarDoc
+
+# pytype: enable=import-error
+
# i18n note: the use of gettext below doesn't actually do the translation here,
# it just ensures that the strings are caught by babel for translation later
diff --git a/fatcat_scholar/sim_pipeline.py b/fatcat_scholar/sim_pipeline.py
index aa6f4da..9157852 100644
--- a/fatcat_scholar/sim_pipeline.py
+++ b/fatcat_scholar/sim_pipeline.py
@@ -1,21 +1,18 @@
+import argparse
import io
-import sys
import sqlite3
-import argparse
-from typing import List, Dict, Optional, Any
-import urllib3.exceptions
+import sys
+from typing import Any, Dict, List, Optional
+import internetarchive
import requests
import sentry_sdk
-import internetarchive
+import urllib3.exceptions
-from fatcat_scholar.config import settings, GIT_REVISION
+from fatcat_scholar.config import GIT_REVISION, settings
from fatcat_scholar.djvu import djvu_extract_leaf_texts
from fatcat_scholar.issue_db import IssueDB
-from fatcat_scholar.schema import (
- DocType,
- IntermediateBundle,
-)
+from fatcat_scholar.schema import DocType, IntermediateBundle
def truncate_pub_meta(full: Dict[str, Any]) -> Dict[str, Any]:
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 3a7102a..7264540 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -1,18 +1,18 @@
-import sys
import argparse
import datetime
+import sys
import xml.etree.ElementTree
import xml.etree.ElementTree as ET
-from typing import List, Dict, Optional, Any, Sequence
+from typing import Any, Dict, List, Optional, Sequence
import sentry_sdk
-from fatcat_openapi_client import ReleaseEntity, FileEntity, WebcaptureEntity
+from fatcat_openapi_client import FileEntity, ReleaseEntity, WebcaptureEntity
from fatcat_scholar.api_entities import *
-from fatcat_scholar.schema import *
-from fatcat_scholar.config import settings, GIT_REVISION
+from fatcat_scholar.config import GIT_REVISION, settings
from fatcat_scholar.grobid2json import teixml2json
from fatcat_scholar.identifiers import clean_doi, clean_pmcid
+from fatcat_scholar.schema import *
MAX_BODY_CHARS = 512 * 1024
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 8cf2c88..e835c01 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -6,41 +6,40 @@ So far there are few endpoints, so we just put them all here!
import logging
import urllib.parse
-from typing import Optional, Any, List, Dict
+from typing import Any, Dict, List, Optional
-from pydantic import BaseModel
import babel.numbers
import babel.support
-from fastapi import FastAPI, APIRouter, Request, Depends, Response, HTTPException, Query
-from fastapi.staticfiles import StaticFiles
+import fatcat_openapi_client
+import sentry_sdk
+from fastapi import APIRouter, Depends, FastAPI, HTTPException, Query, Request, Response
+from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import (
- PlainTextResponse,
- JSONResponse,
FileResponse,
+ JSONResponse,
+ PlainTextResponse,
RedirectResponse,
)
-from fastapi.middleware.cors import CORSMiddleware
-import fatcat_openapi_client
-import sentry_sdk
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
from sentry_sdk.integrations.asgi import SentryAsgiMiddleware
-from starlette_prometheus import metrics, PrometheusMiddleware
from starlette.exceptions import HTTPException as StarletteHTTPException
+from starlette_prometheus import PrometheusMiddleware, metrics
-from fatcat_scholar.config import settings, GIT_REVISION
+from fatcat_scholar.config import GIT_REVISION, settings
from fatcat_scholar.hacks import (
Jinja2Templates,
- parse_accept_lang,
make_access_redirect_url,
+ parse_accept_lang,
)
+from fatcat_scholar.schema import ScholarDoc
from fatcat_scholar.search import (
- process_query,
- FulltextQuery,
FulltextHits,
+ FulltextQuery,
es_scholar_index_alive,
get_es_scholar_doc,
+ process_query,
)
-from fatcat_scholar.schema import ScholarDoc
-
logger = logging.getLogger()
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index b9c8d33..54e1ed3 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -1,29 +1,26 @@
-import os
+import argparse
import io
+import os
import sys
-import argparse
-from typing import List, Dict, Tuple, Optional, Any, Sequence
-import urllib3.exceptions
+from typing import Any, Dict, List, Optional, Sequence, Tuple
+import internetarchive
import minio
import requests
import sentry_sdk
-import internetarchive
-from fatcat_openapi_client import ReleaseEntity, FileEntity, WebcaptureEntity
+import urllib3.exceptions
+from fatcat_openapi_client import FileEntity, ReleaseEntity, WebcaptureEntity
from fatcat_scholar.api_entities import *
-from fatcat_scholar.config import settings, GIT_REVISION
+from fatcat_scholar.config import GIT_REVISION, settings
from fatcat_scholar.djvu import djvu_extract_leaf_texts
+from fatcat_scholar.issue_db import IssueDB, SimIssueRow, SimPubRow
from fatcat_scholar.sandcrawler import (
- SandcrawlerPostgrestClient,
SandcrawlerMinioClient,
+ SandcrawlerPostgrestClient,
)
-from fatcat_scholar.issue_db import IssueDB, SimIssueRow, SimPubRow
-from fatcat_scholar.schema import (
- DocType,
- IntermediateBundle,
-)
-from fatcat_scholar.sim_pipeline import truncate_pub_meta, truncate_issue_meta
+from fatcat_scholar.schema import DocType, IntermediateBundle
+from fatcat_scholar.sim_pipeline import truncate_issue_meta, truncate_pub_meta
def parse_pages(raw: str) -> Tuple[Optional[int], Optional[int]]:
diff --git a/fatcat_scholar/worker.py b/fatcat_scholar/worker.py
index 7d2b3d6..b836d7c 100644
--- a/fatcat_scholar/worker.py
+++ b/fatcat_scholar/worker.py
@@ -1,26 +1,26 @@
-import os
-import sys
import argparse
import datetime
-from typing import List, Any
+import os
+import sys
+from typing import Any, List
-import requests
-import sentry_sdk
import elasticsearch
import elasticsearch.helpers
import fatcat_openapi_client
+import requests
+import sentry_sdk
-from fatcat_scholar.config import settings, GIT_REVISION
+from fatcat_scholar.config import GIT_REVISION, settings
from fatcat_scholar.issue_db import IssueDB
+from fatcat_scholar.kafka import KafkaWorker
from fatcat_scholar.sandcrawler import (
- SandcrawlerPostgrestClient,
SandcrawlerMinioClient,
+ SandcrawlerPostgrestClient,
)
from fatcat_scholar.schema import IntermediateBundle
+from fatcat_scholar.sim_pipeline import SimPipeline
from fatcat_scholar.transform import transform_heavy
from fatcat_scholar.work_pipeline import WorkPipeline
-from fatcat_scholar.sim_pipeline import SimPipeline
-from fatcat_scholar.kafka import KafkaWorker
class FetchDocsWorker(KafkaWorker):
diff --git a/tests/test_djvu_parse.py b/tests/test_djvu_parse.py
index 777f8bf..621bf2e 100644
--- a/tests/test_djvu_parse.py
+++ b/tests/test_djvu_parse.py
@@ -1,4 +1,5 @@
import io
+
from fatcat_scholar.djvu import djvu_extract_leaf_texts
diff --git a/tests/test_issue_db.py b/tests/test_issue_db.py
index 9fdab94..6aab879 100644
--- a/tests/test_issue_db.py
+++ b/tests/test_issue_db.py
@@ -1,8 +1,8 @@
import elasticsearch
import fatcat_openapi_client
-from fatcat_scholar.issue_db import IssueDB
from fatcat_scholar.config import settings
+from fatcat_scholar.issue_db import IssueDB
def test_issue_db_basics() -> None:
diff --git a/tests/test_refs_transform.py b/tests/test_refs_transform.py
index 078b73b..0300fd5 100644
--- a/tests/test_refs_transform.py
+++ b/tests/test_refs_transform.py
@@ -1,8 +1,9 @@
import json
+
from fatcat_openapi_client import ReleaseEntity
from fatcat_scholar.grobid2json import teixml2json
-from fatcat_scholar.transform import refs_from_grobid, refs_from_crossref
+from fatcat_scholar.transform import refs_from_crossref, refs_from_grobid
def test_transform_refs_grobid() -> None:
diff --git a/tests/test_scrub.py b/tests/test_scrub.py
index 063478e..37faebb 100644
--- a/tests/test_scrub.py
+++ b/tests/test_scrub.py
@@ -1,4 +1,4 @@
-from fatcat_scholar.schema import scrub_text, clean_str
+from fatcat_scholar.schema import clean_str, scrub_text
def test_scrub() -> None:
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 927c13b..42f51d3 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -1,7 +1,7 @@
from fatcat_openapi_client import ReleaseEntity
-from fatcat_scholar.schema import *
from fatcat_scholar.api_entities import *
+from fatcat_scholar.schema import *
from fatcat_scholar.transform import *
diff --git a/tests/test_web.py b/tests/test_web.py
index d9cfab6..8225731 100644
--- a/tests/test_web.py
+++ b/tests/test_web.py
@@ -1,9 +1,9 @@
import json
from typing import Any
+import fatcat_openapi_client
import pytest
from fastapi.testclient import TestClient
-import fatcat_openapi_client
from fatcat_scholar.web import app
diff --git a/tests/test_work_pipeline.py b/tests/test_work_pipeline.py
index 977a708..d0512cf 100644
--- a/tests/test_work_pipeline.py
+++ b/tests/test_work_pipeline.py
@@ -1,12 +1,12 @@
import responses
+from fatcat_scholar.config import settings
from fatcat_scholar.issue_db import IssueDB
from fatcat_scholar.sandcrawler import (
- SandcrawlerPostgrestClient,
SandcrawlerMinioClient,
+ SandcrawlerPostgrestClient,
)
from fatcat_scholar.work_pipeline import *
-from fatcat_scholar.config import settings
@responses.activate