| field | value |
|---|---|
| author | Bryan Newbold <bnewbold@archive.org>, 2020-06-03 23:16:07 -0700 |
| committer | Bryan Newbold <bnewbold@archive.org>, 2020-06-03 23:16:07 -0700 |
| commit | 62fde540ef9c38d403ea89a6fb1db51bfba23da8 (patch) |
| tree | 2142c739349237b0ef137a57000850db1b2ff39d |
| parent | 74ef1c6f4e0f08dd6a6b3e6eacc4d780c990eb3f (diff) |
| download | fatcat-scholar-62fde540ef9c38d403ea89a6fb1db51bfba23da8.tar.gz, fatcat-scholar-62fde540ef9c38d403ea89a6fb1db51bfba23da8.zip |
flake8 fixes (partial)
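The bulk of the diff is mechanical lint cleanup. As orientation, here is a minimal Python sketch of the flake8 rules this commit addresses (the `value`/`items` names are hypothetical, not code from this repo):

```python
value = None
items = {"a": 1}

# E711: compare against None with identity, not equality
ok = value is None             # was: value == None

# E712: don't compare against True; use the expression directly
assert bool(items)             # was: assert bool(items) == True

# E713: use `not in` for negative membership tests
missing = "b" not in items    # was: not "b" in items

# E203: no whitespace before ':' in a slice
window = [1, 2, 3][0:(0 + 2)]  # was: [1, 2, 3][0 : 0 + 2]

# F401 (unused imports) and trailing-whitespace cleanups account for
# most of the 51 deleted lines in the diffstat below.
```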
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | fatcat_scholar/djvu.py | 2 |
| -rwxr-xr-x | fatcat_scholar/grobid2json.py | 4 |
| -rw-r--r-- | fatcat_scholar/issue_db.py | 4 |
| -rw-r--r-- | fatcat_scholar/sandcrawler.py | 1 |
| -rw-r--r-- | fatcat_scholar/schema.py | 2 |
| -rw-r--r-- | fatcat_scholar/search.py | 14 |
| -rw-r--r-- | fatcat_scholar/sim_pipeline.py | 17 |
| -rw-r--r-- | fatcat_scholar/transform.py | 13 |
| -rw-r--r-- | fatcat_scholar/web.py | 6 |
| -rw-r--r-- | fatcat_scholar/work_pipeline.py | 7 |
| -rw-r--r-- | tests/test_scrub.py | 2 |
| -rw-r--r-- | tests/test_transform.py | 1 |

12 files changed, 22 insertions(+), 51 deletions(-)
```diff
diff --git a/fatcat_scholar/djvu.py b/fatcat_scholar/djvu.py
index ca3e412..c715608 100644
--- a/fatcat_scholar/djvu.py
+++ b/fatcat_scholar/djvu.py
@@ -1,5 +1,5 @@
 from io import StringIO
-from typing import List, Dict, Tuple, Optional, Any, Sequence
+from typing import List, Dict, Optional
 
 import xml.etree.ElementTree as ET
diff --git a/fatcat_scholar/grobid2json.py b/fatcat_scholar/grobid2json.py
index 57d039e..4019363 100755
--- a/fatcat_scholar/grobid2json.py
+++ b/fatcat_scholar/grobid2json.py
@@ -108,7 +108,7 @@ def biblio_info(elem):
     if ref["publisher"] == "":
         ref["publisher"] = None
     date = elem.find('.//{%s}date[@type="published"]' % ns)
-    ref["date"] = (date != None) and date.attrib.get("when")
+    ref["date"] = (date is not None) and date.attrib.get("when")
     ref["volume"] = elem.findtext('.//{%s}biblScope[@unit="volume"]' % ns)
     ref["issue"] = elem.findtext('.//{%s}biblScope[@unit="issue"]' % ns)
     el = elem.find(".//{%s}ptr[@target]" % ns)
@@ -148,7 +148,7 @@ def teixml2json(content, encumbered=True):
     )
     info["journal"] = journal_info(header)
     date = header.find('.//{%s}date[@type="published"]' % ns)
-    info["date"] = (date != None) and date.attrib.get("when")
+    info["date"] = (date is not None) and date.attrib.get("when")
     info["fatcat_release"] = header.findtext('.//{%s}idno[@type="fatcat"]' % ns)
     info["doi"] = header.findtext('.//{%s}idno[@type="DOI"]' % ns)
     if info["doi"]:
diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py
index 12ffa32..829560b 100644
--- a/fatcat_scholar/issue_db.py
+++ b/fatcat_scholar/issue_db.py
@@ -3,10 +3,10 @@ import json
 import sqlite3
 import argparse
 from dataclasses import dataclass
-from typing import List, Dict, Tuple, Optional, Any, Sequence
+from typing import List, Dict, Optional, Any, Sequence
 import fatcat_openapi_client
 import elasticsearch
-from elasticsearch_dsl import Search, Q
+from elasticsearch_dsl import Search
 
 
 @dataclass
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py
index 408682f..347364f 100644
--- a/fatcat_scholar/sandcrawler.py
+++ b/fatcat_scholar/sandcrawler.py
@@ -1,4 +1,3 @@
-import json
 import minio
 import requests
 from typing import Dict, Optional, Any
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 110991d..29bbe92 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -253,7 +253,7 @@ def es_abstracts_from_release(release: ReleaseEntity) -> List[ScholarAbstract]:
     d = dict()
     for abst in release.abstracts:
-        if not abst.lang in d:
+        if abst.lang not in d:
             d[abst.lang] = ScholarAbstract(
                 lang_code=abst.lang, body=scrub_text(abst.content)
             )
diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py
index 5a61f53..3d9ca9b 100644
--- a/fatcat_scholar/search.py
+++ b/fatcat_scholar/search.py
@@ -3,15 +3,13 @@ Helpers to make elasticsearch queries.
 """
 
 import sys
-import json
 from gettext import gettext
 import datetime
 import elasticsearch
 from pydantic import BaseModel
 from dynaconf import settings
-from dataclasses import dataclass
 from elasticsearch_dsl import Search, Q
-from typing import List, Dict, Tuple, Optional, Any, Sequence
+from typing import List, Optional, Any
 
 # i18n note: the use of gettext below doesn't actually do the translation here,
 # it just ensures that the strings are caught by babel for translation later
@@ -106,7 +104,7 @@ def do_fulltext_search(
         search = search.filter("terms", type=["report", "standard",])
     elif query.filter_type == "datasets":
         search = search.filter("terms", type=["dataset", "software",])
-    elif query.filter_type == "everything" or query.filter_type == None:
+    elif query.filter_type == "everything" or query.filter_type is None:
         pass
     else:
         raise ValueError(
@@ -129,7 +127,7 @@ def do_fulltext_search(
         search = search.filter("range", year=dict(gte=2000))
     elif query.filter_time == "before_1925":
         search = search.filter("range", year=dict(lt=1925))
-    elif query.filter_time == "all_time" or query.filter_time == None:
+    elif query.filter_time == "all_time" or query.filter_time is None:
         pass
     else:
         raise ValueError(
@@ -141,7 +139,7 @@ def do_fulltext_search(
         search = search.filter("term", tag="oa")
     elif query.filter_availability == "everything":
         pass
-    elif query.filter_availability == "fulltext" or query.filter_availability == None:
+    elif query.filter_availability == "fulltext" or query.filter_availability is None:
         search = search.filter("terms", access_type=["wayback", "ia_file", "ia_sim"])
     else:
         raise ValueError(
@@ -199,7 +197,7 @@ def do_fulltext_search(
         search = search.sort("year", "date")
     elif query.sort_order == "time_desc":
         search = search.sort("-year", "-date")
-    elif query.sort_order == "relevancy" or query.sort_order == None:
+    elif query.sort_order == "relevancy" or query.sort_order is None:
         pass
     else:
         raise ValueError(f"Unknown 'sort_order' parameter value: '{query.sort_order}'")
@@ -211,7 +209,7 @@ def do_fulltext_search(
         # Avoid deep paging problem.
         offset = deep_page_limit
-    search = search[offset : offset + limit]
+    search = search[offset:(offset+limit)]
 
     try:
         resp = search.execute()
diff --git a/fatcat_scholar/sim_pipeline.py b/fatcat_scholar/sim_pipeline.py
index b84ac47..cfc197f 100644
--- a/fatcat_scholar/sim_pipeline.py
+++ b/fatcat_scholar/sim_pipeline.py
@@ -1,24 +1,15 @@
-import os
 import io
 import sys
 import sqlite3
 import argparse
+from typing import List, Dict, Optional, Any
+
 import requests
-from pydantic import BaseModel, validator
-from typing import List, Dict, Tuple, Optional, Any, Sequence
-from fatcat_openapi_client import ReleaseEntity, FileEntity
 import internetarchive
 
-from fatcat_scholar.api_entities import *
 from fatcat_scholar.djvu import djvu_extract_leaf_texts
-from fatcat_scholar.sandcrawler import (
-    SandcrawlerPostgrestClient,
-    SandcrawlerMinioClient,
-)
-from fatcat_scholar.issue_db import IssueDB, SimIssueRow
+from fatcat_scholar.issue_db import IssueDB
 from fatcat_scholar.schema import (
-    es_biblio_from_release,
-    es_release_from_release,
     DocType,
     IntermediateBundle,
 )
@@ -57,7 +48,7 @@ class SimPipeline:
     def fetch_sim_issue(self, issue_db_row: Any) -> Optional[Any]:
         """
-        issue_item 
+        issue_item
         pages: str
         page_texts: list
             raw_text
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index b5a0223..28c959b 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -1,19 +1,10 @@
-import os
-import io
 import sys
 import argparse
-from pydantic import BaseModel, validator
-from typing import List, Dict, Tuple, Optional, Any, Sequence
+from typing import List, Dict, Optional, Any
+
 from fatcat_openapi_client import ReleaseEntity, FileEntity
-import internetarchive
 
 from fatcat_scholar.api_entities import *
-from fatcat_scholar.djvu import djvu_extract_leaf_texts
-from fatcat_scholar.sandcrawler import (
-    SandcrawlerPostgrestClient,
-    SandcrawlerMinioClient,
-)
-from fatcat_scholar.issue_db import IssueDB, SimIssueRow
 from fatcat_scholar.schema import *
 from fatcat_scholar.grobid2json import teixml2json
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 6c8a2e9..3f6982d 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -5,13 +5,11 @@ So far there are few endpoints, so we just put them all here!
 """
 
 import sys
-from enum import Enum
 import babel.support
-from fastapi import FastAPI, APIRouter, Request, Depends, Header
+from fastapi import FastAPI, APIRouter, Request, Depends
 from fastapi.staticfiles import StaticFiles
-from fastapi.responses import HTMLResponse
 from dynaconf import settings
-from typing import List, Dict, Tuple, Optional, Any, Sequence
+from typing import Optional
 
 from fatcat_scholar.hacks import Jinja2Templates
 from fatcat_scholar.search import do_fulltext_search, FulltextQuery, FulltextHits
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index af558a3..09ae02f 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -4,7 +4,6 @@ import sys
 import minio
 import requests
 import argparse
-from pydantic import BaseModel, validator
 from typing import List, Dict, Tuple, Optional, Any, Sequence
 from fatcat_openapi_client import ReleaseEntity, FileEntity
 import internetarchive
@@ -17,8 +16,6 @@ from fatcat_scholar.sandcrawler import (
 )
 from fatcat_scholar.issue_db import IssueDB, SimIssueRow, SimPubRow
 from fatcat_scholar.schema import (
-    es_biblio_from_release,
-    es_release_from_release,
     DocType,
     IntermediateBundle,
 )
@@ -171,7 +168,7 @@ class WorkPipeline:
         release_ident: str,
     ) -> Optional[Any]:
         """
-        issue_item 
+        issue_item
         pages: str
         page_texts: list
             page_num
@@ -214,7 +211,7 @@ class WorkPipeline:
         # override 'close()' method so we can still read out contents
         djvu_bytes = io.BytesIO()
         djvu_bytes.close = lambda: None  # type: ignore
-        assert issue_item_djvu.download(fileobj=djvu_bytes) == True
+        assert issue_item_djvu.download(fileobj=djvu_bytes)
         djvu_bytes.seek(0)
         djvu_xml = io.StringIO(djvu_bytes.read().decode("UTF-8"))
         del djvu_bytes
diff --git a/tests/test_scrub.py b/tests/test_scrub.py
index 5929b65..692ed64 100644
--- a/tests/test_scrub.py
+++ b/tests/test_scrub.py
@@ -1,5 +1,3 @@
-import pytest
-
 from fatcat_scholar.schema import *
diff --git a/tests/test_transform.py b/tests/test_transform.py
index d831f47..21efa51 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -1,4 +1,3 @@
-import pytest
 from fatcat_openapi_client import ReleaseEntity
 from fatcat_scholar.schema import *
```
