diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-06-29 21:24:02 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-29 21:24:02 -0700 | 
| commit | 9f99cf6926b09f906b21d64b631e797fdbb51bac (patch) | |
| tree | 112e206564b7cb836c638167b80bd1acfd367454 | |
| parent | 8c75ce9e78dfe4295188d8a57632d1630f987f8e (diff) | |
| download | fatcat-scholar-9f99cf6926b09f906b21d64b631e797fdbb51bac.tar.gz fatcat-scholar-9f99cf6926b09f906b21d64b631e797fdbb51bac.zip | |
make fmt
| -rw-r--r-- | fatcat_scholar/sandcrawler.py | 4 | ||||
| -rw-r--r-- | fatcat_scholar/search.py | 6 | ||||
| -rw-r--r-- | fatcat_scholar/transform.py | 21 | ||||
| -rw-r--r-- | fatcat_scholar/work_pipeline.py | 4 | 
4 files changed, 22 insertions, 13 deletions
| diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py index 9c48cd9..25c7002 100644 --- a/fatcat_scholar/sandcrawler.py +++ b/fatcat_scholar/sandcrawler.py @@ -17,7 +17,9 @@ class SandcrawlerPostgrestClient:              return None      def get_pdf_meta(self, sha1: str) -> Optional[Dict[str, Any]]: -        resp = requests.get(self.api_url + "/pdf_meta", params=dict(sha1hex="eq." + sha1)) +        resp = requests.get( +            self.api_url + "/pdf_meta", params=dict(sha1hex="eq." + sha1) +        )          resp.raise_for_status()          resp_json = resp.json()          if resp_json: diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py index a2e19bc..0985081 100644 --- a/fatcat_scholar/search.py +++ b/fatcat_scholar/search.py @@ -207,7 +207,7 @@ def do_fulltext_search(          number_of_fragments=2,          fragment_size=300,          # TODO: this will fix highlight encoding, but requires ES 7.x -        #encoder="html", +        # encoder="html",      )      # sort order @@ -270,8 +270,8 @@ def do_fulltext_search(              if type(h[key]) is str:                  h[key] = h[key].encode("utf8", "ignore").decode("utf8")          # ensure collapse_key is a single value, not an array -        if type(h['collapse_key']) == list: -            h['collapse_key'] = h['collapse_key'][0] +        if type(h["collapse_key"]) == list: +            h["collapse_key"] = h["collapse_key"][0]      count_found: int = int(resp.hits.total)      count_returned = len(results) diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index c219528..ac80efc 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -104,10 +104,12 @@ def es_biblio_from_sim(sim: Dict[str, Any]) -> ScholarBiblio:          # original_title=release.original_title,          release_date=release_date,          release_year=release_year, -        release_type=SIM_RELEASE_TYPE_MAP.get(issue_meta.get("pub_type")) or SIM_RELEASE_TYPE_MAP.get(pub_meta.get("pub_type")), +        release_type=SIM_RELEASE_TYPE_MAP.get(issue_meta.get("pub_type")) +        or SIM_RELEASE_TYPE_MAP.get(pub_meta.get("pub_type")),          release_stage="published",  # as a default          # withdrawn_status=release.withdrawn_status, -        lang_code=SIM_LANG_MAP.get(issue_meta.get("language")) or SIM_LANG_MAP.get(pub_meta.get("language")), +        lang_code=SIM_LANG_MAP.get(issue_meta.get("language")) +        or SIM_LANG_MAP.get(pub_meta.get("language")),          country_code=SIM_COUNTRY_MAP.get(pub_meta.get("country")),          volume=volume,          volume_int=volume_int, @@ -133,7 +135,10 @@ def es_biblio_from_sim(sim: Dict[str, Any]) -> ScholarBiblio:  def _add_file_release_meta( -    fulltext: ScholarFulltext, pdf_meta: Optional[dict], re: ReleaseEntity, fe: FileEntity +    fulltext: ScholarFulltext, +    pdf_meta: Optional[dict], +    re: ReleaseEntity, +    fe: FileEntity,  ) -> ScholarFulltext:      best_url = None      best_url_type = None @@ -181,10 +186,7 @@ def es_fulltext_from_pdftotext(  ) -> Optional[ScholarFulltext]:      ret = ScholarFulltext( -        lang_code=re.language, -        body=raw_text, -        acknowledgement=None, -        annex=None, +        lang_code=re.language, body=raw_text, acknowledgement=None, annex=None,      )      return _add_file_release_meta(ret, pdf_meta, re, fe) @@ -257,7 +259,10 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:              if f.ident == heavy.pdftotext_fulltext["file_ident"]          ][0]          fulltext = es_fulltext_from_pdftotext( -            heavy.pdftotext_fulltext["raw_text"], heavy.pdf_meta, fulltext_release, fulltext_file +            heavy.pdftotext_fulltext["raw_text"], +            heavy.pdf_meta, +            fulltext_release, +            fulltext_file,          )      # TODO: additional access list diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py index e3a0d8d..17a0f7a 100644 --- a/fatcat_scholar/work_pipeline.py +++ b/fatcat_scholar/work_pipeline.py @@ -119,7 +119,9 @@ class WorkPipeline:              tei_xml=grobid_xml, release_ident=release_ident, file_ident=fe.ident,          ) -    def fetch_pdf_meta(self, fe: FileEntity, release_ident: str) -> Optional[Dict[str, Any]]: +    def fetch_pdf_meta( +        self, fe: FileEntity, release_ident: str +    ) -> Optional[Dict[str, Any]]:          """          Fetches pdftext metadata from sandcrawler-db via postgrest HTTP          interface. | 
