diff options
| -rw-r--r-- | fatcat_scholar/search.py | 2 | ||||
| -rw-r--r-- | fatcat_scholar/transform.py | 22 | ||||
| -rw-r--r-- | fatcat_scholar/work_pipeline.py | 2 | 
3 files changed, 22 insertions, 4 deletions
diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py
index 7bb7424..5c7aba0 100644
--- a/fatcat_scholar/search.py
+++ b/fatcat_scholar/search.py
@@ -18,7 +18,7 @@ from elasticsearch_dsl.response import Response
 from pydantic import BaseModel
 
 from fatcat_scholar.config import settings
-from fatcat_scholar.identifiers import *
+from fatcat_scholar.identifiers import clean_doi, clean_pmcid
 from fatcat_scholar.query_citation import try_fuzzy_match
 from fatcat_scholar.query_parse import pre_parse_query, sniff_citation_query
 from fatcat_scholar.schema import ScholarDoc
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index db631cf..3213b8d 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -1,5 +1,6 @@
 import argparse
 import datetime
+import json
 import sys
 import xml.etree.ElementTree
 import xml.etree.ElementTree as ET
@@ -8,11 +9,28 @@ from typing import Any, Dict, List, Optional, Sequence
 
 import sentry_sdk
 from fatcat_openapi_client import FileEntity, ReleaseEntity, WebcaptureEntity
 
-from fatcat_scholar.api_entities import *
 from fatcat_scholar.config import GIT_REVISION, settings
 from fatcat_scholar.grobid2json import teixml2json
 from fatcat_scholar.identifiers import clean_doi, clean_pmcid
-from fatcat_scholar.schema import *
+from fatcat_scholar.schema import (
+    AccessType,
+    DocType,
+    IntermediateBundle,
+    RefBiblio,
+    RefStructured,
+    ScholarAbstract,
+    ScholarAccess,
+    ScholarBiblio,
+    ScholarDoc,
+    ScholarFulltext,
+    ScholarSim,
+    clean_small_int,
+    clean_url_conservative,
+    es_abstracts_from_grobid,
+    es_abstracts_from_release,
+    es_biblio_from_release,
+    es_release_from_release,
+)
 
 MAX_BODY_CHARS = 512 * 1024
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index 92b0943..191e898 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -11,7 +11,7 @@ import sentry_sdk
 import urllib3.exceptions
 from fatcat_openapi_client import FileEntity, ReleaseEntity, WebcaptureEntity
 
-from fatcat_scholar.api_entities import *
+from fatcat_scholar.api_entities import entity_from_json
 from fatcat_scholar.config import GIT_REVISION, settings
 from fatcat_scholar.djvu import djvu_extract_leaf_texts
 from fatcat_scholar.issue_db import IssueDB, SimIssueRow, SimPubRow
