aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_scholar/work_pipeline.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-05-20 13:27:55 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-05-20 13:27:55 -0700
commit f2c465fffc76ca752249e11d32673db43efc35f1 (patch)
tree e000389c916c56c322a984ebdf440a2c6129a0e0 /fatcat_scholar/work_pipeline.py
parent 3ee18580dd108c69c01cdf838a7f1a7d3d181629 (diff)
download fatcat-scholar-f2c465fffc76ca752249e11d32673db43efc35f1.tar.gz
fatcat-scholar-f2c465fffc76ca752249e11d32673db43efc35f1.zip
first pass transform from pipelines to ES schema
Diffstat (limited to 'fatcat_scholar/work_pipeline.py')
-rw-r--r-- fatcat_scholar/work_pipeline.py 17
1 files changed, 1 insertions, 16 deletions
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index b9dcbe8..081878c 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -12,7 +12,7 @@ from fatcat_scholar.api_entities import *
from fatcat_scholar.djvu import djvu_extract_leaf_texts
from fatcat_scholar.sandcrawler import SandcrawlerPostgrestClient, SandcrawlerMinioClient
from fatcat_scholar.issue_db import IssueDB, SimIssueRow
-from fatcat_scholar.es_transform import es_biblio_from_release, es_release_from_release, DocType
+from fatcat_scholar.schema import es_biblio_from_release, es_release_from_release, DocType, IntermediateBundle
def parse_pages(raw: str) -> Tuple[Optional[int], Optional[int]]:
@@ -44,21 +44,6 @@ def test_parse_pages():
assert parse_pages("iiv") == (None, None)
-class IntermediateBundle(BaseModel):
- doc_type: DocType
- releases: List[ReleaseEntity]
- biblio_release_ident: Optional[str]
- grobid_fulltext: Optional[Any]
- pdftotext_fulltext: Optional[Any]
- sim_fulltext: Optional[Any]
-
- class Config:
- arbitrary_types_allowed = True
- json_encoders = {
- ReleaseEntity: lambda re: entity_to_dict(re),
- }
-
-
def fulltext_pref_list(releases: List[ReleaseEntity]) -> List[str]:
"""
Returns a list of release idents in preference order (best first) to