summaryrefslogtreecommitdiffstats
path: root/tests/test_work_pipeline.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_work_pipeline.py')
-rw-r--r--tests/test_work_pipeline.py64
1 files changed, 63 insertions, 1 deletions
diff --git a/tests/test_work_pipeline.py b/tests/test_work_pipeline.py
index 439a186..e0e4a82 100644
--- a/tests/test_work_pipeline.py
+++ b/tests/test_work_pipeline.py
@@ -1,3 +1,5 @@
+import responses
+
from fatcat_scholar.issue_db import IssueDB
from fatcat_scholar.sandcrawler import (
SandcrawlerPostgrestClient,
@@ -7,11 +9,71 @@ from fatcat_scholar.work_pipeline import *
from fatcat_scholar.config import settings
-def test_run_transform() -> None:
+@responses.activate
+def test_run_transform(mocker: Any) -> None:
issue_db = IssueDB(settings.SCHOLAR_ISSUEDB_PATH)
issue_db.init_db()
+ responses.add(
+ responses.GET,
+ "http://disabled-during-tests-bogus.xyz:3333/grobid?sha1hex=eq.bca1531b0562c6d72e0c283c1ccb97eb5cb02117",
+ status=200,
+ json=[
+ {
+ "sha1hex": "bca1531b0562c6d72e0c283c1ccb97eb5cb02117",
+ "updated": "2019-11-30T04:44:00+00:00",
+ "grobid_version": "0.5.5-fatcat",
+ "status_code": 200,
+ "status": "success",
+ "fatcat_release": "hsmo6p4smrganpb3fndaj2lon4",
+ "metadata": {
+ "biblio": {
+ "doi": "10.7717/peerj.4375",
+ "date": "2018-02-13",
+ "title": "Distributed under Creative Commons CC-BY 4.0 The state of OA: a large-scale analysis of the prevalence and impact of Open Access articles",
+ "authors": [],
+ },
+ "language_code": "en",
+ "grobid_timestamp": "2019-11-30T04:44+0000",
+ },
+ }
+ ],
+ )
+
+ responses.add(
+ responses.GET,
+ "http://disabled-during-tests-bogus.xyz:3333/pdf_meta?sha1hex=eq.bca1531b0562c6d72e0c283c1ccb97eb5cb02117",
+ status=200,
+ json=[
+ {
+ "sha1hex": "bca1531b0562c6d72e0c283c1ccb97eb5cb02117",
+ "updated": "2020-07-07T02:15:52.98309+00:00",
+ "status": "success",
+ "has_page0_thumbnail": True,
+ "page_count": 23,
+ "word_count": 10534,
+ "page0_height": 792,
+ "page0_width": 612,
+ "permanent_id": "52f2164b9cc9e47fd150e7ee389b595a",
+ "pdf_created": "2018-02-09T06:06:06+00:00",
+ "pdf_version": "1.5",
+ "metadata": {
+ "title": "",
+ "author": "",
+ "creator": "River Valley",
+ "subject": "Legal Issues, Science Policy, Data Science",
+ "producer": "pdfTeX-1.40.16",
+ },
+ }
+ ],
+ )
+
+ es_raw = mocker.patch("fatcat_scholar.work_pipeline.WorkPipeline.fetch_file_grobid")
+ es_raw.side_effect = [
+ {"tei_xml": "<xml>dummy", "release_ident": "asdf123", "file_ident": "xyq9876"},
+ ]
+
wp = WorkPipeline(
issue_db=issue_db,
sandcrawler_db_client=SandcrawlerPostgrestClient(