summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fatcat_scholar/work_pipeline.py 13
-rw-r--r-- settings.toml 2
2 files changed, 4 insertions, 11 deletions
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index 17a0f7a..303f9b1 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -82,13 +82,11 @@ class WorkPipeline:
issue_db: IssueDB,
sandcrawler_db_client: SandcrawlerPostgrestClient,
sandcrawler_s3_client: SandcrawlerMinioClient,
- fulltext_cache_dir=None,
):
self.issue_db: IssueDB = issue_db
self.ia_client = internetarchive.get_session()
self.sandcrawler_db_client = sandcrawler_db_client
self.sandcrawler_s3_client = sandcrawler_s3_client
- self.fulltext_cache_dir = fulltext_cache_dir
def fetch_file_grobid(self, fe: FileEntity, release_ident: str) -> Optional[Any]:
"""
@@ -388,13 +386,13 @@ def main():
parser.add_argument(
"--sandcrawler-db-api",
help="Sandcrawler Postgrest API endpoint",
- default="http://aitio.us.archive.org:3030",
+ default=settings.SANDCRAWLER_DB_API,
type=str,
)
parser.add_argument(
"--sandcrawler-s3-api",
help="Sandcrawler S3 (minio/seaweedfs) API endpoint",
- default="aitio.us.archive.org:9000",
+ default=settings.SANDCRAWLER_S3_API,
type=str,
)
@@ -409,12 +407,6 @@ def main():
default=sys.stdin,
type=argparse.FileType("r"),
)
- sub.add_argument(
- "--fulltext-cache-dir",
- help="path of local directory with pdftotext fulltext (and thumbnails)",
- default=None,
- type=str,
- )
args = parser.parse_args()
if not args.__dict__.get("func"):
@@ -431,7 +423,6 @@ def main():
access_key=os.environ.get("MINIO_ACCESS_KEY"),
secret_key=os.environ.get("MINIO_SECRET_KEY"),
),
- fulltext_cache_dir=args.fulltext_cache_dir,
)
if args.func == "run_releases":
diff --git a/settings.toml b/settings.toml
index 1e9ccc6..c4f3b17 100644
--- a/settings.toml
+++ b/settings.toml
@@ -6,6 +6,8 @@ I18N_LANG_DEFAULT = "en"
ELASTICSEARCH_BACKEND = "https://search.fatcat.wiki"
ELASTICSEARCH_FULLTEXT_INDEX = "scholar_fulltext"
THUMBNAIL_URL_PREFIX = "https://blobs.fatcat.wiki/thumbnail/pdf/"
+SANDCRAWLER_DB_API = "http://aitio.us.archive.org:3030"
+SANDCRAWLER_S3_API = "wbgrp-svc169.us.archive.org:8333"
[dev]
SCHOLAR_ENV = "dev"