diff options
-rw-r--r-- | fatcat_scholar/work_pipeline.py | 13 | ||||
-rw-r--r-- | settings.toml | 2 |
2 files changed, 4 insertions, 11 deletions
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py index 17a0f7a..303f9b1 100644 --- a/fatcat_scholar/work_pipeline.py +++ b/fatcat_scholar/work_pipeline.py @@ -82,13 +82,11 @@ class WorkPipeline: issue_db: IssueDB, sandcrawler_db_client: SandcrawlerPostgrestClient, sandcrawler_s3_client: SandcrawlerMinioClient, - fulltext_cache_dir=None, ): self.issue_db: IssueDB = issue_db self.ia_client = internetarchive.get_session() self.sandcrawler_db_client = sandcrawler_db_client self.sandcrawler_s3_client = sandcrawler_s3_client - self.fulltext_cache_dir = fulltext_cache_dir def fetch_file_grobid(self, fe: FileEntity, release_ident: str) -> Optional[Any]: """ @@ -388,13 +386,13 @@ def main(): parser.add_argument( "--sandcrawler-db-api", help="Sandcrawler Postgrest API endpoint", - default="http://aitio.us.archive.org:3030", + default=settings.SANDCRAWLER_DB_API, type=str, ) parser.add_argument( "--sandcrawler-s3-api", help="Sandcrawler S3 (minio/seaweedfs) API endpoint", - default="aitio.us.archive.org:9000", + default=settings.SANDCRAWLER_S3_API, type=str, ) @@ -409,12 +407,6 @@ def main(): default=sys.stdin, type=argparse.FileType("r"), ) - sub.add_argument( - "--fulltext-cache-dir", - help="path of local directory with pdftotext fulltext (and thumbnails)", - default=None, - type=str, - ) args = parser.parse_args() if not args.__dict__.get("func"): @@ -431,7 +423,6 @@ def main(): access_key=os.environ.get("MINIO_ACCESS_KEY"), secret_key=os.environ.get("MINIO_SECRET_KEY"), ), - fulltext_cache_dir=args.fulltext_cache_dir, ) if args.func == "run_releases": diff --git a/settings.toml b/settings.toml index 1e9ccc6..c4f3b17 100644 --- a/settings.toml +++ b/settings.toml @@ -6,6 +6,8 @@ I18N_LANG_DEFAULT = "en" ELASTICSEARCH_BACKEND = "https://search.fatcat.wiki" ELASTICSEARCH_FULLTEXT_INDEX = "scholar_fulltext" THUMBNAIL_URL_PREFIX = "https://blobs.fatcat.wiki/thumbnail/pdf/" +SANDCRAWLER_DB_API = "http://aitio.us.archive.org:3030" +SANDCRAWLER_S3_API = "wbgrp-svc169.us.archive.org:8333" [dev] SCHOLAR_ENV = "dev" |