From 35c1e59e1b4903ab7b4ac922199c0a55e3a20f5a Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Mon, 29 Jun 2020 21:33:23 -0700
Subject: seaweedfs for S3 API; pull config from dynaconf

---
 fatcat_scholar/work_pipeline.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'fatcat_scholar/work_pipeline.py')

diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index 17a0f7a..303f9b1 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -82,13 +82,11 @@ class WorkPipeline:
         issue_db: IssueDB,
         sandcrawler_db_client: SandcrawlerPostgrestClient,
         sandcrawler_s3_client: SandcrawlerMinioClient,
-        fulltext_cache_dir=None,
     ):
         self.issue_db: IssueDB = issue_db
         self.ia_client = internetarchive.get_session()
         self.sandcrawler_db_client = sandcrawler_db_client
         self.sandcrawler_s3_client = sandcrawler_s3_client
-        self.fulltext_cache_dir = fulltext_cache_dir
 
     def fetch_file_grobid(self, fe: FileEntity, release_ident: str) -> Optional[Any]:
         """
@@ -388,13 +386,13 @@ def main():
     parser.add_argument(
         "--sandcrawler-db-api",
         help="Sandcrawler Postgrest API endpoint",
-        default="http://aitio.us.archive.org:3030",
+        default=settings.SANDCRAWLER_DB_API,
         type=str,
     )
     parser.add_argument(
         "--sandcrawler-s3-api",
         help="Sandcrawler S3 (minio/seaweedfs) API endpoint",
-        default="aitio.us.archive.org:9000",
+        default=settings.SANDCRAWLER_S3_API,
         type=str,
     )
 
@@ -409,12 +407,6 @@ def main():
         default=sys.stdin,
         type=argparse.FileType("r"),
     )
-    sub.add_argument(
-        "--fulltext-cache-dir",
-        help="path of local directory with pdftotext fulltext (and thumbnails)",
-        default=None,
-        type=str,
-    )
 
     args = parser.parse_args()
     if not args.__dict__.get("func"):
@@ -431,7 +423,6 @@ def main():
             access_key=os.environ.get("MINIO_ACCESS_KEY"),
             secret_key=os.environ.get("MINIO_SECRET_KEY"),
         ),
-        fulltext_cache_dir=args.fulltext_cache_dir,
     )
 
     if args.func == "run_releases":
-- 
cgit v1.2.3