diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-29 21:33:23 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-29 21:33:23 -0700 |
commit | 35c1e59e1b4903ab7b4ac922199c0a55e3a20f5a (patch) | |
tree | 94b44a594d9254ee9b3c8a6cfa0c8f2e468c39d4 /fatcat_scholar | |
parent | 9f99cf6926b09f906b21d64b631e797fdbb51bac (diff) | |
download | fatcat-scholar-35c1e59e1b4903ab7b4ac922199c0a55e3a20f5a.tar.gz fatcat-scholar-35c1e59e1b4903ab7b4ac922199c0a55e3a20f5a.zip |
seaweedfs for S3 API; pull config from dynaconf
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- | fatcat_scholar/work_pipeline.py | 13 |
1 files changed, 2 insertions, 11 deletions
```diff
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index 17a0f7a..303f9b1 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -82,13 +82,11 @@ class WorkPipeline:
         issue_db: IssueDB,
         sandcrawler_db_client: SandcrawlerPostgrestClient,
         sandcrawler_s3_client: SandcrawlerMinioClient,
-        fulltext_cache_dir=None,
     ):
         self.issue_db: IssueDB = issue_db
         self.ia_client = internetarchive.get_session()
         self.sandcrawler_db_client = sandcrawler_db_client
         self.sandcrawler_s3_client = sandcrawler_s3_client
-        self.fulltext_cache_dir = fulltext_cache_dir
 
     def fetch_file_grobid(self, fe: FileEntity, release_ident: str) -> Optional[Any]:
         """
@@ -388,13 +386,13 @@ def main():
     parser.add_argument(
         "--sandcrawler-db-api",
         help="Sandcrawler Postgrest API endpoint",
-        default="http://aitio.us.archive.org:3030",
+        default=settings.SANDCRAWLER_DB_API,
         type=str,
     )
     parser.add_argument(
         "--sandcrawler-s3-api",
         help="Sandcrawler S3 (minio/seaweedfs) API endpoint",
-        default="aitio.us.archive.org:9000",
+        default=settings.SANDCRAWLER_S3_API,
         type=str,
     )
 
@@ -409,12 +407,6 @@ def main():
         default=sys.stdin,
         type=argparse.FileType("r"),
     )
-    sub.add_argument(
-        "--fulltext-cache-dir",
-        help="path of local directory with pdftotext fulltext (and thumbnails)",
-        default=None,
-        type=str,
-    )
 
     args = parser.parse_args()
     if not args.__dict__.get("func"):
@@ -431,7 +423,6 @@ def main():
             access_key=os.environ.get("MINIO_ACCESS_KEY"),
             secret_key=os.environ.get("MINIO_SECRET_KEY"),
         ),
-        fulltext_cache_dir=args.fulltext_cache_dir,
     )
 
     if args.func == "run_releases":
```