about | summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar/work_pipeline.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-29 21:33:23 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-29 21:33:23 -0700
commit: 35c1e59e1b4903ab7b4ac922199c0a55e3a20f5a (patch)
tree: 94b44a594d9254ee9b3c8a6cfa0c8f2e468c39d4 /fatcat_scholar/work_pipeline.py
parent: 9f99cf6926b09f906b21d64b631e797fdbb51bac (diff)
download: fatcat-scholar-35c1e59e1b4903ab7b4ac922199c0a55e3a20f5a.tar.gz
fatcat-scholar-35c1e59e1b4903ab7b4ac922199c0a55e3a20f5a.zip
seaweedfs for S3 API; pull config from dynaconf
Diffstat (limited to 'fatcat_scholar/work_pipeline.py')
-rw-r--r-- fatcat_scholar/work_pipeline.py 13
1 files changed, 2 insertions, 11 deletions
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index 17a0f7a..303f9b1 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -82,13 +82,11 @@ class WorkPipeline:
issue_db: IssueDB,
sandcrawler_db_client: SandcrawlerPostgrestClient,
sandcrawler_s3_client: SandcrawlerMinioClient,
- fulltext_cache_dir=None,
):
self.issue_db: IssueDB = issue_db
self.ia_client = internetarchive.get_session()
self.sandcrawler_db_client = sandcrawler_db_client
self.sandcrawler_s3_client = sandcrawler_s3_client
- self.fulltext_cache_dir = fulltext_cache_dir
def fetch_file_grobid(self, fe: FileEntity, release_ident: str) -> Optional[Any]:
"""
@@ -388,13 +386,13 @@ def main():
parser.add_argument(
"--sandcrawler-db-api",
help="Sandcrawler Postgrest API endpoint",
- default="http://aitio.us.archive.org:3030",
+ default=settings.SANDCRAWLER_DB_API,
type=str,
)
parser.add_argument(
"--sandcrawler-s3-api",
help="Sandcrawler S3 (minio/seaweedfs) API endpoint",
- default="aitio.us.archive.org:9000",
+ default=settings.SANDCRAWLER_S3_API,
type=str,
)
@@ -409,12 +407,6 @@ def main():
default=sys.stdin,
type=argparse.FileType("r"),
)
- sub.add_argument(
- "--fulltext-cache-dir",
- help="path of local directory with pdftotext fulltext (and thumbnails)",
- default=None,
- type=str,
- )
args = parser.parse_args()
if not args.__dict__.get("func"):
@@ -431,7 +423,6 @@ def main():
access_key=os.environ.get("MINIO_ACCESS_KEY"),
secret_key=os.environ.get("MINIO_SECRET_KEY"),
),
- fulltext_cache_dir=args.fulltext_cache_dir,
)
if args.func == "run_releases":