diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 19:12:14 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 19:12:16 -0800 | 
| commit | 3adcaf9802928346dda597cefd4b66b2e62fa942 (patch) | |
| tree | dc7794d8d142a73a41fbad4221d058dc3567a2e4 /python/sandcrawler_worker.py | |
| parent | e99d9f2fddcb8b52ba52128b290ec5e0f367392f (diff) | |
| download | sandcrawler-3adcaf9802928346dda597cefd4b66b2e62fa942.tar.gz sandcrawler-3adcaf9802928346dda597cefd4b66b2e62fa942.zip | |
refactor 'minio' to 'seaweedfs'; and BLOB env vars
This goes along with changes to ansible deployment to use the correct
key names and values.
Diffstat (limited to 'python/sandcrawler_worker.py')
| -rwxr-xr-x | python/sandcrawler_worker.py | 20 | 
1 file changed, 10 insertions, 10 deletions
```diff
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index a653771..537398e 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -3,7 +3,7 @@
 """
 These are generally for continuously running workers that consume from Kafka.
 Outputs might either be pushed back into Kafka, or directly into sandcrawler-db
-or minio.
+or S3 (SeaweedFS).
 """
 
 import os
@@ -242,16 +242,16 @@ def main():
         help="postgresql database connection string",
         default="postgres:///sandcrawler")
     parser.add_argument('--s3-url',
-        help="S3 (minio) backend URL",
+        help="S3 (seaweedfs) backend URL",
         default="localhost:9000")
     parser.add_argument('--s3-access-key',
-        help="S3 (minio) credential",
-        default=os.environ.get('MINIO_ACCESS_KEY'))
+        help="S3 (seaweedfs) credential",
+        default=os.environ.get('SANDCRAWLER_BLOB_ACCESS_KEY') or os.environ.get('MINIO_ACCESS_KEY'))
     parser.add_argument('--s3-secret-key',
-        help="S3 (minio) credential",
-        default=os.environ.get('MINIO_SECRET_KEY'))
+        help="S3 (seaweedfs) credential",
+        default=os.environ.get('SANDCRAWLER_BLOB_SECRET_KEY') or os.environ.get('MINIO_SECRET_KEY'))
     parser.add_argument('--s3-bucket',
-        help="S3 (minio) bucket to persist into",
+        help="S3 (seaweedfs) bucket to persist into",
         default="sandcrawler-dev")
 
     subparsers = parser.add_subparsers()
@@ -264,7 +264,7 @@ def main():
     sub_pdf_extract.set_defaults(func=run_pdf_extract)
 
     sub_persist_grobid = subparsers.add_parser('persist-grobid',
-        help="daemon that consumes GROBID output from Kafka and pushes to minio and postgres")
+        help="daemon that consumes GROBID output from Kafka and pushes to S3 (seaweedfs) and postgres")
     sub_persist_grobid.add_argument('--s3-only',
         action='store_true',
         help="only upload TEI-XML to S3 (don't write to database)")
@@ -274,7 +274,7 @@ def main():
     sub_persist_grobid.set_defaults(func=run_persist_grobid)
 
     sub_persist_pdftext = subparsers.add_parser('persist-pdftext',
-        help="daemon that consumes pdftext output from Kafka and pushes to minio and postgres")
+        help="daemon that consumes pdftext output from Kafka and pushes to S3 (seaweedfs) and postgres")
     sub_persist_pdftext.add_argument('--s3-only',
         action='store_true',
         help="only upload TEI-XML to S3 (don't write to database)")
@@ -284,7 +284,7 @@ def main():
     sub_persist_pdftext.set_defaults(func=run_persist_pdftext)
 
     sub_persist_thumbnail = subparsers.add_parser('persist-thumbnail',
-        help="daemon that consumes thumbnail output from Kafka and pushes to minio and postgres")
+        help="daemon that consumes thumbnail output from Kafka and pushes to S3 (seaweedfs) and postgres")
     sub_persist_thumbnail.set_defaults(func=run_persist_thumbnail)
 
     sub_persist_pdftrio = subparsers.add_parser('persist-pdftrio',
```
