From 5a508d61daf23a4bfa337c4229bbb6795b69fbd2 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 20 May 2020 20:39:02 -0700 Subject: fixes from manual testing --- fatcat_scholar/work_pipeline.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'fatcat_scholar/work_pipeline.py') diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py index c93cb29..9ce72b1 100644 --- a/fatcat_scholar/work_pipeline.py +++ b/fatcat_scholar/work_pipeline.py @@ -2,6 +2,7 @@ import os import io import sys +import minio import argparse from pydantic import BaseModel, validator from typing import List, Dict, Tuple, Optional, Any, Sequence @@ -88,14 +89,17 @@ class WorkPipeline(): if not grobid_meta or grobid_meta['status'] != 'success': return None #print(grobid_meta) - grobid_xml = self.sandcrawler_s3_client.get_blob( - folder="grobid", - sha1hex=fe.sha1, - extension=".tei.xml", - prefix="", - bucket="sandcrawler", - ) - #print(grobid_xml) + try: + grobid_xml = self.sandcrawler_s3_client.get_blob( + folder="grobid", + sha1hex=fe.sha1, + extension=".tei.xml", + prefix="", + bucket="sandcrawler", + ) + #print(grobid_xml) + except minio.error.NoSuchKey: + return None return dict( tei_xml=grobid_xml, release_ident=release_ident, @@ -338,6 +342,7 @@ def main(): access_key=os.environ.get('MINIO_ACCESS_KEY'), secret_key=os.environ.get('MINIO_SECRET_KEY'), ), + fulltext_cache_dir=args.fulltext_cache_dir, ) if args.func == 'run_releases': -- cgit v1.2.3