From c556ab44d3f518a23aeb7bf69437fc3b49fcfc65 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 26 Jan 2021 00:57:31 -0800 Subject: work pipeline: hack to skip seaweedfs errors for now This isn't great because it turns a lot of problems into silent failures. --- fatcat_scholar/work_pipeline.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fatcat_scholar') diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py index d06bbcc..aef2064 100644 --- a/fatcat_scholar/work_pipeline.py +++ b/fatcat_scholar/work_pipeline.py @@ -3,6 +3,7 @@ import io import sys import argparse from typing import List, Dict, Tuple, Optional, Any, Sequence +import urllib3.exceptions import minio import requests @@ -118,6 +119,10 @@ class WorkPipeline: # print(grobid_xml) except minio.error.NoSuchKey: return None + except urllib3.exceptions.MaxRetryError: + # HACK: work around broken seaweedfs keys + print(f"seaweedfs failure: sha1hex={fe.sha1}", file=sys.stderr) + return None return dict( tei_xml=grobid_xml, release_ident=release_ident, file_ident=fe.ident, ) -- cgit v1.2.3