diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-01-26 00:57:31 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-01-26 00:57:34 -0800 |
commit | c556ab44d3f518a23aeb7bf69437fc3b49fcfc65 (patch) | |
tree | 81d9e38c77aa14a1580b00e4cb0434d7d44503d2 /fatcat_scholar | |
parent | 15c7c9ea0f09b2e30dffa85cd79a9f761ea29607 (diff) | |
download | fatcat-scholar-c556ab44d3f518a23aeb7bf69437fc3b49fcfc65.tar.gz fatcat-scholar-c556ab44d3f518a23aeb7bf69437fc3b49fcfc65.zip |
work pipeline: hack to skip seaweedfs errors for now
This isn't great because it turns a lot of problems into silent
failures.
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- | fatcat_scholar/work_pipeline.py | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py index d06bbcc..aef2064 100644 --- a/fatcat_scholar/work_pipeline.py +++ b/fatcat_scholar/work_pipeline.py @@ -3,6 +3,7 @@ import io import sys import argparse from typing import List, Dict, Tuple, Optional, Any, Sequence +import urllib3.exceptions import minio import requests @@ -118,6 +119,10 @@ class WorkPipeline: # print(grobid_xml) except minio.error.NoSuchKey: return None + except urllib3.exceptions.MaxRetryError: + # HACK: work around broken seaweedfs keys + print(f"seaweedfs failure: sha1hex={fe.sha1}", file=sys.stderr) + return None return dict( tei_xml=grobid_xml, release_ident=release_ident, file_ident=fe.ident, ) |