aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_scholar
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-01-26 00:57:31 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-01-26 00:57:34 -0800
commit c556ab44d3f518a23aeb7bf69437fc3b49fcfc65 (patch)
tree 81d9e38c77aa14a1580b00e4cb0434d7d44503d2 /fatcat_scholar
parent 15c7c9ea0f09b2e30dffa85cd79a9f761ea29607 (diff)
download fatcat-scholar-c556ab44d3f518a23aeb7bf69437fc3b49fcfc65.tar.gz
fatcat-scholar-c556ab44d3f518a23aeb7bf69437fc3b49fcfc65.zip
work pipeline: hack to skip seaweedfs errors for now
This isn't great because it turns a lot of problems into silent failures.
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- fatcat_scholar/work_pipeline.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index d06bbcc..aef2064 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -3,6 +3,7 @@ import io
import sys
import argparse
from typing import List, Dict, Tuple, Optional, Any, Sequence
+import urllib3.exceptions
import minio
import requests
@@ -118,6 +119,10 @@ class WorkPipeline:
# print(grobid_xml)
except minio.error.NoSuchKey:
return None
+ except urllib3.exceptions.MaxRetryError:
+ # HACK: work around broken seaweedfs keys
+ print(f"seaweedfs failure: sha1hex={fe.sha1}", file=sys.stderr)
+ return None
return dict(
tei_xml=grobid_xml, release_ident=release_ident, file_ident=fe.ident,
)