aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-12-15 16:29:47 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-12-15 16:29:47 -0800
commit a3719d6501e4b7cc520ba05264d99bdcc2d7bed7 (patch)
tree cd28d79213489f3c3d16ba4369e2c43ec0ac35ee
parent ffdc901fa067db55fe6cfeb8d0c3807d29df092c (diff)
download sandcrawler-a3719d6501e4b7cc520ba05264d99bdcc2d7bed7.tar.gz
sandcrawler-a3719d6501e4b7cc520ba05264d99bdcc2d7bed7.zip
more fileset ingest tweaks
-rw-r--r-- python/sandcrawler/fileset_platforms.py 2
-rw-r--r-- python/sandcrawler/fileset_strategies.py 5
2 files changed, 7 insertions, 0 deletions
diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py
index 81aad87..4adc9b7 100644
--- a/python/sandcrawler/fileset_platforms.py
+++ b/python/sandcrawler/fileset_platforms.py
@@ -205,6 +205,8 @@ class DataverseHelper(FilesetPlatformHelper):
)
resp.raise_for_status()
obj = resp.json()
+ if not "latestVersion" in obj["data"]:
+ raise PlatformScopeError("could not find latest version for dataverse record")
obj_latest = obj["data"]["latestVersion"]
dataset_version = (
f"{obj_latest['versionNumber']}.{obj_latest['versionMinorNumber']}"
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index f83d1ce..9696f3c 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -150,6 +150,11 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
m.status = "downloaded-local"
except requests.exceptions.RequestException:
m.status = "error-platform-download"
+ return ArchiveStrategyResult(
+ ingest_strategy=self.ingest_strategy,
+ manifest=item.manifest,
+ status="error-platform-download",
+ )
print(f" verifying {m.path}", file=sys.stderr)
file_meta = gen_file_metadata_path(local_path, allow_empty=True)