diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-12-15 16:29:47 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-12-15 16:29:47 -0800 |
commit | a3719d6501e4b7cc520ba05264d99bdcc2d7bed7 (patch) | |
tree | cd28d79213489f3c3d16ba4369e2c43ec0ac35ee | |
parent | ffdc901fa067db55fe6cfeb8d0c3807d29df092c (diff) | |
download | sandcrawler-a3719d6501e4b7cc520ba05264d99bdcc2d7bed7.tar.gz sandcrawler-a3719d6501e4b7cc520ba05264d99bdcc2d7bed7.zip |
more fileset ingest tweaks
-rw-r--r-- | python/sandcrawler/fileset_platforms.py | 2 | ||||
-rw-r--r-- | python/sandcrawler/fileset_strategies.py | 5 |
2 files changed, 7 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py
index 81aad87..4adc9b7 100644
--- a/python/sandcrawler/fileset_platforms.py
+++ b/python/sandcrawler/fileset_platforms.py
@@ -205,6 +205,8 @@ class DataverseHelper(FilesetPlatformHelper):
         )
         resp.raise_for_status()
         obj = resp.json()
+        if not "latestVersion" in obj["data"]:
+            raise PlatformScopeError("could not find latest version for dataverse record")
         obj_latest = obj["data"]["latestVersion"]
         dataset_version = (
             f"{obj_latest['versionNumber']}.{obj_latest['versionMinorNumber']}"
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index f83d1ce..9696f3c 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -150,6 +150,11 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
                 m.status = "downloaded-local"
             except requests.exceptions.RequestException:
                 m.status = "error-platform-download"
+                return ArchiveStrategyResult(
+                    ingest_strategy=self.ingest_strategy,
+                    manifest=item.manifest,
+                    status="error-platform-download",
+                )
 
             print(f" verifying {m.path}", file=sys.stderr)
             file_meta = gen_file_metadata_path(local_path, allow_empty=True)
```