aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-12-16 02:01:31 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-12-16 02:01:31 -0800
commit e4c153d4f6d8c815c7e792fec2f40cdac39bab35 (patch)
tree 164d8486d96728f91f88e9835519d2f1ca33932e
parent 423892e6fbd40851224bddddb9e76279b78e0e1a (diff)
download sandcrawler-e4c153d4f6d8c815c7e792fec2f40cdac39bab35.tar.gz
sandcrawler-e4c153d4f6d8c815c7e792fec2f40cdac39bab35.zip
filesets: handle weird figshare link-only case better
-rw-r--r-- python/sandcrawler/fileset_platforms.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py
index 07d9844..b0925b9 100644
--- a/python/sandcrawler/fileset_platforms.py
+++ b/python/sandcrawler/fileset_platforms.py
@@ -472,7 +472,10 @@ class FigshareHelper(FilesetPlatformHelper):
# extra=dict(),
)
)
- assert not row.get("is_link_only")
+ if row.get("is_link_only"):
+ raise PlatformScopeError(
+ f"figshare.org file is just a link (not a file): {row['name']} at {row['download_url']}"
+ )
authors = []
for author in obj["authors"]: