diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-12-16 02:01:31 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-12-16 02:01:31 -0800 |
commit | e4c153d4f6d8c815c7e792fec2f40cdac39bab35 (patch) | |
tree | 164d8486d96728f91f88e9835519d2f1ca33932e | |
parent | 423892e6fbd40851224bddddb9e76279b78e0e1a (diff) | |
download | sandcrawler-e4c153d4f6d8c815c7e792fec2f40cdac39bab35.tar.gz sandcrawler-e4c153d4f6d8c815c7e792fec2f40cdac39bab35.zip |
filesets: handle weird figshare link-only case better
-rw-r--r-- | python/sandcrawler/fileset_platforms.py | 5 |
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py
index 07d9844..b0925b9 100644
--- a/python/sandcrawler/fileset_platforms.py
+++ b/python/sandcrawler/fileset_platforms.py
@@ -472,7 +472,10 @@ class FigshareHelper(FilesetPlatformHelper):
                     # extra=dict(),
                 )
             )
-            assert not row.get("is_link_only")
+            if row.get("is_link_only"):
+                raise PlatformScopeError(
+                    f"figshare.org file is just a link (not a file): {row['name']} at {row['download_url']}"
+                )
 
         authors = []
         for author in obj["authors"]:
```