diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-01-13 15:47:37 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-01-13 15:47:37 -0800 |
commit | 23c560af176a8c2e15c20ddcac78fb3eb736d19d (patch) | |
tree | 375c95f5dff7cbd4664bc25307aa8c15aa86f53f /python | |
parent | dd840dd1150cf5d9be28286d5487047ff863d7a2 (diff) | |
download | sandcrawler-23c560af176a8c2e15c20ddcac78fb3eb736d19d.tar.gz sandcrawler-23c560af176a8c2e15c20ddcac78fb3eb736d19d.zip |
filesets: more figshare URL patterns
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/fileset_platforms.py | 13 |
1 files changed, 13 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py
index b0925b9..a208dc4 100644
--- a/python/sandcrawler/fileset_platforms.py
+++ b/python/sandcrawler/fileset_platforms.py
@@ -375,6 +375,11 @@ class FigshareHelper(FilesetPlatformHelper):
         comp = comp[2:]
         if comp[0] in [
             "dataset",
+            # TODO: should the following be considered "out of scope"?
+            "journal_contribution",
+            "presentation",
+            "poster",
+            "thesis",
         ]:
             comp = comp[1:]
 
@@ -524,6 +529,14 @@ def test_parse_figshare_url_path() -> None:
             "12127176",
             "4",
         ),
+        "/articles/journal_contribution/Improved_Time_Resolved_Measurements_of_Inorganic_Ions_in_Particulate_Matter_by_PILS_IC_Integrated_with_a_Sample_Pre_Concentration_System/1407386/3": (
+            "1407386",
+            "3",
+        ),
+        "/articles/poster/Effect_of_nanoclay_loading_on_the_thermal_decomposition_of_nanoclay_polyurethane_elastomers_obtained_by_bulk_polymerization/1094056/1": (
+            "1094056",
+            "1",
+        ),
     }
 
     invalid = [
```