From 23c560af176a8c2e15c20ddcac78fb3eb736d19d Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 13 Jan 2022 15:47:37 -0800 Subject: filesets: more figshare URL patterns --- python/sandcrawler/fileset_platforms.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'python/sandcrawler') diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py index b0925b9..a208dc4 100644 --- a/python/sandcrawler/fileset_platforms.py +++ b/python/sandcrawler/fileset_platforms.py @@ -375,6 +375,11 @@ class FigshareHelper(FilesetPlatformHelper): comp = comp[2:] if comp[0] in [ "dataset", + # TODO: should the following be considered "out of scope"? + "journal_contribution", + "presentation", + "poster", + "thesis", ]: comp = comp[1:] @@ -524,6 +529,14 @@ def test_parse_figshare_url_path() -> None: "12127176", "4", ), + "/articles/journal_contribution/Improved_Time_Resolved_Measurements_of_Inorganic_Ions_in_Particulate_Matter_by_PILS_IC_Integrated_with_a_Sample_Pre_Concentration_System/1407386/3": ( + "1407386", + "3", + ), + "/articles/poster/Effect_of_nanoclay_loading_on_the_thermal_decomposition_of_nanoclay_polyurethane_elastomers_obtained_by_bulk_polymerization/1094056/1": ( + "1094056", + "1", + ), } invalid = [ -- cgit v1.2.3