about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2022-01-13 15:47:37 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2022-01-13 15:47:37 -0800
commit: 23c560af176a8c2e15c20ddcac78fb3eb736d19d (patch)
tree: 375c95f5dff7cbd4664bc25307aa8c15aa86f53f
parent: dd840dd1150cf5d9be28286d5487047ff863d7a2 (diff)
download: sandcrawler-23c560af176a8c2e15c20ddcac78fb3eb736d19d.tar.gz
download: sandcrawler-23c560af176a8c2e15c20ddcac78fb3eb736d19d.zip
filesets: more figshare URL patterns
-rw-r--r-- python/sandcrawler/fileset_platforms.py | 13
1 files changed, 13 insertions, 0 deletions
diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py
index b0925b9..a208dc4 100644
--- a/python/sandcrawler/fileset_platforms.py
+++ b/python/sandcrawler/fileset_platforms.py
@@ -375,6 +375,11 @@ class FigshareHelper(FilesetPlatformHelper):
comp = comp[2:]
if comp[0] in [
"dataset",
+ # TODO: should the following be considered "out of scope"?
+ "journal_contribution",
+ "presentation",
+ "poster",
+ "thesis",
]:
comp = comp[1:]
@@ -524,6 +529,14 @@ def test_parse_figshare_url_path() -> None:
"12127176",
"4",
),
+ "/articles/journal_contribution/Improved_Time_Resolved_Measurements_of_Inorganic_Ions_in_Particulate_Matter_by_PILS_IC_Integrated_with_a_Sample_Pre_Concentration_System/1407386/3": (
+ "1407386",
+ "3",
+ ),
+ "/articles/poster/Effect_of_nanoclay_loading_on_the_thermal_decomposition_of_nanoclay_polyurethane_elastomers_obtained_by_bulk_polymerization/1094056/1": (
+ "1094056",
+ "1",
+ ),
}
invalid = [