diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 17:13:39 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 18:15:29 -0700 |
commit | 13d56402cf5e01ca8e2306f85dbcc3b3f92a94d7 (patch) | |
tree | 853a3dc60dcf3bd635be0816ff59b23f0975ae7d /python/sandcrawler/misc.py | |
parent | a09396caefe709b521e560add5b01c1a5c94cb53 (diff) | |
download | sandcrawler-13d56402cf5e01ca8e2306f85dbcc3b3f92a94d7.tar.gz sandcrawler-13d56402cf5e01ca8e2306f85dbcc3b3f92a94d7.zip |
improve fileset ingest integration with file ingest
Diffstat (limited to 'python/sandcrawler/misc.py')
-rw-r--r-- | python/sandcrawler/misc.py | 15 |
1 files changed, 15 insertions, 0 deletions
diff --git a/python/sandcrawler/misc.py b/python/sandcrawler/misc.py
index dc46e9a..37a2a82 100644
--- a/python/sandcrawler/misc.py
+++ b/python/sandcrawler/misc.py
@@ -1,4 +1,5 @@
 
+import os
 import base64
 import magic
 import hashlib
@@ -261,3 +262,17 @@ def requests_retry_session(retries=10, backoff_factor=3,
     session.mount('https://', adapter)
     return session
 
+def sanitize_fs_path(path: str) -> str:
+    """
+    From: https://stackoverflow.com/questions/13939120/sanitizing-a-file-path-in-python/66950540#66950540
+    """
+    # - pretending to chroot to the current directory
+    # - cancelling all redundant paths (/.. = /)
+    # - making the path relative
+    return os.path.relpath(os.path.normpath(os.path.join("/", path)), "/")
+
+def test_sanitize_fs_path() -> None:
+    assert sanitize_fs_path("/thing.png") == "thing.png"
+    assert sanitize_fs_path("../../thing.png") == "thing.png"
+    assert sanitize_fs_path("thing.png") == "thing.png"
+    assert sanitize_fs_path("subdir/thing.png") == "subdir/thing.png"