diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-04 13:01:58 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 18:15:20 -0700 |
commit | e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e (patch) | |
tree | bee2b4343f30d59fec463dbdcaafafc11d7cd513 /python/sandcrawler/fileset_strategies.py | |
parent | 452475df7619f3743eac5ad86e2e1fb8ba9972da (diff) | |
download | sandcrawler-e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e.tar.gz sandcrawler-e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e.zip |
progress on fileset/dataset ingest
Diffstat (limited to 'python/sandcrawler/fileset_strategies.py')
-rw-r--r-- | python/sandcrawler/fileset_strategies.py | 22 |
1 files changed, 22 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
new file mode 100644
index 0000000..592b475
--- /dev/null
+++ b/python/sandcrawler/fileset_strategies.py
@@ -0,0 +1,22 @@
+
+import sys
+import json
+import gzip
+import time
+from collections import namedtuple
+from typing import Optional, Tuple, Any, Dict, List
+
+from sandcrawler.html_metadata import BiblioMetadata
+from sandcrawler.ia import ResourceResult
+from sandcrawler.fileset_types import IngestStrategy, FilesetManifestFile, DatasetPlatformItem
+
+
+class FilesetIngestStrategy(class):
+
+    def __init__():
+        self.ingest_strategy = 'unknown'
+
+    def check_existing(): # XXX: -> Any:
+        raise NotImplementedError()
+
+    def process(item: DatasetPlatformItem):
```