# Provenance: this module was recovered from a git patch that had been
# collapsed onto a single line (cgit v1.2.3 rendering).
#   Commit:  e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e
#   Author:  Bryan Newbold
#   Date:    Mon, 4 Oct 2021 13:01:58 -0700
#   Subject: progress on fileset/dataset ingest
#   File:    python/sandcrawler/fileset_strategies.py (new file, index 592b475)

import sys
import json
import gzip
import time
from collections import namedtuple
from typing import Optional, Tuple, Any, Dict, List

from sandcrawler.html_metadata import BiblioMetadata
from sandcrawler.ia import ResourceResult
from sandcrawler.fileset_types import IngestStrategy, FilesetManifestFile, DatasetPlatformItem


class FilesetIngestStrategy:
    """Skeleton for a fileset ingest strategy.

    Both ``check_existing`` and ``process`` raise ``NotImplementedError``
    here; presumably concrete strategies subclass this and override them
    (no subclass is visible in this file -- confirm against callers).
    """

    def __init__(self) -> None:
        # Strategy label; starts as the placeholder string 'unknown'.
        self.ingest_strategy = 'unknown'

    def check_existing(self):  # XXX: -> Any:
        """Not implemented in this class.

        The return type is still undecided (see the XXX marker above).

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError()

    def process(self, item: DatasetPlatformItem):
        """Not implemented in this class.

        The originating commit left this method without a body; it raises
        like ``check_existing`` so that the module is importable and a
        call fails loudly instead of silently returning ``None``.

        Args:
            item: the platform item to process.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError()