# cgit navigation: about / summary / refs / log / tree / commit / diff / stats
# path: root/python/sandcrawler/fileset_platforms.py
# blob: 7aeacf229ce64fe0dc0d932c051b86119448efd4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

import sys
import json
import gzip
import time
from collections import namedtuple
from typing import Optional, Tuple, Any, Dict, List

from sandcrawler.html_metadata import BiblioMetadata
from sandcrawler.ia import ResourceResult


class DatasetPlatformHelper:
    """
    Base class for platform-specific dataset helpers.

    Sets a placeholder `platform_name` and declares the two hooks,
    `match_request()` and `get_item()`. Both hooks raise NotImplementedError
    here and are meant to be overridden by subclasses.
    """

    def __init__(self):
        # Placeholder value; subclasses assign their own platform name.
        self.platform_name = 'unknown'

    # NOTE: annotations naming non-builtin types are quoted so they are not
    # evaluated when the class body executes. `DatasetPlatformItem` in
    # particular is not defined in the portion of the module visible here.
    def match_request(self, request: dict, resource: "ResourceResult", html_biblio: "Optional[BiblioMetadata]") -> bool:
        """
        Does this request look like it matches this platform?

        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError

    def get_item(self, request: dict, resource: "ResourceResult", html_biblio: "Optional[BiblioMetadata]") -> "DatasetPlatformItem":
        """
        Fetch platform-specific metadata for this request (eg, via API calls)

        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError


class DataverseHelper(DatasetPlatformHelper):
    """
    Dataset platform helper identified by the platform name 'dataverse'.
    """

    def __init__(self):
        # The base initializer only sets a placeholder platform_name, so the
        # attribute is assigned directly here.
        self.platform_name = 'dataverse'

class ArchiveOrgHelper(DatasetPlatformHelper):
    """
    Dataset platform helper identified by the platform name 'archiveorg'.
    """

    def __init__(self):
        # The base initializer only sets a placeholder platform_name, so the
        # attribute is assigned directly here.
        self.platform_name = 'archiveorg'