blob: 7aeacf229ce64fe0dc0d932c051b86119448efd4 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
import sys
import json
import gzip
import time
from collections import namedtuple
from typing import Optional, Tuple, Any, Dict, List
from sandcrawler.html_metadata import BiblioMetadata
from sandcrawler.ia import ResourceResult
class DatasetPlatformHelper:
    """
    Base class for dataset-platform helpers.

    Subclasses set `platform_name` and are expected to override
    `match_request()` and `get_item()`; the implementations here only raise
    `NotImplementedError`.

    Type annotations below are written as strings so they are not evaluated
    when the class body is executed (`DatasetPlatformItem` is not defined at
    this point in the file).
    """

    def __init__(self):
        # Placeholder identifier; concrete helpers overwrite this.
        self.platform_name = 'unknown'

    def match_request(
        self,
        request: dict,
        resource: "ResourceResult",
        html_biblio: "Optional[BiblioMetadata]",
    ) -> bool:
        """
        Does this request look like it matches this platform?

        Args:
            request: the request dict being evaluated
            resource: the `ResourceResult` associated with the request
            html_biblio: `BiblioMetadata` for the request, or None

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        # NotImplemented is a comparison sentinel, not an exception; raising
        # it would produce a TypeError instead of the intended signal.
        raise NotImplementedError()

    def get_item(
        self,
        request: dict,
        resource: "ResourceResult",
        html_biblio: "Optional[BiblioMetadata]",
    ) -> "DatasetPlatformItem":
        """
        Fetch platform-specific metadata for this request (eg, via API calls)

        Args:
            request: the request dict being processed
            resource: the `ResourceResult` associated with the request
            html_biblio: `BiblioMetadata` for the request, or None

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError()
class DataverseHelper(DatasetPlatformHelper):
    """
    Platform helper whose `platform_name` is 'dataverse'.

    No other methods are overridden in this class; everything else is
    inherited from `DatasetPlatformHelper`.
    """

    def __init__(self):
        # Run base-class initialization first, then set this platform's name.
        super().__init__()
        self.platform_name = 'dataverse'
class ArchiveOrgHelper(DatasetPlatformHelper):
    """
    Platform helper whose `platform_name` is 'archiveorg'.

    No other methods are overridden in this class; everything else is
    inherited from `DatasetPlatformHelper`.
    """

    def __init__(self):
        # Run base-class initialization first, then set this platform's name.
        super().__init__()
        self.platform_name = 'archiveorg'
|