1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
"""
DataCite has a REST API as well as an OAI-PMH endpoint.
They have about 8 million records.
Bulk export notes: https://github.com/datacite/datacite/issues/188
Fundamentally, this is very similar to Crossref. We don't have a scrape yet;
maybe we could/should use this script for that, and dump the results to JSON?
"""
from fatcat_tools.harvest.ingest_common import DoiApiHarvest
class HarvestDataciteWorker(DoiApiHarvest):
    """DOI harvest worker configured for the DataCite REST API.

    Everything is delegated to ``DoiApiHarvest``; this subclass only
    supplies the DataCite default endpoint and overrides
    ``update_filter_name``.
    """

    def __init__(
        self,
        kafka_hosts,
        produce_topic,
        state_topic,
        contact_email,
        api_host_url="https://api.datacite.org/works",
        start_date=None,
        end_date=None,
    ):
        """Set up the worker by forwarding every argument to the base class.

        Args:
            kafka_hosts: Passed through to ``DoiApiHarvest`` unchanged.
            produce_topic: Passed through to ``DoiApiHarvest`` unchanged.
            state_topic: Passed through to ``DoiApiHarvest`` unchanged.
            contact_email: Passed through to ``DoiApiHarvest`` unchanged.
            api_host_url: API endpoint URL; defaults to the DataCite
                ``/works`` endpoint.
            start_date: Optional; passed through (default ``None``).
            end_date: Optional; passed through (default ``None``).
        """
        super().__init__(
            kafka_hosts=kafka_hosts,
            produce_topic=produce_topic,
            state_topic=state_topic,
            api_host_url=api_host_url,
            contact_email=contact_email,
            start_date=start_date,
            end_date=end_date,
        )
        # Assigned after the base initializer so this value wins over
        # whatever the base class may have set.
        # NOTE(review): presumably the name of the API filter used to select
        # records by update date -- confirm against DoiApiHarvest.
        self.update_filter_name = "update"
|