summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/harvest/datacite.py
blob: 12860810daa3375c79c4aacbd29dbfdb64f5346d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

"""
DataCite has a REST API as well as an OAI-PMH endpoint.

They have about 8 million records (presumably DOIs).

bulk export notes: https://github.com/datacite/datacite/issues/188

Fundamentally, this is very similar to Crossref. We don't have a scrape yet...
maybe we could/should use this script for that, and dump the results to JSON?
"""

from fatcat_tools.harvest.ingest_common import DoiApiHarvest

class HarvestDataciteWorker(DoiApiHarvest):
    """
    Harvest worker configured for the DataCite API.

    Every constructor argument is handed through to DoiApiHarvest unchanged;
    the only thing this subclass adds on top is the `update_filter_name`
    attribute.
    """

    def __init__(
        self,
        kafka_hosts,
        produce_topic,
        state_topic,
        contact_email,
        api_host_url="https://api.datacite.org/works",
        start_date=None,
        end_date=None,
    ):
        """
        Set up the parent harvester, then record the DataCite filter name.

        All parameters are forwarded by keyword to DoiApiHarvest.__init__;
        see that class for their exact meaning. `api_host_url` defaults to
        the DataCite "works" endpoint, and `start_date` / `end_date` default
        to None.
        """
        super().__init__(
            kafka_hosts=kafka_hosts,
            produce_topic=produce_topic,
            state_topic=state_topic,
            api_host_url=api_host_url,
            contact_email=contact_email,
            start_date=start_date,
            end_date=end_date,
        )

        # NOTE(review): presumably the name of the API query filter the parent
        # class uses to select records by update date -- confirm against
        # DoiApiHarvest.
        self.update_filter_name = "update"