summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rwxr-xr-x python/fatcat_harvest.py 12
-rw-r--r-- python/fatcat_tools/harvest/oaipmh.py 7
2 files changed, 12 insertions, 7 deletions
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index c16e9d60..5f6f471b 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -15,7 +15,7 @@ def run_crossref(args):
contact_email=args.contact_email,
start_date=args.start_date,
end_date=args.end_date)
- worker.run()
+ worker.run(continuous=args.continuous)
def run_datacite(args):
worker = HarvestDataciteWorker(
@@ -25,7 +25,7 @@ def run_datacite(args):
contact_email=args.contact_email,
start_date=args.start_date,
end_date=args.end_date)
- worker.run()
+ worker.run(continuous=args.continuous)
def run_arxiv(args):
worker = HarvestArxivWorker(
@@ -34,7 +34,7 @@ def run_arxiv(args):
state_topic="fatcat-{}.oaipmh-arxiv-state".format(args.env),
start_date=args.start_date,
end_date=args.end_date)
- worker.run()
+ worker.run(continuous=args.continuous)
def run_pubmed(args):
worker = HarvestPubmedWorker(
@@ -43,7 +43,7 @@ def run_pubmed(args):
state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
start_date=args.start_date,
end_date=args.end_date)
- worker.run()
+ worker.run(continuous=args.continuous)
def run_doaj_article(args):
worker = HarvestDoajArticleWorker(
@@ -52,7 +52,7 @@ def run_doaj_article(args):
state_topic="fatcat-{}.oaipmh-doaj-article-state".format(args.env),
start_date=args.start_date,
end_date=args.end_date)
- worker.run()
+ worker.run(continuous=args.continuous)
def run_doaj_journal(args):
worker = HarvestDoajJournalWorker(
@@ -61,7 +61,7 @@ def run_doaj_journal(args):
state_topic="fatcat-{}.oaipmh-doaj-journal-state".format(args.env),
start_date=args.start_date,
end_date=args.end_date)
- worker.run()
+ worker.run(continuous=args.continuous)
def mkdate(raw):
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index 4044ff10..ffcd5e78 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -75,6 +75,7 @@ class HarvestOaiPmhWorker:
self.endpoint_url = None # needs override
self.metadata_prefix = None # needs override
+ self.name = "unnamed"
self.state = HarvestState(start_date, end_date)
self.state.initialize_from_kafka(self.kafka.topics[self.state_topic])
@@ -119,7 +120,7 @@ class HarvestOaiPmhWorker:
time.sleep(self.loop_sleep())
else:
break
- print("{} DOI ingest caught up".format(self.name))
+ print("{} OAI-PMH ingest caught up".format(self.name))
class HarvestArxivWorker(HarvestOaiPmhWorker):
@@ -128,6 +129,7 @@ class HarvestArxivWorker(HarvestOaiPmhWorker):
super().__init__(**kwargs)
self.endpoint_url = "https://export.arxiv.org/oai2"
self.metadata_prefix = "arXiv"
+ self.name = "arxiv"
class HarvestPubmedWorker(HarvestOaiPmhWorker):
@@ -136,6 +138,7 @@ class HarvestPubmedWorker(HarvestOaiPmhWorker):
super().__init__(**kwargs)
self.endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi"
self.metadata_prefix = "pmc_fm"
+ self.name = "pubmed"
class HarvestDoajJournalWorker(HarvestOaiPmhWorker):
@@ -147,6 +150,7 @@ class HarvestDoajJournalWorker(HarvestOaiPmhWorker):
super().__init__(**kwargs)
self.endpoint_url = "https://www.doaj.org/oai"
self.metadata_prefix = "oai_dc"
+ self.name = "doaj-journal"
class HarvestDoajArticleWorker(HarvestOaiPmhWorker):
@@ -158,4 +162,5 @@ class HarvestDoajArticleWorker(HarvestOaiPmhWorker):
super().__init__(**kwargs)
self.endpoint_url = "https://www.doaj.org/oai.article"
self.metadata_prefix = "oai_doaj"
+ self.name = "doaj-article"