diff options
Diffstat (limited to 'python')
| -rwxr-xr-x | python/fatcat_harvest.py | 12 | ||||
| -rw-r--r-- | python/fatcat_tools/harvest/oaipmh.py | 7 | 
2 files changed, 12 insertions, 7 deletions
| diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py index c16e9d60..5f6f471b 100755 --- a/python/fatcat_harvest.py +++ b/python/fatcat_harvest.py @@ -15,7 +15,7 @@ def run_crossref(args):          contact_email=args.contact_email,          start_date=args.start_date,          end_date=args.end_date) -    worker.run() +    worker.run(continuous=args.continuous)  def run_datacite(args):      worker = HarvestDataciteWorker( @@ -25,7 +25,7 @@ def run_datacite(args):          contact_email=args.contact_email,          start_date=args.start_date,          end_date=args.end_date) -    worker.run() +    worker.run(continuous=args.continuous)  def run_arxiv(args):      worker = HarvestArxivWorker( @@ -34,7 +34,7 @@ def run_arxiv(args):          state_topic="fatcat-{}.oaipmh-arxiv-state".format(args.env),          start_date=args.start_date,          end_date=args.end_date) -    worker.run() +    worker.run(continuous=args.continuous)  def run_pubmed(args):      worker = HarvestPubmedWorker( @@ -43,7 +43,7 @@ def run_pubmed(args):          state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),          start_date=args.start_date,          end_date=args.end_date) -    worker.run() +    worker.run(continuous=args.continuous)  def run_doaj_article(args):      worker = HarvestDoajArticleWorker( @@ -52,7 +52,7 @@ def run_doaj_article(args):          state_topic="fatcat-{}.oaipmh-doaj-article-state".format(args.env),          start_date=args.start_date,          end_date=args.end_date) -    worker.run() +    worker.run(continuous=args.continuous)  def run_doaj_journal(args):      worker = HarvestDoajJournalWorker( @@ -61,7 +61,7 @@ def run_doaj_journal(args):          state_topic="fatcat-{}.oaipmh-doaj-journal-state".format(args.env),          start_date=args.start_date,          end_date=args.end_date) -    worker.run() +    worker.run(continuous=args.continuous)  def mkdate(raw): diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py index 4044ff10..ffcd5e78 100644 --- a/python/fatcat_tools/harvest/oaipmh.py +++ b/python/fatcat_tools/harvest/oaipmh.py @@ -75,6 +75,7 @@ class HarvestOaiPmhWorker:          self.endpoint_url = None # needs override          self.metadata_prefix = None  # needs override +        self.name = "unnamed"          self.state = HarvestState(start_date, end_date)          self.state.initialize_from_kafka(self.kafka.topics[self.state_topic]) @@ -119,7 +120,7 @@ class HarvestOaiPmhWorker:                  time.sleep(self.loop_sleep())              else:                  break -        print("{} DOI ingest caught up".format(self.name)) +        print("{} OAI-PMH ingest caught up".format(self.name))  class HarvestArxivWorker(HarvestOaiPmhWorker): @@ -128,6 +129,7 @@ class HarvestArxivWorker(HarvestOaiPmhWorker):          super().__init__(**kwargs)           self.endpoint_url = "https://export.arxiv.org/oai2"          self.metadata_prefix = "arXiv" +        self.name = "arxiv"  class HarvestPubmedWorker(HarvestOaiPmhWorker): @@ -136,6 +138,7 @@ class HarvestPubmedWorker(HarvestOaiPmhWorker):          super().__init__(**kwargs)           self.endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi"          self.metadata_prefix = "pmc_fm" +        self.name = "pubmed"  class HarvestDoajJournalWorker(HarvestOaiPmhWorker): @@ -147,6 +150,7 @@ class HarvestDoajJournalWorker(HarvestOaiPmhWorker):          super().__init__(**kwargs)           self.endpoint_url = "https://www.doaj.org/oai"          self.metadata_prefix = "oai_dc" +        self.name = "doaj-journal"  class HarvestDoajArticleWorker(HarvestOaiPmhWorker): @@ -158,4 +162,5 @@ class HarvestDoajArticleWorker(HarvestOaiPmhWorker):          super().__init__(**kwargs)           self.endpoint_url = "https://www.doaj.org/oai.article"          self.metadata_prefix = "oai_doaj" +        self.name = "doaj-article" | 
