summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/harvest
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/harvest')
-rw-r--r--python/fatcat_tools/harvest/oaipmh.py34
1 files changed, 0 insertions, 34 deletions
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index 8e9efea8..c95f3445 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -132,40 +132,6 @@ class HarvestArxivWorker(HarvestOaiPmhWorker):
self.name = "arxiv"
-class HarvestPubmedWorker(HarvestOaiPmhWorker):
- """
- Will likely be doing MEDLINE daily batch imports for primary metadata, but
- might also want to run a PMC importer to update fulltext and assign OA
- licenses (when appropriate).
-
- Pubmed refs:
- - https://www.ncbi.nlm.nih.gov/pmc/tools/oai/
- - https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi?verb=GetRecord&identifier=oai:pubmedcentral.nih.gov:152494&metadataPrefix=pmc_fm
- - https://github.com/titipata/pubmed_parser
-
- TODO(martin): OAI does not seem to support the format we already have an
- importer for. Maybe we can use "Daily Update Files" --
-
- Daily Update Files
- ------------------
- Each day, NLM produces update files that include new, revised and deleted
- citations. The first Update file to be loaded after loading the complete
- set of 2019 MEDLINE/PubMed Baseline files is pubmed20n1016.xml.
- ftp://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles
-
- NOTES:
-
- * OAI: https://dtd.nlm.nih.gov/archiving/2.3/xsd/archivearticle.xsd
- * FTP: https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_190101.dtd
- """
-
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi"
- self.metadata_prefix = "pmc_fm"
- self.name = "pubmed"
-
-
class HarvestDoajJournalWorker(HarvestOaiPmhWorker):
"""
WARNING: DOAJ OAI-PMH doesn't seem to respect 'from' and 'until' params