From d0772cc01462d532d0cd82cb0c6dd859f4c4e74d Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 15 Mar 2019 15:16:50 -0700
Subject: MEDLINE/Pubmed note

Also, arXivRaw, not arXiv (though see WIP on more-importers branch)
---
 python/fatcat_tools/harvest/oaipmh.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'python/fatcat_tools/harvest')

diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index 923500fc..0b482924 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -98,18 +98,22 @@ class HarvestArxivWorker(HarvestOaiPmhWorker):
     - http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:0804.2273&metadataPrefix=arXivRaw
 
     All records are work-level. Some metadata formats have internal info about
-    specific versions. The 'arXiv' format does, so i'm using that.
+    specific versions. The 'arXivRaw' format does, so i'm using that.
     """
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.endpoint_url = "https://export.arxiv.org/oai2"
-        self.metadata_prefix = "arXiv"
+        self.metadata_prefix = "arXivRaw"
         self.name = "arxiv"
 
 
 class HarvestPubmedWorker(HarvestOaiPmhWorker):
     """
+    Will likely be doing MEDLINE daily batch imports for primary metadata, but
+    might also want to run a PMC importer to update fulltext and assign OA
+    licenses (when appropriate).
+
     Pubmed refs:
     - https://www.ncbi.nlm.nih.gov/pmc/tools/oai/
     - https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi?verb=GetRecord&identifier=oai:pubmedcentral.nih.gov:152494&metadataPrefix=pmc_fm
-- 
cgit v1.2.3