From 4c11f65f202ef8f71bfd640232ed30ccd6f4c3a4 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 15 Aug 2018 21:47:00 -0700 Subject: improve handling of invalid identifiers --- python/fatcat/importer_common.py | 17 ++++++++++++++--- python/fatcat/orcid_importer.py | 7 +++++-- python/tests/files/0000-0001-8254-710X.json | 1 + .../files/crossref-works.2018-01-21.badsample.json | 1 + python/tests/importer.py | 20 ++++++++++++++++++++ python/tests/orcid.py | 4 ++++ 6 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 python/tests/files/0000-0001-8254-710X.json diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py index 9d495aa7..e084d8c4 100644 --- a/python/fatcat/importer_common.py +++ b/python/fatcat/importer_common.py @@ -1,4 +1,5 @@ +import re import sys import csv import json @@ -22,6 +23,7 @@ class FatcatImporter: self._orcid_id_map = dict() self._doi_id_map = dict() self._issn_issnl_map = None + self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{4}$") if issn_map_file: self.read_issn_map_file(issn_map_file) @@ -54,9 +56,11 @@ class FatcatImporter: reader = csv.DictReader(source, delimiter=delimiter) self.process_batch(reader, size) + def is_issnl(self, issnl): + return len(issnl) == 9 and issnl[4] == '-' + def lookup_issnl(self, issnl): """Caches calls to the ISSN-L lookup API endpoint in a local dict""" - assert len(issnl) == 9 and issnl[4] == '-' if issnl in self._issnl_id_map: return self._issnl_id_map[issnl] container_id = None @@ -69,9 +73,13 @@ class FatcatImporter: self._issnl_id_map[issnl] = container_id # might be None return container_id + def is_orcid(self, orcid): + return self._orcid_regex.match(orcid) != None + def lookup_orcid(self, orcid): """Caches calls to the Orcid lookup API endpoint in a local dict""" - assert len(orcid) == 19 and orcid[4] == '-' + if not self.is_orcid(orcid): + return None if orcid in self._orcid_id_map: return self._orcid_id_map[orcid] creator_id = None @@ -84,9 +92,12 @@ class FatcatImporter: self._orcid_id_map[orcid] = creator_id # might be None return creator_id + def is_doi(self, doi): + return doi.startswith("10.") and doi.count("/") >= 1 + def lookup_doi(self, doi): """Caches calls to the doi lookup API endpoint in a local dict""" - assert doi.startswith('10.') + assert self.is_doi(doi) doi = doi.lower() if doi in self._doi_id_map: return self._doi_id_map[doi] diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py index 69b184d5..e57703d5 100644 --- a/python/fatcat/orcid_importer.py +++ b/python/fatcat/orcid_importer.py @@ -5,7 +5,6 @@ import itertools import fatcat_client from fatcat.importer_common import FatcatImporter - def value_or_none(e): if type(e) == dict: e = e.get('value') @@ -46,8 +45,12 @@ class FatcatOrcidImporter(FatcatImporter): else: # must have *some* name return None + orcid = obj['orcid-identifier']['path'] + if not self.is_orcid(orcid): + sys.stderr.write("Bad ORCID: {}\n".format(orcid)) + return None ce = fatcat_client.CreatorEntity( - orcid=obj['orcid-identifier']['path'], + orcid=orcid, given_name=given, surname=sur, display_name=display, diff --git a/python/tests/files/0000-0001-8254-710X.json b/python/tests/files/0000-0001-8254-710X.json new file mode 100644 index 00000000..094cae67 --- /dev/null +++ b/python/tests/files/0000-0001-8254-710X.json @@ -0,0 +1 @@ +{"orcid-identifier":{"uri":"http://orcid.org/0000-0001-8254-710X","path":"0000-0001-8254-710X","host":"orcid.org"},"preferences":{"locale":"en"},"history":{"creation-method":"Member-referred","completion-date":null,"submission-date":{"value":1407501041999},"last-modified-date":{"value":1465949566770},"claimed":true,"source":null,"deactivation-date":null,"verified-email":true,"verified-primary-email":true},"person":{"last-modified-date":null,"name":{"created-date":{"value":1460755375159},"last-modified-date":{"value":1460755375159},"given-names":{"value":"Man-Hui"},"family-name":{"value":"Li"},"credit-name":null,"source":null,"visibility":"public","path":"0000-0001-8254-710X"},"other-names":{"last-modified-date":null,"other-name":null,"path":"/0000-0001-8254-710X/other-names"},"biography":{"created-date":{"value":1460755375161},"last-modified-date":{"value":1460755375161},"content":null,"visibility":"public","path":"/0000-0001-8254-710X/biography"},"researcher-urls":{"last-modified-date":null,"researcher-url":null,"path":"/0000-0001-8254-710X/researcher-urls"},"emails":{"last-modified-date":null,"email":null,"path":"/0000-0001-8254-710X/email"},"addresses":{"last-modified-date":null,"address":null,"path":"/0000-0001-8254-710X/address"},"keywords":{"last-modified-date":null,"keyword":null,"path":"/0000-0001-8254-710X/keywords"},"external-identifiers":{"last-modified-date":null,"external-identifier":null,"path":"/0000-0001-8254-710X/external-identifiers"},"path":"/0000-0001-8254-710X/person"},"activities-summary":{"last-modified-date":null,"educations":{"last-modified-date":null,"education-summary":null,"path":"/0000-0001-8254-710X/educations"},"employments":{"last-modified-date":null,"employment-summary":null,"path":"/0000-0001-8254-710X/employments"},"fundings":{"last-modified-date":null,"group":null,"path":"/0000-0001-8254-710X/fundings"},"peer-reviews":{"last-modified-date":null,"group":null,"path":"/0000-0001-8254-710X/peer-reviews"},"works":{"last-modified-date":null,"group":null,"path":"/0000-0001-8254-710X/works"},"path":"/0000-0001-8254-710X/activities"},"path":"/0000-0001-8254-710X"} diff --git a/python/tests/files/crossref-works.2018-01-21.badsample.json b/python/tests/files/crossref-works.2018-01-21.badsample.json index d0ce191f..931da7a7 100644 --- a/python/tests/files/crossref-works.2018-01-21.badsample.json +++ b/python/tests/files/crossref-works.2018-01-21.badsample.json @@ -9,5 +9,6 @@ { "_id" : { "$oid" : "5a55196988a035a45bda0cb1" }, "indexed" : { "date-parts" : [ [ 2017, 10, 23 ] ], "date-time" : "2017-10-23T14:41:48Z", "timestamp" : { "$numberLong" : "1508769708308" } }, "reference-count" : 44, "publisher" : "Elsevier BV", "issue" : "1", "license" : [ { "URL" : "http://www.elsevier.com/tdm/userlicense/1.0/", "start" : { "date-parts" : [ [ 1998, 11, 1 ] ], "date-time" : "1998-11-01T00:00:00Z", "timestamp" : { "$numberLong" : "909878400000" } }, "delay-in-days" : 0, "content-version" : "tdm" } ], "content-domain" : { "domain" : [], "crossmark-restriction" : false }, "short-container-title" : [ "Toxicology and Applied Pharmacology" ], "published-print" : { "date-parts" : [ [ 1998, 11 ] ] }, "DOI" : "10.1006/taap.1998.8543", "type" : "journal-article", "created" : { "date-parts" : [ [ 2002, 9, 18 ] ], "date-time" : "2002-09-18T22:01:25Z", "timestamp" : { "$numberLong" : "1032386485000" } }, "page" : "102-108", "source" : "Crossref", "is-referenced-by-count" : 44, "title" : [ "Role of CYP1A2 in the Hepatotoxicity of Acetaminophen: Investigations UsingCyp1a2Null Mice" ], "prefix" : "10.1006", "volume" : "153", "author" : [ { "given" : "Robert P.", "family" : "Tonge", "affiliation" : [] }, { "given" : "Edward J.", "family" : "Kelly", "affiliation" : [] }, { "given" : "Sam A.", "family" : "Bruschi", "affiliation" : [] }, { "given" : "Tom", "family" : "Kalhorn", "affiliation" : [] }, { "given" : "David L.", "family" : "Eaton", "affiliation" : [] }, { "given" : "Daniel W.", "family" : "Nebert", "affiliation" : [] }, { "given" : "Sidney D.", "family" : "Nelson", "affiliation" : [] } ], "member" : "78", "container-title" : [ "Toxicology and Applied Pharmacology" ], "link" : [ { "URL" : "http://api.elsevier.com/content/article/PII:S0041008X9898543X?httpAccept=text/xml", "content-type" : "text/xml", "content-version" : "vor", "intended-application" : "text-mining" }, { "URL" : "http://api.elsevier.com/content/article/PII:S0041008X9898543X?httpAccept=text/plain", "content-type" : "text/plain", "content-version" : "vor", "intended-application" : "text-mining" } ], "deposited" : { "date-parts" : [ [ 2017, 6, 14 ] ], "date-time" : "2017-06-14T16:51:33Z", "timestamp" : { "$numberLong" : "1497459093000" } }, "score" : 1, "issued" : { "date-parts" : [ [ 1998, 11 ] ] }, "references-count" : 44, "alternative-id" : [ "S0041008X9898543X" ], "URL" : "http://dx.doi.org/10.1006/taap.1998.8543", "ISSN" : [ "0041-008X" ], "issn-type" : [ { "value" : "0041-008X", "type" : "print" } ], "subject" : [ "Toxicology", "Pharmacology" ] } { "_id" : { "$oid" : "5a55170088a035a45bd8490d" }, "indexed" : { "date-parts" : [ [ 2017, 10, 23 ] ], "date-time" : "2017-10-23T14:30:12Z", "timestamp" : { "$numberLong" : "1508769012416" } }, "reference-count" : 37, "publisher" : "Wiley-Blackwell", "issue" : "2", "license" : [ { "URL" : "http://doi.wiley.com/10.1002/tdm_license_1.1", "start" : { "date-parts" : [ [ 2015, 9, 1 ] ], "date-time" : "2015-09-01T00:00:00Z", "timestamp" : { "$numberLong" : "1441065600000" } }, "delay-in-days" : 5356, "content-version" : "tdm" } ], "content-domain" : { "domain" : [], "crossmark-restriction" : false }, "short-container-title" : [ "Am. J. Ind. Med." ], "published-print" : { "date-parts" : [ [ 2001, 2 ] ] }, "DOI" : "10.1002/1097-0274(200102)39:2<218::aid-ajim1009>3.0.co;2-4", "type" : "journal-article", "created" : { "date-parts" : [ [ 2002, 8, 25 ] ], "date-time" : "2002-08-25T20:41:50Z", "timestamp" : { "$numberLong" : "1030308110000" } }, "page" : "218-226", "source" : "Crossref", "is-referenced-by-count" : 10, "title" : [ "The work environment impact assessment: A methodologic framework for evaluating health-based interventions" ], "prefix" : "10.1002", "volume" : "39", "author" : [ { "given" : "Beth J.", "family" : "Rosenberg", "affiliation" : [] }, { "given" : "Elizabeth M.", "family" : "Barbeau", "affiliation" : [] }, { "given" : "Rafael", "family" : "Moure-Eraso", "affiliation" : [] }, { "given" : "Charles", "family" : "Levenstein", "affiliation" : [] } ], "member" : "311", "published-online" : { "date-parts" : [ [ 2001 ] ] }, "reference" : [ { "key" : "BIB1", "author" : "Barbeau", "year" : "1998", "unstructured" : "1998. Displaced tobacco workers, public health, and tobacco policy: moving beyond jobs versus health. Doctoral thesis, Department of Work Environment, University of Massachusetts, Lowell." }, { "key" : "BIB2", "author" : "Berberian", "volume" : "37", "first-page" : "126", "year" : "1987", "journal-title" : "J Occup Environ Med" }, { "key" : "BIB3", "author" : "Bignami", "volume" : "80", "first-page" : "265", "year" : "1981", "journal-title" : "Mutat Res", "DOI" : "10.1016/0027-5107(81)90099-3", "doi-asserted-by" : "crossref" }, { "key" : "BIB4", "author" : "Britton", "year" : "1989", "unstructured" : "1989. The post-Alar era dawns chilly for apple growers. Boston Globe. Oct. 25, p. 34." }, { "key" : "BIB5", "author" : "Brusick", "year" : "1976", "unstructured" : "1976. Mutagen and oncogen Study on 1,1-dimethylhydrazine. Prepared for the Aerospace Med. Res. Lab., Aeropsace Med. Div. Airforce Systems Command, Wright- Patterson A.F.B., Dayton OH Litton Bionetics, Inc., Kensington, MD. NTIS AD-A035475." }, { "key" : "BIB6", "author" : "Chemical Marketing Reporter", "year" : "1984", "unstructured" : "Chemical Marketing Reporter. 1984. Uniroyal pesticide to be reviewed by EPA: regulatory action prompted by its toxicity. July 23." }, { "key" : "BIB7", "author" : "Chemical Marketing Reporter", "year" : "1989", "unstructured" : "Chemical Marketing Reporter. 1989. Uniroyal pulls apple pesticide from market, citing controversy. June 5." }, { "key" : "BIB8", "year" : "1990", "unstructured" : "Du Pont Chemical Company. 1990. MSDS No. M0000057, p. 2." }, { "key" : "BIB9", "year" : "1993", "unstructured" : "Farm Chemicals Handbook '93. 1993. Willoughby, OH: Meister.", "volume-title" : "Farm Chemicals Handbook '93" }, { "key" : "BIB10", "year" : "1985", "unstructured" : "Farm Chemicals Handbook '85. 1985. Willoughby, OH: Meister.", "volume-title" : "Farm Chemicals Handbook '85" }, { "key" : "BIB11", "author" : "Federal Register", "year" : "1989", "unstructured" : "Federal Register. 1989. Daminozide: termination of special review of food uses. Vol. 54, No. 216, p. 47482, November 14." }, { "key" : "BIB12", "author" : "Fenske", "first-page" : "729", "year" : "2000", "unstructured" : "2000. Agricultural workers. In: editors. Occupational health: recognizing and preventing work-related disease and injury. 4th ed. Philadelphia: Lippincott Williams and Wilkins, p. 729-748.", "volume-title" : "Occupational health: recognizing and preventing work-related disease and injury" }, { "key" : "BIB13", "author" : "Gibson", "volume" : "5", "first-page" : "24", "year" : "1994", "journal-title" : "New Solutions", "DOI" : "10.2190/NS5.1.g", "doi-asserted-by" : "crossref" }, { "key" : "BIB14", "author" : "Goldenhar", "volume" : "29", "first-page" : "289", "year" : "1996", "journal-title" : "Am J Ind Med", "DOI" : "10.1002/(SICI)1097-0274(199604)29:4<289::AID-AJIM2>3.0.CO;2-K", "doi-asserted-by" : "crossref" }, { "key" : "BIB15", "author" : "Haun", "year" : "1984", "unstructured" : "1984. Inhalation studies of UDMH. Air Force Aerospace Medical Res Lab, TR-85-020." }, { "key" : "BIB16", "author" : "International Agency for Research on Cancer (IARC)", "year" : "1997", "unstructured" : "International Agency for Research on Cancer (IARC). 1997. Evaluation of carcinogen risks to humans: man-made mineral fibres and radon. Lyons, France." }, { "key" : "BIB17", "author" : "Lord", "year" : "1969", "unstructured" : "1969 (May-June). Thoughts on the apple harvest problem. Fruit Notes. U. S. Department of Agriculture, Massachusetts Extension Service." }, { "key" : "BIB18", "author" : "Manning", "first-page" : "34", "year" : "1989", "unstructured" : "Sales Agent for J. P. Sullivan and Co., of Ayer, MA, an apple commission house. In 1989. The post-Alar era dawns chilly for apple growers. Boston Globe Oct. 25 p. 34.", "volume-title" : "The post-Alar era dawns chilly for apple growers" }, { "key" : "BIB19", "author" : "National Cancer Institute", "year" : "1978", "unstructured" : "National Cancer Institute. 1978. Bioassay of daminozide for possible carcinogenicity. Washington, D.C., United State Department of Health, Education and Welfare, Public Health Service (NIC Carcinogenesis Technical Report Series No. 83; DHEW Publication No (NIH 78-1333)." }, { "key" : "BIB20", "author" : "Rogers", "volume" : "89", "first-page" : "321", "year" : "1981", "journal-title" : "Mutat Res", "DOI" : "10.1016/0165-1218(81)90113-0", "doi-asserted-by" : "crossref" }, { "key" : "BIB21", "author" : "Rosenberg", "year" : "1995", "unstructured" : "1995. The best laid bans: the impact of pesticide bans on workers. Doctoral thesis, Department of Work Environment, University of Massachusetts Lowell." }, { "key" : "BIB22", "author" : "Rosenberg", "volume" : "6", "first-page" : "34", "year" : "1996", "journal-title" : "New Solutions: A Journal of Environmental and Occupational Health Policy", "DOI" : "10.2190/NS6.2.d", "doi-asserted-by" : "crossref" }, { "key" : "BIB23", "author" : "Rosenberg", "volume" : "8", "first-page" : "365", "year" : "1998", "journal-title" : "New Solutions Environmental Health Policy", "DOI" : "10.2190/A2A1-CT1X-RY6D-RR3M", "doi-asserted-by" : "crossref" }, { "key" : "BIB24", "author" : "Saunders", "volume" : "29", "first-page" : "409", "year" : "1987", "journal-title" : "J Occup Environ Med" }, { "key" : "BIB25", "author" : "Toth", "volume" : "50", "first-page" : "181", "year" : "1973", "journal-title" : "J Natl Cancer Inst", "DOI" : "10.1093/jnci/50.1.181", "doi-asserted-by" : "crossref" }, { "key" : "BIB26", "author" : "Toth", "volume" : "40", "first-page" : "2427", "year" : "1977a", "journal-title" : "Cancer", "DOI" : "10.1002/1097-0142(197711)40:5+<2427::AID-CNCR2820400906>3.0.CO;2-Y", "doi-asserted-by" : "crossref" }, { "key" : "BIB27", "author" : "Toth", "volume" : "37", "first-page" : "3497", "year" : "1977b", "journal-title" : "Cancer Res" }, { "key" : "BIB28", "author" : "U.S. Environmental Protection Agency", "year" : "1986", "unstructured" : "U.S. Environmental Protection Agency. 1986. Integrated Risk Information System (IRIS). Oxamyl. December 9." }, { "key" : "BIB29", "author" : "U.S. Environmental Protection Agency", "year" : "1986", "unstructured" : "U.S. Environmental Protection Agency. 1986. Chemical Fact Sheet Number 26: Daminozide. Office of Pesticides and Toxic Substances, Washington, DC. 10-169." }, { "key" : "BIB30", "author" : "U.S. Environmental Protection Agency", "year" : "1989", "unstructured" : "U.S. Environmental Protection Agency, Office of Pesticide Programs, Office of Pesticides and Toxic Substances. 1989. Daminozide special review technical support document: Preliminary determination to cancel the food uses of Daminozide. Washington, DC: May." }, { "key" : "BIB31", "author" : "U.S. Environmental Protection Agency", "volume" : "54", "first-page" : "10", "year" : "1989", "journal-title" : "Fed Regist." }, { "key" : "BIB32", "author" : "U.S. Environmental Protection Agency", "year" : "1990", "unstructured" : "U.S. Environmental Protection Agency. 1990. Integrated Risk Information System (IRIS). Propargite. May 1." }, { "key" : "BIB33", "author" : "U.S. Environmental Protection Agency", "volume" : "57", "first-page" : "10", "year" : "1992", "journal-title" : "Fed. Regist." }, { "key" : "BIB34", "author" : "U.S. Environmental Protection Agency", "year" : "1993", "unstructured" : "U.S. Environmental Protection Agency, Office of Prevention, Pesticides and Toxic Substances. 1993. R.E.D. Facts, Document number EPA-738-F-93-007. September." }, { "key" : "BIB35", "author" : "U.S. Department of Agriculture", "year" : "1993", "journal-title" : "New England Agricultural Statistics" }, { "key" : "BIB36", "author" : "Warren", "year" : "1992", "unstructured" : "1992. Unanticipated consequences of banning a chemical: the case of Alar. Unpublished manuscript, Department of Work Environment, University of Massachusetts Lowell." }, { "key" : "BIB37", "author" : "Wood", "year" : "1990", "unstructured" : "1990. Memo to Poverty Lane, West Lebanon, New Hampshire, to members of the Risk Assessment/Risk Management Work Group, Keystone National Policy Dialogue on Food Safety, Oct. 26, 1990, cited in Rosenberg, B. 1996." } ], "container-title" : [ "American Journal of Industrial Medicine" ], "link" : [ { "URL" : "https://api.wiley.com/onlinelibrary/tdm/v1/articles/10.1002%2F1097-0274(200102)39:2%3C218::AID-AJIM1009%3E3.0.CO;2-4", "content-type" : "unspecified", "content-version" : "vor", "intended-application" : "text-mining" } ], "deposited" : { "date-parts" : [ [ 2017, 8, 4 ] ], "date-time" : "2017-08-04T20:22:16Z", "timestamp" : { "$numberLong" : "1501878136000" } }, "score" : 1, "issued" : { "date-parts" : [ [ 2001 ] ] }, "references-count" : 37, "URL" : "http://dx.doi.org/10.1002/1097-0274(200102)39:2<218::aid-ajim1009>3.0.co;2-4", "relation" : { "cites" : [] }, "ISSN" : [ "0271-3586", "1097-0274" ], "issn-type" : [ { "value" : "0271-3586", "type" : "print" }, { "value" : "1097-0274", "type" : "electronic" } ], "subject" : [ "Public Health, Environmental and Occupational Health" ] } { "_id" : { "$oid" : "5a553b4388a035a45bf39150" }, "indexed" : { "date-parts" : [ [ 2017, 10, 23 ] ], "date-time" : "2017-10-23T17:10:15Z", "timestamp" : { "$numberLong" : "1508778615346" } }, "reference-count" : 22, "publisher" : "Elsevier BV", "issue" : "4", "license" : [ { "URL" : "http://www.elsevier.com/tdm/userlicense/1.0/", "start" : { "date-parts" : [ [ 2001, 12, 1 ] ], "date-time" : "2001-12-01T00:00:00Z", "timestamp" : { "$numberLong" : "1007164800000" } }, "delay-in-days" : 0, "content-version" : "tdm" } ], "content-domain" : { "domain" : [], "crossmark-restriction" : false }, "short-container-title" : [ "International Journal of Hospitality Management" ], "published-print" : { "date-parts" : [ [ 2001, 12 ] ] }, "DOI" : "10.1016/s0278-4319(01)00020-2", "type" : "journal-article", "created" : { "date-parts" : [ [ 2002, 7, 25 ] ], "date-time" : "2002-07-25T14:28:16Z", "timestamp" : { "$numberLong" : "1027607296000" } }, "page" : "325-338", "source" : "Crossref", "is-referenced-by-count" : 14, "title" : [ "Hotel management style: a study of employee perceptions and preferences" ], "prefix" : "10.1016", "volume" : "20", "author" : [ { "given" : "Margaret", "family" : "Deery", "affiliation" : [] }, { "given" : "Leo K", "family" : "Jago", "affiliation" : [] } ], "member" : "78", "container-title" : [ "International Journal of Hospitality Management" ], "link" : [ { "URL" : "http://api.elsevier.com/content/article/PII:S0278431901000202?httpAccept=text/xml", "content-type" : "text/xml", "content-version" : "vor", "intended-application" : "text-mining" }, { "URL" : "http://api.elsevier.com/content/article/PII:S0278431901000202?httpAccept=text/plain", "content-type" : "text/plain", "content-version" : "vor", "intended-application" : "text-mining" } ], "deposited" : { "date-parts" : [ [ 2017, 6, 14 ] ], "date-time" : "2017-06-14T21:24:09Z", "timestamp" : { "$numberLong" : "1497475449000" } }, "score" : 1, "issued" : { "date-parts" : [ [ 2001, 12 ] ] }, "references-count" : 22, "alternative-id" : [ "S0278431901000202" ], "URL" : "http://dx.doi.org/10.1016/s0278-4319(01)00020-2", "ISSN" : [ "0278-4319" ], "issn-type" : [ { "value" : "0278-4319", "type" : "print" } ], "subject" : [ "Tourism, Leisure and Hospitality Management", "Strategy and Management" ] } +{ "_id" : { "$oid" : "5a55176088a035a45bd8802c" }, "indexed" : { "date-parts" : [ [ 2017, 10, 23 ] ], "date-time" : "2017-10-23T14:31:47Z", "timestamp" : { "$numberLong" : "1508769107897" } }, "reference-count" : 1, "publisher" : "Hindawi Limited", "issue" : "2", "license" : [ { "URL" : "http://creativecommons.org/licenses/by/3.0/", "start" : { "date-parts" : [ [ 2002, 1, 1 ] ], "date-time" : "2002-01-01T00:00:00Z", "timestamp" : { "$numberLong" : "1009843200000" } }, "delay-in-days" : 0, "content-version" : "vor" } ], "content-domain" : { "domain" : [], "crossmark-restriction" : false }, "short-container-title" : [ "Comparative and Functional Genomics" ], "published-print" : { "date-parts" : [ [ 2002 ] ] }, "abstract" : "This brief meeting review summarizes the recommendations of NSF and NPGI funded bioinformaticians concerning the future requirements for plant bioinformatics systems and databases.", "DOI" : "10.1002/cfg.158", "type" : "journal-article", "created" : { "date-parts" : [ [ 2002, 8, 25 ] ], "date-time" : "2002-08-25T23:45:33Z", "timestamp" : { "$numberLong" : "1030319133000" } }, "page" : "176-176", "source" : "Crossref", "is-referenced-by-count" : 4, "title" : [ "Meeting Review: Plant Bioinformatics at the NSF and NPGI (PAMGX Satellite) Meetings" ], "prefix" : "10.1155", "volume" : "3", "author" : [ { "ORCID" : "http://orcid.org/0000-0002-4447-597X", "authenticated-orcid" : true, "given" : "Richard", "family" : "Bruskiewich", "affiliation" : [ { "name" : "International Rice Research Institute (IRRI), Metro Manila DAPO 7777, Philippines" } ] } ], "member" : "98", "reference" : [ { "key" : "10.1002/cfg.158-BIB1", "author" : "Brazma", "volume" : "29", "first-page" : "365", "year" : "2001", "journal-title" : "Nature Genetics", "DOI" : "10.1038/ng1201-365", "doi-asserted-by" : "crossref" } ], "container-title" : [ "Comparative and Functional Genomics" ], "link" : [ { "URL" : "http://downloads.hindawi.com/journals/ijg/2002/250628.pdf", "content-type" : "application/pdf", "content-version" : "vor", "intended-application" : "text-mining" }, { "URL" : "http://downloads.hindawi.com/journals/ijg/2002/250628.pdf", "content-type" : "unspecified", "content-version" : "vor", "intended-application" : "similarity-checking" } ], "deposited" : { "date-parts" : [ [ 2017, 8, 5 ] ], "date-time" : "2017-08-05T06:32:00Z", "timestamp" : { "$numberLong" : "1501914720000" } }, "score" : 1, "issued" : { "date-parts" : [ [ 2002 ] ] }, "references-count" : 1, "alternative-id" : [ "250628" ], "URL" : "http://dx.doi.org/10.1002/cfg.158", "relation" : { "cites" : [] }, "ISSN" : [ "1531-6912", "1532-6268" ], "issn-type" : [ { "value" : "1531-6912", "type" : "print" }, { "value" : "1532-6268", "type" : "electronic" } ], "subject" : [ "Biotechnology", "Genetics", "Molecular Biology" ] } { "_id" : { "$oid" : "5a55176088a035a45bd8802c" }, "indexed" : { "date-parts" : [ [ 2017, 10, 23 ] ], "date-time" : "2017-10-23T14:31:47Z", "timestamp" : { "$numberLong" : "1508769107897" } }, "reference-count" : 1, "publisher" : "Hindawi Limited", "issue" : "2", "license" : [ { "URL" : "http://creativecommons.org/licenses/by/3.0/", "start" : { "date-parts" : [ [ 2002, 1, 1 ] ], "date-time" : "2002-01-01T00:00:00Z", "timestamp" : { "$numberLong" : "1009843200000" } }, "delay-in-days" : 0, "content-version" : "vor" } ], "content-domain" : { "domain" : [], "crossmark-restriction" : false }, "short-container-title" : [ "Comparative and Functional Genomics" ], "published-print" : { "date-parts" : [ [ 2002 ] ] }, "abstract" : "This brief meeting review summarizes the recommendations of NSF and NPGI funded bioinformaticians concerning the future requirements for plant bioinformatics systems and databases.", "DOI" : "10.1002/cfg.158", "type" : "journal-article", "created" : { "date-parts" : [ [ 2002, 8, 25 ] ], "date-time" : "2002-08-25T23:45:33Z", "timestamp" : { "$numberLong" : "1030319133000" } }, "page" : "176-176", "source" : "Crossref", "is-referenced-by-count" : 4, "title" : [ "Meeting Review: Plant Bioinformatics at the NSF and NPGI (PAMGX Satellite) Meetings" ], "prefix" : "10.1155", "volume" : "3", "author" : [ { "ORCID" : "http://orcid.org/0000-0002-4447-5978", "authenticated-orcid" : true, "given" : "Richard", "family" : "Bruskiewich", "affiliation" : [ { "name" : "International Rice Research Institute (IRRI), Metro Manila DAPO 7777, Philippines" } ] } ], "member" : "98", "reference" : [ { "key" : "10.1002/cfg.158-BIB1", "author" : "Brazma", "volume" : "29", "first-page" : "365", "year" : "2001", "journal-title" : "Nature Genetics", "DOI" : "10.1038/ng1201-365", "doi-asserted-by" : "crossref" } ], "container-title" : [ "Comparative and Functional Genomics" ], "link" : [ { "URL" : "http://downloads.hindawi.com/journals/ijg/2002/250628.pdf", "content-type" : "application/pdf", "content-version" : "vor", "intended-application" : "text-mining" }, { "URL" : "http://downloads.hindawi.com/journals/ijg/2002/250628.pdf", "content-type" : "unspecified", "content-version" : "vor", "intended-application" : "similarity-checking" } ], "deposited" : { "date-parts" : [ [ 2017, 8, 5 ] ], "date-time" : "2017-08-05T06:32:00Z", "timestamp" : { "$numberLong" : "1501914720000" } }, "score" : 1, "issued" : { "date-parts" : [ [ 2002 ] ] }, "references-count" : 1, "alternative-id" : [ "250628" ], "URL" : "http://dx.doi.org/10.1002/cfg.158", "relation" : { "cites" : [] }, "ISSN" : [ "1531-6912", "1532-6268" ], "issn-type" : [ { "value" : "1531-6912", "type" : "print" }, { "value" : "1532-6268", "type" : "electronic" } ], "subject" : [ "Biotechnology", "Genetics", "Molecular Biology" ] } { "_id" : { "$oid" : "5a551fbe88a035a45bdf19fd" }, "indexed" : { "date-parts" : [ [ 2017, 10, 23 ] ], "date-time" : "2017-10-23T15:12:12Z", "timestamp" : { "$numberLong" : "1508771532055" } }, "reference-count" : 0, "publisher" : "Springer Nature", "issue" : "11", "content-domain" : { "domain" : [], "crossmark-restriction" : false }, "short-container-title" : [ "Skeletal Radiol" ], "published-print" : { "date-parts" : [ [ 2001, 11 ] ] }, "DOI" : "10.1007/s002560100423", "type" : "journal-article", "created" : { "date-parts" : [ [ 2002, 10, 6 ] ], "date-time" : "2002-10-06T13:44:04Z", "timestamp" : { "$numberLong" : "1033911844000" } }, "page" : "643-647", "source" : "Crossref", "is-referenced-by-count" : 2, "title" : [ "Unilateral osteonecrosis in a patient with bilateral os centrale carpi: imaging findings" ], "prefix" : "10.1007", "volume" : "30", "author" : [ { "given" : "F.", "family" : "Abascal", "affiliation" : [] }, { "given" : "L.", "family" : "Cerezal", "affiliation" : [] }, { "given" : "F.", "family" : "del Piñal", "affiliation" : [] }, { "given" : "R.", "family" : "García-Valtuille", "affiliation" : [] }, { "given" : "A.", "family" : "García-Valtuille", "affiliation" : [] }, { "given" : "A.", "family" : "Canga", "affiliation" : [] }, { "given" : "J.", "family" : "Torcida", "affiliation" : [] } ], "member" : "297", "published-online" : { "date-parts" : [ [ 2001, 9, 14 ] ] }, "container-title" : [ "Skeletal Radiology" ], "link" : [ { "URL" : "http://link.springer.com/content/pdf/10.1007/s002560100423", "content-type" : "unspecified", "content-version" : "vor", "intended-application" : "similarity-checking" } ], "deposited" : { "date-parts" : [ [ 2014, 4, 9 ] ], "date-time" : "2014-04-09T05:21:52Z", "timestamp" : { "$numberLong" : "1397020912000" } }, "score" : 1, "issued" : { "date-parts" : [ [ 2001, 9, 14 ] ] }, "references-count" : 0, "alternative-id" : [ "423" ], "URL" : "http://dx.doi.org/10.1007/s002560100423", "ISSN" : [ "0364-2348", "1432-2161" ], "issn-type" : [ { "value" : "0364-2348", "type" : "print" }, { "value" : "1432-2161", "type" : "electronic" } ], "subject" : [ "Radiology Nuclear Medicine and imaging" ] } diff --git a/python/tests/importer.py b/python/tests/importer.py index 190acbed..4d49e794 100644 --- a/python/tests/importer.py +++ b/python/tests/importer.py @@ -13,3 +13,23 @@ def test_issnl_mapping_lookup(): assert fi.issn2issnl('9999-0027') == None assert fi.lookup_issnl('9999-9999') == None + +def test_identifiers(): + + with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: + fi = FatcatImporter("http://localhost:9411/v0", issn_file) + + assert fi.is_issnl("1234-5678") == True + assert fi.is_issnl("1234-5678.") == False + assert fi.is_issnl("12345678") == False + assert fi.is_issnl("1-2345678") == False + + assert fi.is_doi("10.1234/56789") == True + assert fi.is_doi("101234/56789") == False + assert fi.is_doi("10.1234_56789") == False + + assert fi.is_orcid("0000-0003-3118-6591") == True + assert fi.is_orcid("0000-00x3-3118-659") == False + assert fi.is_orcid("0000-00033118-659") == False + assert fi.is_orcid("0000-0003-3118-659.") == False + diff --git a/python/tests/orcid.py b/python/tests/orcid.py index 00748972..e07583ac 100644 --- a/python/tests/orcid.py +++ b/python/tests/orcid.py @@ -13,6 +13,10 @@ def test_orcid_importer_batch(orcid_importer): with open('tests/files/0000-0001-8254-7103.json', 'r') as f: orcid_importer.process_batch(f) +def test_orcid_importer_badid(orcid_importer): + with open('tests/files/0000-0001-8254-710X.json', 'r') as f: + orcid_importer.process_batch(f) + def test_orcid_importer(orcid_importer): with open('tests/files/0000-0001-8254-7103.json', 'r') as f: orcid_importer.process_source(f) -- cgit v1.2.3