From a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 20 Mar 2020 13:00:50 -0700 Subject: pubmed: handle multiple ReferenceList This resolves a situation noticed in prod where we were only importing/updating a single reference per article. Includes a regression test. --- python/fatcat_tools/importers/pubmed.py | 5 +- python/tests/files/pubmed_19129924.xml | 206 ++++++++++++++++++++++++++++++++ python/tests/import_pubmed.py | 12 ++ 3 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 python/tests/files/pubmed_19129924.xml diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index 70a6368d..3ecf5ef4 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -616,7 +616,10 @@ class PubmedImporter(EntityImporter): ### References refs = [] if pubmed.ReferenceList: - for ref in pubmed.ReferenceList.find_all('Reference'): + # note that Reference always exists within a ReferenceList, but + # that there may be multiple ReferenceList (eg, sometimes one per + # Reference) + for ref in pubmed.find_all('Reference'): ref_extra = dict() ref_doi = ref.find("ArticleId", IdType="doi") if ref_doi: diff --git a/python/tests/files/pubmed_19129924.xml b/python/tests/files/pubmed_19129924.xml new file mode 100644 index 00000000..a8ff0bcd --- /dev/null +++ b/python/tests/files/pubmed_19129924.xml @@ -0,0 +1,206 @@ + + + 19129924 + + 2011 + 07 + 14 + + + 2020 + 03 + 06 + +
+ + 1662-5196 + + 2 + + 2008 + + + Frontiers in neuroinformatics + Front Neuroinform + + PyMOOSE: Interoperable Scripting in Python for MOOSE. + + 6 + + 10.3389/neuro.11.006.2008 + + Python is emerging as a common scripting language for simulators. This opens up many possibilities for interoperability in the form of analysis, interfaces, and communications between simulators. We report the integration of Python scripting with the Multi-scale Object Oriented Simulation Environment (MOOSE). MOOSE is a general-purpose simulation system for compartmental neuronal models and for models of signaling pathways based on chemical kinetics. We show how the Python-scripting version of MOOSE, PyMOOSE, combines the power of a compiled simulator with the versatility and ease of use of Python. We illustrate this by using Python numerical libraries to analyze MOOSE output online, and by developing a GUI in Python/Qt for a MOOSE simulation. Finally, we build and run a composite neuronal/signaling model that uses both the NEURON and MOOSE numerical engines, and Python as a bridge between the two. Thus PyMOOSE has a high degree of interoperability with analysis routines, with graphical toolkits, and with other simulators. + + + + Ray + Subhasis + S + + National Centre for Biological Sciences Bangalore, India. + + + + Bhalla + Upinder S + US + + + eng + + Journal Article + + + 2008 + 12 + 19 + +
+ + Switzerland + Front Neuroinform + 101477957 + 1662-5196 + + + GENESIS + MOOSE + NEURON + Python + compartmental models + multi-scale models + simulators + systems biology + +
+ + + + 2008 + 09 + 15 + + + 2008 + 11 + 01 + + + 2009 + 1 + 9 + 9 + 0 + + + 2009 + 1 + 9 + 9 + 0 + + + 2009 + 1 + 9 + 9 + 1 + + + epublish + + 19129924 + 10.3389/neuro.11.006.2008 + PMC2614320 + + + + Eur J Neurosci. 2004 Nov;20(10):2671-80 + + 15548210 + + + + + + Science. 2002 Aug 9;297(5583):1018-23 + + 12169734 + + + + + + Philos Trans R Soc Lond B Biol Sci. 2001 Aug 29;356(1412):1209-28 + + 11545699 + + + + + + Biol Cybern. 1985;53(1):41-56 + + 3841014 + + + + + + Neuroinformatics. 2007 Summer;5(2):96-104 + + 17873371 + + + + + + Science. 1999 Jan 15;283(5400):381-7 + + 9888852 + + + + + + Neuroinformatics. 2007 Summer;5(2):127-38 + + 17873374 + + + + + + Nat Biotechnol. 2005 Dec;23(12):1509-15 + + 16333295 + + + + + + Biol Cybern. 1985;53(1):27-40 + + 3841013 + + + + + + J Neurophysiol. 1995 Mar;73(3):1157-68 + + 7608762 + + + + + + Bioinformatics. 2003 Mar 1;19(4):524-31 + + 12611808 + + + + +
+ diff --git a/python/tests/import_pubmed.py b/python/tests/import_pubmed.py index 49609f75..f57aa273 100644 --- a/python/tests/import_pubmed.py +++ b/python/tests/import_pubmed.py @@ -118,6 +118,7 @@ def test_pubmed_xml_parse(pubmed_importer): assert r2.refs[0].extra['unstructured'] == "Microbiology. 2009 Jun;155(Pt 6):1840-6" assert r2.refs[0].extra['pmid'] == "19383690" + assert len(r2.refs) > 1 def test_pubmed_xml_dates(pubmed_importer): with open('tests/files/pubmed_31393839.xml', 'r') as f: @@ -126,3 +127,14 @@ def test_pubmed_xml_dates(pubmed_importer): assert r1.release_year == 2019 +def test_pubmed_xml_parse_refs(pubmed_importer): + """ + Tests the case of multiple nested ReferenceList/Reference objects, instead + of a single ReferenceList with multiple Reference + """ + with open('tests/files/pubmed_19129924.xml', 'r') as f: + soup = BeautifulSoup(f, "xml") + r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0]) + + assert len(r1.refs) > 1 + -- cgit v1.2.3