pubmed: handle multiple ReferenceList

This fixes a bug noticed in production where only a single reference per article was being imported/updated when the PubMed record contained multiple ReferenceList elements. Includes a regression test.
author: Bryan Newbold <bnewbold@robocracy.org> 2020-03-20 13:00:50 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-03-20 13:00:52 -0700
commit: a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb (patch)
tree: 6fe603ef02c70ae748cafd0c407978c74bd3ae3c /python/tests/import_pubmed.py
parent: 12c0e53669fb9401b09e088217c5c103d90b9106 (diff)
download: fatcat-a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb.tar.gz
fatcat-a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb.zip
1 files changed, 12 insertions, 0 deletions
diff --git a/python/tests/import_pubmed.py b/python/tests/import_pubmed.py
index 49609f75..f57aa273 100644
--- a/python/tests/import_pubmed.py
+++ b/python/tests/import_pubmed.py
@@ -118,6 +118,7 @@ def test_pubmed_xml_parse(pubmed_importer):
 
     assert r2.refs[0].extra['unstructured'] == "Microbiology. 2009 Jun;155(Pt 6):1840-6"
     assert r2.refs[0].extra['pmid'] == "19383690"
+    assert len(r2.refs) > 1
 
 def test_pubmed_xml_dates(pubmed_importer):
     with open('tests/files/pubmed_31393839.xml', 'r') as f:
@@ -126,3 +127,14 @@ def test_pubmed_xml_dates(pubmed_importer):
 
     assert r1.release_year == 2019
 
+def test_pubmed_xml_parse_refs(pubmed_importer):
+    """
+    Tests the case of multiple nested ReferenceList/Reference objects, instead
+    of a single ReferenceList with multiple Reference objects.
+    """
+    with open('tests/files/pubmed_19129924.xml', 'r') as f:
+        soup = BeautifulSoup(f, "xml")
+        r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0])
+
+    assert len(r1.refs) > 1
+
author	Bryan Newbold <bnewbold@robocracy.org>	2020-03-20 13:00:50 -0700
committer	Bryan Newbold <bnewbold@robocracy.org>	2020-03-20 13:00:52 -0700
commit	a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb (patch)
tree	6fe603ef02c70ae748cafd0c407978c74bd3ae3c /python/tests/import_pubmed.py
parent	12c0e53669fb9401b09e088217c5c103d90b9106 (diff)
download	fatcat-a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb.tar.gz fatcat-a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb.zip