From a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 20 Mar 2020 13:00:50 -0700
Subject: pubmed: handle multiple ReferenceList

This resolves a situation noticed in prod where we were only
importing/updating a single reference per article. Includes a
regression test.
---
 python/fatcat_tools/importers/pubmed.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'python/fatcat_tools/importers')

diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 70a6368d..3ecf5ef4 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -616,7 +616,10 @@ class PubmedImporter(EntityImporter):
         ### References
         refs = []
         if pubmed.ReferenceList:
-            for ref in pubmed.ReferenceList.find_all('Reference'):
+            # note that Reference always exists within a ReferenceList, but
+            # that there may be multiple ReferenceList (eg, sometimes one per
+            # Reference)
+            for ref in pubmed.find_all('Reference'):
                 ref_extra = dict()
                 ref_doi = ref.find("ArticleId", IdType="doi")
                 if ref_doi:
-- 
cgit v1.2.3