summary refs log tree commit diff stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-03-20 13:00:50 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2020-03-20 13:00:52 -0700
commit a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb (patch)
tree 6fe603ef02c70ae748cafd0c407978c74bd3ae3c /python/fatcat_tools/importers
parent 12c0e53669fb9401b09e088217c5c103d90b9106 (diff)
download fatcat-a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb.tar.gz
fatcat-a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb.zip
pubmed: handle multiple ReferenceList
This resolves a situation noticed in prod where we were only importing/updating a single reference per article. Includes a regression test.
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/pubmed.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 70a6368d..3ecf5ef4 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -616,7 +616,10 @@ class PubmedImporter(EntityImporter):
### References
refs = []
if pubmed.ReferenceList:
- for ref in pubmed.ReferenceList.find_all('Reference'):
+ # note that Reference always exists within a ReferenceList, but
+ # that there may be multiple ReferenceList (eg, sometimes one per
+ # Reference)
+ for ref in pubmed.find_all('Reference'):
ref_extra = dict()
ref_doi = ref.find("ArticleId", IdType="doi")
if ref_doi: