diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-03-20 13:00:50 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-03-20 13:00:52 -0700 |
commit | a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb (patch) | |
tree | 6fe603ef02c70ae748cafd0c407978c74bd3ae3c /python/fatcat_tools/importers | |
parent | 12c0e53669fb9401b09e088217c5c103d90b9106 (diff) | |
download | fatcat-a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb.tar.gz fatcat-a6f74183dd1cf1eaa44f7edeb98dbc5dc737dabb.zip |
pubmed: handle multiple ReferenceList
This resolves a situation noticed in prod where we were only
importing/updating a single reference per article.
Includes a regression test.
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 70a6368d..3ecf5ef4 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -616,7 +616,10 @@ class PubmedImporter(EntityImporter):
         ### References
         refs = []
         if pubmed.ReferenceList:
-            for ref in pubmed.ReferenceList.find_all('Reference'):
+            # note that Reference always exists within a ReferenceList, but
+            # that there may be multiple ReferenceList (eg, sometimes one per
+            # Reference)
+            for ref in pubmed.find_all('Reference'):
                 ref_extra = dict()
                 ref_doi = ref.find("ArticleId", IdType="doi")
                 if ref_doi: