1 files changed, 7 insertions, 5 deletions
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 3e9527d4..62bb1ddb 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -394,6 +394,7 @@ class PubmedImporter(EntityImporter):
 
         title = medline.Article.ArticleTitle.get_text() # always present
         if title:
+            title = title.replace('\n', ' ')
             if title.endswith('.'):
                 title = title[:-1]
             # this hides some "special" titles, but the vast majority are
@@ -407,6 +408,7 @@ class PubmedImporter(EntityImporter):
         original_title = medline.Article.find("VernacularTitle", recurse=False)
         if original_title:
             original_title = original_title.get_text() or None
+            original_title = original_title.replace('\n', ' ') if original_title else None  # None when text is empty
             if original_title and original_title.endswith('.'):
                 original_title = original_title[:-1]
 
@@ -558,15 +560,15 @@ class PubmedImporter(EntityImporter):
                 surname = None
                 raw_name = None
                 if author.ForeName:
-                    given_name = author.ForeName.get_text()
+                    given_name = author.ForeName.get_text().replace('\n', ' ')
                 if author.LastName:
-                    surname = author.LastName.get_text()
+                    surname = author.LastName.get_text().replace('\n', ' ')
                 if given_name and surname:
                     raw_name = "{} {}".format(given_name, surname)
                 elif surname:
                     raw_name = surname
                 if not raw_name and author.CollectiveName and author.CollectiveName.get_text():
-                    raw_name = author.CollectiveName.get_text()
+                    raw_name = author.CollectiveName.get_text().replace('\n', ' ')
                 contrib_extra = dict()
                 orcid = author.find("Identifier", Source="ORCID")
                 if orcid:
@@ -588,9 +590,9 @@ class PubmedImporter(EntityImporter):
                 affiliations = author.find_all("Affiliation")
                 raw_affiliation = None
                 if affiliations:
-                    raw_affiliation = affiliations[0].get_text()
+                    raw_affiliation = affiliations[0].get_text().replace('\n', ' ')
                     if len(affiliations) > 1:
-                        contrib_extra['more_affiliations'] = [ra.get_text() for ra in affiliations[1:]]
+                        contrib_extra['more_affiliations'] = [ra.get_text().replace('\n', ' ') for ra in affiliations[1:]]
                 if author.find("EqualContrib"):
                     # TODO: schema for this?
                     contrib_extra['equal'] = True