From f77a553350238c8ccc9c3bc0edcf47fb9dd067b3 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 1 Apr 2020 12:02:20 -0700 Subject: importers: replace newlines in get_text() strings --- python/fatcat_tools/importers/arxiv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'python/fatcat_tools/importers/arxiv.py') diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index 79b242c4..719592fc 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -118,13 +118,13 @@ class ArxivRawImporter(EntityImporter): if not (doi.startswith('10.') and '/' in doi and doi.split('/')[1]): sys.stderr.write("BOGUS DOI: {}\n".format(doi)) doi = None - title = latex_to_text(metadata.title.get_text()) - authors = parse_arxiv_authors(metadata.authors.get_text()) + title = latex_to_text(metadata.title.get_text().replace('\n', ' ')) + authors = parse_arxiv_authors(metadata.authors.get_text().replace('\n', ' ')) contribs = [fatcat_openapi_client.ReleaseContrib(index=i, raw_name=a, role='author') for i, a in enumerate(authors)] lang = "en" # the vast majority in english if metadata.comments and metadata.comments.get_text(): - comments = metadata.comments.get_text().strip() + comments = metadata.comments.get_text().replace('\n', ' ').strip() extra_arxiv['comments'] = comments if 'in french' in comments.lower(): lang = 'fr' @@ -146,7 +146,7 @@ class ArxivRawImporter(EntityImporter): number = None if metadata.find('journal-ref') and metadata.find('journal-ref').get_text(): - journal_ref = metadata.find('journal-ref').get_text().strip() + journal_ref = metadata.find('journal-ref').get_text().replace('\n', ' ').strip() extra_arxiv['journal_ref'] = journal_ref if "conf." in journal_ref.lower() or "proc." in journal_ref.lower(): release_type = "paper-conference" -- cgit v1.2.3