From 3269ace124d9fcc886caa1bb9925a63cea869ba5 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 7 Jul 2022 13:49:11 -0700
Subject: arxiv: work-around hack for strange title

---
 python/fatcat_tools/importers/arxiv.py | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'python/fatcat_tools')

diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index dd2c2284..d7d3ed97 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -18,6 +18,9 @@ latex2text = LatexNodes2Text()
 
 
 def latex_to_text(raw: str) -> str:
+    # hack: handle a single special mangled title
+    if raw.startswith("%CRTFASTGEEPWR"):
+        return raw.strip()
     try:
         return latex2text.latex_to_text(raw).strip()
     except AttributeError:
@@ -26,6 +29,11 @@ def latex_to_text(raw: str) -> str:
         return raw.strip()
 
 
+def test_latex_to_text() -> None:
+    s = "%CRTFASTGEEPWR: a SAS macro for power of the generalized estimating equations of multi-period cluster randomized trials with application to stepped wedge designs"
+    assert latex_to_text(s) == s
+
+
 def parse_arxiv_authors(raw: str) -> List[str]:
     if not raw:
         return []
-- 
cgit v1.2.3