diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2022-07-07 13:49:11 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2022-07-07 13:49:11 -0700 |
commit | 3269ace124d9fcc886caa1bb9925a63cea869ba5 (patch) | |
tree | 32a740b295cb05a2c416e5c1630b472c5a1bb041 /python | |
parent | de0b5fb315b73e6960c77c1ae934f63bce593bc9 (diff) | |
download | fatcat-3269ace124d9fcc886caa1bb9925a63cea869ba5.tar.gz fatcat-3269ace124d9fcc886caa1bb9925a63cea869ba5.zip |
arxiv: work-around hack for strange title
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/importers/arxiv.py | 8 |
1 files changed, 8 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index dd2c2284..d7d3ed97 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -18,6 +18,9 @@ latex2text = LatexNodes2Text()
 
 
 def latex_to_text(raw: str) -> str:
+    # hack: handle a single special mangled title
+    if raw.startswith("%CRTFASTGEEPWR"):
+        return raw.strip()
     try:
         return latex2text.latex_to_text(raw).strip()
     except AttributeError:
@@ -26,6 +29,11 @@ def latex_to_text(raw: str) -> str:
         return raw.strip()
 
 
+def test_latex_to_text() -> None:
+    s = "%CRTFASTGEEPWR: a SAS macro for power of the generalized estimating equations of multi-period cluster randomized trials with application to stepped wedge designs"
+    assert latex_to_text(s) == s
+
+
 def parse_arxiv_authors(raw: str) -> List[str]:
     if not raw:
         return []
```