diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2022-07-07 13:49:11 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2022-07-07 13:49:11 -0700 | 
| commit | 3269ace124d9fcc886caa1bb9925a63cea869ba5 (patch) | |
| tree | 32a740b295cb05a2c416e5c1630b472c5a1bb041 | |
| parent | de0b5fb315b73e6960c77c1ae934f63bce593bc9 (diff) | |
| download | fatcat-3269ace124d9fcc886caa1bb9925a63cea869ba5.tar.gz fatcat-3269ace124d9fcc886caa1bb9925a63cea869ba5.zip | |
arxiv: work-around hack for strange title
| -rw-r--r-- | python/fatcat_tools/importers/arxiv.py | 8 | 
1 file changed, 8 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index dd2c2284..d7d3ed97 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -18,6 +18,9 @@ latex2text = LatexNodes2Text()
 
 
 def latex_to_text(raw: str) -> str:
+    # hack: handle a single special mangled title
+    if raw.startswith("%CRTFASTGEEPWR"):
+        return raw.strip()
     try:
         return latex2text.latex_to_text(raw).strip()
     except AttributeError:
@@ -26,6 +29,11 @@ def latex_to_text(raw: str) -> str:
         return raw.strip()
 
 
+def test_latex_to_text() -> None:
+    s = "%CRTFASTGEEPWR: a SAS macro for power of the generalized estimating equations of multi-period cluster randomized trials with application to stepped wedge designs"
+    assert latex_to_text(s) == s
+
+
 def parse_arxiv_authors(raw: str) -> List[str]:
     if not raw:
         return []
```
