about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2022-07-07 13:49:11 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2022-07-07 13:49:11 -0700
commit: 3269ace124d9fcc886caa1bb9925a63cea869ba5 (patch)
tree: 32a740b295cb05a2c416e5c1630b472c5a1bb041 /python/fatcat_tools
parent: de0b5fb315b73e6960c77c1ae934f63bce593bc9 (diff)
download: fatcat-3269ace124d9fcc886caa1bb9925a63cea869ba5.tar.gz
fatcat-3269ace124d9fcc886caa1bb9925a63cea869ba5.zip
arxiv: work-around hack for strange title
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/arxiv.py 8
1 files changed, 8 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index dd2c2284..d7d3ed97 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -18,6 +18,9 @@ latex2text = LatexNodes2Text()
def latex_to_text(raw: str) -> str:
+ # hack: handle a single special mangled title
+ if raw.startswith("%CRTFASTGEEPWR"):
+ return raw.strip()
try:
return latex2text.latex_to_text(raw).strip()
except AttributeError:
@@ -26,6 +29,11 @@ def latex_to_text(raw: str) -> str:
return raw.strip()
+def test_latex_to_text() -> None:
+ s = "%CRTFASTGEEPWR: a SAS macro for power of the generalized estimating equations of multi-period cluster randomized trials with application to stepped wedge designs"
+ assert latex_to_text(s) == s
+
+
def parse_arxiv_authors(raw: str) -> List[str]:
if not raw:
return []