diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-10-08 16:11:09 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-10-08 16:11:09 -0700 |
commit | b9279efacdee9bf8038203d6efe9dc105cc4dce3 (patch) | |
tree | e1c40cebb0ea688182b5d8f1dcf5fbd02c3fa4c0 /python/fatcat_tools/importers/common.py | |
parent | 4b7c3c7b317cf4793f5ba5ad0d96102f103b66a3 (diff) | |
download | fatcat-b9279efacdee9bf8038203d6efe9dc105cc4dce3.tar.gz fatcat-b9279efacdee9bf8038203d6efe9dc105cc4dce3.zip |
refactor duplicated b32_hex function in importers
Diffstat (limited to 'python/fatcat_tools/importers/common.py')
-rw-r--r-- | python/fatcat_tools/importers/common.py | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index a25c3196..74595790 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -4,6 +4,7 @@ import sys
 import csv
 import json
 import ftfy
+import base64
 import sqlite3
 import subprocess
 import unicodedata
@@ -141,6 +142,14 @@ def test_clean():
     assert clean('<b>a&b</b>') == '<b>a&b</b>'
     assert clean('<b>a&b</b>', force_xml=True) == '<b>a&b</b>'
 
+def b32_hex(s):
+    s = s.strip().split()[0].lower()
+    if s.startswith("sha1:"):
+        s = s[5:]
+    if len(s) != 32:
+        return s
+    return base64.b16encode(base64.b32decode(s.upper())).lower().decode('utf-8')
+
 def is_cjk(s):
     if not s:
         return False