# CHAR_MAP: 3-bit code (as a '0'/'1' string) -> the character it stands for.
CHAR_MAP = {
    f'{i:03b}': c
    for i, c in enumerate(['\n', '\\', '/', 'O', '(', ')', ' ', '_'])
}

# U_TO_B: Unicode code point -> the byte value (0-255) that character encodes.
# Each tuple is (first byte value, one-past-last byte value, first code point):
#   [  0,  26) starts at U+0041 'A'      [ 26,  52) starts at U+0061 'a'
#   [ 52, 116) starts at U+0410 (Cyrillic)
#   [116, 195) starts at U+3041 (hiragana)
#   [195, 256) starts at U+30A1 (katakana)
# The last range stops at 256 (not 274): decode() formats every value with
# ':08b', so a value above 255 would yield 9 bits and misalign all the groups
# that follow it.
U_TO_B = {}
for start, end, codepointoffset in [
    (0, 26, 0x41),
    (26, 52, 0x61),
    (52, 116, 0x410),
    (116, 195, 0x3041),
    (195, 256, 0x30A1),
]:
    for b in range(start, end):
        U_TO_B[b + codepointoffset - start] = b
def decode(bin: str, bchr: int=3, btot: int=6) -> str:
    """Expand a Unicode-packed, run-length-encoded string back into text.

    Every character of *bin* is looked up in ``U_TO_B`` to obtain one byte;
    the bytes are joined into a single string of bits. That bit string is
    read in groups of *btot* bits: the first *bchr* bits select a character
    from ``CHAR_MAP`` and the remaining bits store the run length minus one.

    Args:
        bin: Encoded text; each character must be a key of ``U_TO_B``.
        bchr: Number of leading bits per group that identify the character.
        btot: Total number of bits per group (character bits + run bits).

    Returns:
        The decoded text with every run expanded.

    Raises:
        KeyError: If a character of *bin* is not in ``U_TO_B`` or a group's
            character bits are not in ``CHAR_MAP``.
    """
    bits = ''.join(f'{U_TO_B[ord(c)]:08b}' for c in bin)
    # Input arrives in 8-bit units but is consumed in btot-bit units, so the
    # tail may hold padding bits that do not form a whole group. Stop after
    # the last complete group instead of decoding that partial remainder.
    return ''.join(
        CHAR_MAP[bits[i:i + bchr]] * (int(bits[i + bchr:i + btot], 2) + 1)
        for i in range(0, len(bits) - btot + 1, btot))
# Decode the embedded payload (a small ASCII-art banner) and write it to stdout.
_decoded_card = decode('HOxチuяチgQわさwjIxばUwjQoァどgイさyjけxAせoぐけwぐでxぐしx')
print(_decoded_card)
# Run me at lyubogankov.dev/businesscard :)
Encoding example
text / character run-length
text = 'OOOO'; char_runlen = ('O', 4) # loop over source text, count consecutive repeats
char run-len / binary (6-bit)
# bits per char = math.ceil(math.log2(unique characters in source text))
CHAR_MAP = {'\n': '000', '\\': '001', '/': '010', 'O': '011',
'(': '100', ')': '101', ' ': '110', '_': '111'}
# runlen bits depends on largest run from source text (for me, 3 bits)
# subtract 1 from runlen, as each character repeats at least once!
binary = CHAR_MAP['O'] + f'{(4 - 1):03b}' # '011' + '011'
binary / Unicode (8-bit)
# [00000000, 00011010) = [ 0, 26) -> A-Z # Latin upper
# [00011010, 00110100) = [ 26, 52) -> a-z # Latin lower
# [00110100, 01110100) = [ 52, 116) -> А-Я, а-я # Cyrillic upper/lower
# [01110100, 11000011) = [116, 195) -> ぁ-わ # Japanese hiragana
# [11000011, 11111111] = [195, 255] -> ァ-ポ # Japanese katakana
runlens = [('\n', 7), ('(', 4), ('/', 7), ('\\', 3)]
binary = '000110' + '100011' + '010110' + '001010' # 6-bit enc
_unicode = 'aБし' # '00011010' + '00110101' + '10001010' # 8-bit enc