The below provided Python code converts a Python integer to a string in arbitrary base ( from 2 up to infinity ) and works in both directions. So all the created strings can be converted back to Python integers by providing a string for N instead of an integer.
The code works only on positive numbers by intention (there is in my eyes some hassle about negative values and their bit representations I don't want to dig into). Just pick from this code what you need, want or like, or just have fun learning about available options. Much is there only for the purpose of documenting all the various available approaches ( e.g. the Oneliner seems not to be fast, even if promised to be ).
I like the by Salvador Dali proposed format for infinite large bases. A nice proposal which works optically well even for simple binary bit representations. Notice that the width=x padding parameter in case of infiniteBase=True formatted string applies to the digits and not to the whole number. It seems, that code handling infiniteBase digits format runs even a bit faster than the other options - another reason for using it?
I don't like the idea of using Unicode for extending the number of symbols available for digits, so don't look in the code below for it, because it's not there. Use the proposed infiniteBase format instead or store integers as bytes for compression purposes.
def inumToStr( N, base=2, width=1, infiniteBase=False,\
useNumpy=False, useRecursion=False, useOneliner=False, \
useGmpy=False, verbose=True):
''' Positive numbers only, but works in BOTH directions.
For strings in infiniteBase notation set for bases <= 62
infiniteBase=True . Examples of use:
inumToStr( 17, 2, 1, 1) # [1,0,0,0,1]
inumToStr( 17, 3, 5) # 00122
inumToStr(245, 16, 4) # 00F5
inumToStr(245, 36, 4,0,1) # 006T
inumToStr(245245245245,36,10,0,1) # 0034NWOQBH
inumToStr(245245245245,62) # 4JhA3Th
245245245245 == int(gmpy2.mpz('4JhA3Th',62))
inumToStr(245245245245,99,2) # [25,78, 5,23,70,44]
----------------------------------------------------
inumToStr( '[1,0,0,0,1]',2, infiniteBase=True ) # 17
inumToStr( '[25,78, 5,23,70,44]', 99) # 245245245245
inumToStr( '0034NWOQBH', 36 ) # 245245245245
inumToStr( '4JhA3Th' , 62 ) # 245245245245
----------------------------------------------------
--- Timings for N = 2**4096, base=36:
standard: 0.0023
infinite: 0.0017
numpy : 0.1277
recursio; 0.0022
oneliner: 0.0146
For N = 2**8192:
standard: 0.0075
infinite: 0.0053
numpy : 0.1369
max. recursion depth exceeded: recursio/oneliner
'''
show = print
if type(N) is str and ( infiniteBase is True or base > 62 ):
lstN = eval(N)
if verbose: show(' converting a non-standard infiniteBase bits string to Python integer')
return sum( [ item*base**pow for pow, item in enumerate(lstN[::-1]) ] )
if type(N) is str and base <= 36:
if verbose: show('base <= 36. Returning Python int(N, base)')
return int(N, base)
if type(N) is str and base <= 62:
if useGmpy:
if verbose: show(' base <= 62, useGmpy=True, returning int(gmpy2.mpz(N,base))')
return int(gmpy2.mpz(N,base))
else:
if verbose: show(' base <= 62, useGmpy=False, self-calculating return value)')
lstStrOfDigits="0123456789"+ \
"abcdefghijklmnopqrstuvwxyz".upper() + \
"abcdefghijklmnopqrstuvwxyz"
dictCharToPow = {}
for index, char in enumerate(lstStrOfDigits):
dictCharToPow.update({char : index})
return sum( dictCharToPow[item]*base**pow for pow, item in enumerate(N[::-1]) )
#:if
#:if
if useOneliner and base <= 36:
if verbose: show(' base <= 36, useOneliner=True, running the Oneliner code')
d="0123456789abcdefghijklmnopqrstuvwxyz"
baseit = lambda a=N, b=base: (not a) and d[0] or \
baseit(a-a%b,b*base)+d[a%b%(base-1) or (a%b) and (base-1)]
return baseit().rjust(width, d[0])[1:]
if useRecursion and base <= 36:
if verbose: show(' base <= 36, useRecursion=True, running recursion algorythm')
BS="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
def to_base(n, b):
return "0" if not n else to_base(n//b, b).lstrip("0") + BS[n%b]
return to_base(N, base).rjust(width,BS[0])
if base > 62 or infiniteBase:
if verbose: show(' base > 62 or infiniteBase=True, returning a non-standard digits string')
# Allows arbitrary large base with 'width=...'
# applied to each digit (useful also for bits )
N, digit = divmod(N, base)
strN = str(digit).rjust(width, ' ')+']'
while N:
N, digit = divmod(N, base)
strN = str(digit).rjust(width, ' ') + ',' + strN
return '[' + strN
#:if
if base == 2:
if verbose: show(" base = 2, returning Python str(f'{N:0{width}b}')")
return str(f'{N:0{width}b}')
if base == 8:
if verbose: show(" base = 8, returning Python str(f'{N:0{width}o}')")
return str(f'{N:0{width}o}')
if base == 16:
if verbose: show(" base = 16, returning Python str(f'{N:0{width}X}')")
return str(f'{N:0{width}X}')
if base <= 36:
if useNumpy:
if verbose: show(" base <= 36, useNumpy=True, returning np.base_repr(N, base)")
import numpy as np
strN = np.base_repr(N, base)
return strN.rjust(width, '0')
else:
if verbose: show(' base <= 36, useNumpy=False, self-calculating return value)')
lstStrOfDigits="0123456789"+"abcdefghijklmnopqrstuvwxyz".upper()
strN = lstStrOfDigits[N % base] # rightmost digit
while N >= base:
N //= base # consume already converted digit
strN = lstStrOfDigits[N % base] + strN # add digits to the left
#:while
return strN.rjust(width, lstStrOfDigits[0])
#:if
#:if
if base <= 62:
if useGmpy:
if verbose: show(" base <= 62, useGmpy=True, returning gmpy2.digits(N, base)")
import gmpy2
strN = gmpy2.digits(N, base)
return strN.rjust(width, '0')
# back to Python int from gmpy2.mpz with
# int(gmpy2.mpz('4JhA3Th',62))
else:
if verbose: show(' base <= 62, useGmpy=False, self-calculating return value)')
lstStrOfDigits= "0123456789" + \
"abcdefghijklmnopqrstuvwxyz".upper() + \
"abcdefghijklmnopqrstuvwxyz"
strN = lstStrOfDigits[N % base] # rightmost digit
while N >= base:
N //= base # consume already converted digit
strN = lstStrOfDigits[N % base] + strN # add digits to the left
#:while
return strN.rjust(width, lstStrOfDigits[0])
#:if
#:if
#:def