You need to use fixed point precision/arithmetics/math for this. It means you use integer type variables but some of the bits are after the decimal point.
for example let assume 8 decimal bits so operations are done like this:
a = number1*256
b = number2*256
c=a+b // +
c=a-b // -
c=(a*b)>>8 // *
c=(a/b)<<8 // /
Here simple fixed point log2
example via binary search in C++:
//---------------------------------------------------------------------------
const DWORD _fx32_bits =32; // all bits count
const DWORD _fx32_fract_bits= 8; // fractional bits count
const DWORD _fx32_integ_bits=_fx32_bits-_fx32_fract_bits; // integer bits count
//---------------------------------------------------------------------------
const DWORD _fx32_one =1<<_fx32_fract_bits; // constant=1.0 (fixed point)
const DWORD _fx32_fract_mask=_fx32_one-1; // fractional bits mask
const DWORD _fx32_integ_mask=0xFFFFFFFF-_fx32_fract_mask; // integer bits mask
const DWORD _fx32_MSB_mask=1<<(_fx32_bits-1); // max unsigned bit mask
//---------------------------------------------------------------------------
DWORD bits(DWORD p) // count how many bits is p
{
DWORD m=0x80000000; DWORD b=32;
for (;m;m>>=1,b--)
if (p>=m) break;
return b;
}
//---------------------------------------------------------------------------
DWORD fx32_mul(DWORD x,DWORD y)
{
// this should be done in asm with 64 bit result !!!
DWORD a=x,b=y; // asm has access only to local variables
asm { // compute (a*b)>>_fx32_fract
mov eax,a // eax=a
mov ebx,b // ebx=b
mul eax,ebx // (edx,eax)=eax*ebx
mov ebx,_fx32_one
div ebx // eax=(edx,eax)>>_fx32_fract
mov a,eax;
}
return a;
// you can also do this instead but unless done on 64bit variable will overflow
return (x*y)>>_fx32_fract_bits;
}
//---------------------------------------------------------------------------
DWORD fx32_sqrt(const DWORD &x) // unsigned fixed point sqrt
{
DWORD m,a;
if (!x) return 0;
m=bits(x); // integer bits
if (m>_fx32_fract_bits) m-=_fx32_fract_bits; else m=0;
m>>=1; // sqrt integer result is half of x integer bits
m=_fx32_one<<m; // MSB of result mask
for (a=0;m;m>>=1) // test bits from MSB to 0
{
a|=m; // bit set
if (fx32_mul(a,a)>x) // if result is too big
a^=m; // bit clear
}
return a;
}
//---------------------------------------------------------------------------
DWORD fx32_exp2(DWORD y) // 2^y
{
// handle special cases
if (!y) return _fx32_one; // 2^0 = 1
if (y==_fx32_one) return 2; // 2^1 = 2
DWORD m,a,b,_y;
// handle the signs
_y=y&_fx32_fract_mask; // _y fractional part of exponent
y=y&_fx32_integ_mask; // y integer part of exponent
a=_fx32_one; // ini result
// powering by squaring x^y
if (y)
{
for (m=_fx32_MSB_mask;(m>_fx32_one)&&(m>y);m>>=1); // find mask of highest bit of exponent
for (;m>=_fx32_one;m>>=1)
{
a=fx32_mul(a,a);
if (DWORD(y&m)) a<<=1; // a*=2
}
}
// powering by rooting x^_y
if (_y)
{
for (b=2<<_fx32_fract_bits,m=_fx32_one>>1;m;m>>=1) // use only fractional part
{
b=fx32_sqrt(b);
if (DWORD(_y&m)) a=fx32_mul(a,b);
}
}
return a;
}
//---------------------------------------------------------------------------
DWORD fx32_log2(DWORD x) // = log2(x)
{
DWORD y,m;
// binary search from highest possible integer power of 2 to avoid overflows (log2(integer bits)-1)
for (y=0,m=_fx32_one<<(bits(_fx32_integ_bits)-1);m;m>>=1)
{
y|=m; // set bit
if (fx32_exp2(y)>x) y^=m; // clear bit if result too big
}
return y;
}
//---------------------------------------------------------------------------
Here simple test (using floats just for loading and printing you can handle booth on integers too, or by compiler evaluated constants):
float(fx32_log2(float(125.67*float(_fx32_one)))) / float(_fx32_one)
This evaluates: log2(125.67) = 6.98828125
my win calc returns 6.97349648
which is pretty close. More precise result you need more fractional bits you need to use. Int and compile time evaluation float example:
(100*fx32_log2(125.67*_fx32_one))>>_fx32_fract_bits
returns 698
which means 6.98
as we multiplied by 100
. You can also write your own load and print function to convert between fixed point and string directly.
To change precision just play with _fx32_fract_bits
constant. Anyway if your C++ does not know DWORD
it is just 32bit unsigned int
. If you are using different type (like 16
or 64
bit) then just change the constants accordingly.
For more info take a look at:
[Edit2] fx32_mul
on 32bit arithmetics without asm
base 2^16 O(n^2)
DWORD fx32_mul(DWORD x,DWORD y)
{
const int _h=1; // this is MSW,LSW order platform dependent So swap 0,1 if your platform is different
const int _l=0;
union _u
{
DWORD u32;
WORD u16[2];
}u;
DWORD al,ah,bl,bh;
DWORD c0,c1,c2,c3;
// separate 2^16 base digits
u.u32=x; al=u.u16[_l]; ah=u.u16[_h];
u.u32=y; bl=u.u16[_l]; bh=u.u16[_h];
// multiplication (al+ah<<1)*(bl+bh<<1) = al*bl + al*bh<<1 + ah*bl<<1 + ah*bh<<2
c0=(al*bl);
c1=(al*bh)+(ah*bl);
c2=(ah*bh);
c3= 0;
// propagate 2^16 overflows (backward to avoid overflow)
c3+=c2>>16; c2&=0x0000FFFF;
c2+=c1>>16; c1&=0x0000FFFF;
c1+=c0>>16; c0&=0x0000FFFF;
// propagate 2^16 overflows (normaly to recover from secondary overflow)
c2+=c1>>16; c1&=0x0000FFFF;
c3+=c2>>16; c2&=0x0000FFFF;
// (c3,c2,c1,c0) >> _fx32_fract_bits
u.u16[_l]=c0; u.u16[_h]=c1; c0=u.u32;
u.u16[_l]=c2; u.u16[_h]=c3; c1=u.u32;
c0 =(c0&_fx32_integ_mask)>>_fx32_fract_bits;
c0|=(c1&_fx32_fract_mask)<<_fx32_integ_bits;
return c0;
}
In case you do not have WORD,DWORD
add this to start of code
typedef unsigned __int32 DWORD;
typedef unsigned __int16 WORD;
or this:
typedef uint32_t DWORD;
typedef uint16_t WORD;
[Edit3] fx32_mul debug info
let call and trace/breakpoint this (15 fractional bits):
fx32_mul(0x00123400,0x00230056);
Which is:
0x00123400/32768 * 0x00230056/32768 =
36 * 70.00262451171875 = 2520.094482421875
So:
DWORD fx32_mul(DWORD x,DWORD y) // x=0x00123400 y=0x00230056
{
const int _h=1;
const int _l=0;
union _u
{
DWORD u32;
WORD u16[2];
}u;
DWORD al,ah,bl,bh;
DWORD c0,c1,c2,c3;
// separate 2^16 base digits
u.u32=x; al=u.u16[_l]; ah=u.u16[_h]; // al=0x3400 ah=0x0012
u.u32=y; bl=u.u16[_l]; bh=u.u16[_h]; // bl=0x0056 bh=0x0023
// multiplication (al+ah<<1)*(bl+bh<<1) = al*bl + al*bh<<1 + ah*bl<<1 + ah*bh<<2
c0=(al*bl); // c0=0x00117800
c1=(al*bh)+(ah*bl);// c1=0x0007220C
c2=(ah*bh); // c2=0x00000276
c3= 0; // c3=0x00000000
// propagate 2^16 overflows (backward to avoid overflow)
c3+=c2>>16; c2&=0x0000FFFF; // c3=0x00000000 c2=0x00000276
c2+=c1>>16; c1&=0x0000FFFF; // c2=0x0000027D c1=0x0000220C
c1+=c0>>16; c0&=0x0000FFFF; // c1=0x0000221D c0=0x00007800
// propagate 2^16 overflows (normaly to recover from secondary overflow)
c2+=c1>>16; c1&=0x0000FFFF; // c2=0x0000027D c1=0x0000221D
c3+=c2>>16; c2&=0x0000FFFF; // c3=0x00000000 c2=0x0000027D
// (c3,c2,c1,c0) >> _fx32_fract_bits
u.u16[_l]=c0; u.u16[_h]=c1; c0=u.u32; // c0=0x221D7800
u.u16[_l]=c2; u.u16[_h]=c3; c1=u.u32; // c1=0x0000027D
c0 =(c0&_fx32_integ_mask)>>_fx32_fract_bits; // c0=0x0000443A
c0|=(c1&_fx32_fract_mask)<<_fx32_integ_bits; // c0=0x04FA443A
return c0; // 0x04FA443A -> 83510330/32768 = 2548.53302001953125
}
float
as a result type and when you need just multiply it say by 100 and get integer part ? – Interferon24.8
bit ? – ColemancolemaniteDWORD LUT[_fx32_fract_bits] = { 0.5^0.5, 0.5^0.25, 0.5^0.125,...}
and use that instead ofb=fx32_sqrt(b);
Do you need real code? – Colemancolemanite