12 * This "tester" lets us construct Reals out of Floating Points with arbitrary sizes (but must be less than 64 bits currently)
13 * Eventually it will also go the other way
14 * This is originally for conceptual understanding but will ultimately become useful later on.
20 * From http://stackoverflow.com/questions/109023/how-to-count-the-number-of-set-bits-in-a-32-bit-integer
22 long CountBits(long n)
24 unsigned int c; // c accumulates the total bits set in v
26 n &= n - 1; // clear the least significant bit set
31 * Converts data represented as an (s, e, m) floating point number to a Real.
32 * Now actually correct for floats bigger than 8 bits!
33 * Now with 100% less memcpy and 100% more pointer arithmetic!
34 * Now with IEEE style encodings! (How many flops does it take to represent a float this way? TOO MANY!)
35 * Still probably breaks if you are not using an x86 based processor
37 * e = exponent (IEEE ``offset'' encoding)
38 * m = mantissa (IEEE encoding)
40 template <size_t EMAX, size_t PREC, size_t BASE = 2> Real BitsToReal(void * data)
42 if (PREC + EMAX > 62) // we need 1 bit for the sign of the exponent, 1 bit for the sign
43 Fatal("Can't do more than 62 bits (asked for %d + %d = %d)", PREC, EMAX, PREC+EMAX);
45 size_t mbytes = ceil(double(PREC)/8.0);
46 size_t mtrunc = floor(double(PREC)/8.0);
47 uint64_t mantissa = 0L;
48 uint8_t * c = (uint8_t*)(data);
49 for (size_t i = 0; i < mbytes; ++i)
51 mantissa |= *(c++) << (i*8);
53 mantissa &= ~(~0L << PREC);
55 size_t ebytes = ceil(double(EMAX)/8.0);
56 uint64_t exponent = *(c-1) & (~0L << (PREC-8*mtrunc));
57 for (size_t i = 0; i < ebytes; ++i)
59 exponent |= *(c++) << (i+1)*8;
61 exponent = exponent >> (PREC-8*mtrunc);
62 bool sign = exponent & (1LL << EMAX);
63 exponent &= ~(~0L << EMAX);
65 // Floats are defined by
66 // x = (-1)^s * m * B^{e}
68 // In IEEE things get a little crazy though, the mantissa is stored reversed and the digits are summed indexed using negative powers of 2 (not positive)
69 // So to conform with IEEE can't just treat the mantissa as an unsigned integer
71 // Calculate the actual mantissa from the IEEE style encoding of the mantissa (sigh)
72 // This is obviously horribly inefficient but I guess that encoding not designed for ... whatever it is I'm doing here
73 // NOW WITH LESS TERRIBLE LOOPING THAT GIVES SLIGHTLY WRONG RESULTS! Order a copy today.
74 Real M = (((exponent != 0) << PREC) + (uint64_t)(mantissa))/powl(BASE,PREC);
76 //Debug("Mantissa: %x = %lf", mantissa, Float(M));
77 //Debug("Exponent: %x = %u => %lf", exponent, exponent, Float(Real(exponent + 1) - Real(1LL << (EMAX-1))));
78 //Debug("Sign: %x = %u", sign, sign);
81 Real x = M * powl(Real(BASE), Real(exponent + 1)-Real(1LL << (EMAX-1)));
82 return (sign) ? -x : x;
86 * Modifies fraction to contain the fractional part of x
87 * Returns position of the first '1' bit
89 size_t Fraction(Real x, uint64_t & fraction)
91 x = abs(x) - (uint64_t)(abs(x));
94 for (size_t i = 0; i < 8*sizeof(fraction); ++i)
99 offset = (offset == 0) ? i : offset;
100 fraction |= (1LL << (8*sizeof(fraction)-1-i));
110 * Copy mantissa and exponent into data
112 template <size_t EMAX, size_t PREC, size_t BASE = 2> void BitsFromRepr(bool sign, uint64_t mantissa, uint64_t exponent, void * data)
114 uint8_t * c = (uint8_t*)(data);
116 for (i = 0; i < PREC; ++i)
118 *c |= ((mantissa >> i) & 1) << (i%8);
124 for (; i < PREC+EMAX; ++i)
126 *c |= ((exponent >> (i-PREC)) & 1) << (i%8);
132 *c |= (sign << (i%8));
139 * Performs the inverse of BitsToReal
141 * It chooses the normalised representation
143 template <size_t EMAX, size_t PREC, size_t BASE = 2> void BitsFromReal(Real x, void * data)
145 if (PREC + EMAX > 62)
146 Fatal("Can't do more than 62 bits (asked for %d + %d = %d)", PREC, EMAX, PREC+EMAX);
148 uint64_t integer = (uint64_t)(abs(x));
149 uint64_t fraction = 0LL;
150 const size_t max_bits = 8*sizeof(uint64_t);
152 x = abs(x) - integer;
154 size_t fraction_leading_zeroes = max_bits;
155 for (size_t i = 0; i < max_bits; ++i)
160 if (fraction_leading_zeroes > i)
161 fraction_leading_zeroes = i;
163 fraction |= (1LL << (max_bits-1-i));
169 uint64_t mantissa = 0LL;
170 int64_t biased_exponent = 0LL;
172 size_t integer_leading_zeroes = 0;
173 for (integer_leading_zeroes = 0; integer_leading_zeroes < max_bits; ++integer_leading_zeroes)
175 if ((integer >> (max_bits-1-integer_leading_zeroes)) & 1LL)
179 // 0|011 11|00 0000 0001
185 mantissa = (integer << (integer_leading_zeroes));
186 mantissa |= (fraction >> (max_bits - integer_leading_zeroes));
190 mantissa = (fraction << (fraction_leading_zeroes));
192 //Debug("Mantissa with fraction %.16lx (%.16lx >><>?<< %lu)", mantissa, fraction, fraction_leading_zeroes);
193 mantissa = (mantissa >> (max_bits - PREC - 1));
194 //Debug("Mantissa is %.16lx (%.16lx . %.16lx)", mantissa, fraction, integer);
195 biased_exponent = (integer != 0) ? (max_bits - integer_leading_zeroes) : (-fraction_leading_zeroes);
196 if (biased_exponent < -(1LL << (EMAX-1))+1)
198 //Debug("Denormalising for glory or death! %li , %li", biased_exponent, -(1LL << (EMAX-1)) - biased_exponent + 1);
199 mantissa = mantissa >> (-(1LL << (EMAX-1)) - biased_exponent + 1);
200 biased_exponent = -(1LL << (EMAX-1))+1;
202 //Debug("Biased exponent is %li", biased_exponent);
204 // If the unbiased exponent would be non-zero, the IEEE has the leading 1 as implied; remove it and decrease exponent to compensate
205 if (biased_exponent != -(1LL << (EMAX-1))+1)
207 //Debug("Implied 1 in mantissa %.16lx", mantissa);
208 mantissa &= ~(1LL << (PREC));
209 //Debug("Implied 1 in mantissa %.16lx", mantissa);
210 biased_exponent -= 1;
216 //Debug("%.16lx", mantissa);
218 uint64_t exponent = (uint64_t)(biased_exponent + (1LL << (EMAX-1))-1); // offset the exponent
222 Real M = (exponent != 0);
223 for (size_t i = 1; i < PREC; ++i)
225 if ((mantissa >> PREC-i) & 1)
226 M += Real(1) / powl(BASE, i);
229 Debug("Exponent: %lx %lu -> %li", exponent, exponent, biased_exponent);
230 Debug("Mantissa: %lx -> %f", mantissa, Float(M));
231 Debug("Both: %lx", mantissa | (exponent << (PREC)));
233 // Now copy the bits in
234 BitsFromRepr<EMAX, PREC, BASE>(sign, mantissa, exponent, data);
240 int main(int argc, char ** argv)
242 printf("# Convert custom floats to a Real\n");
243 printf("# a\thex(a)\tReal(a)\tdelta(last2)\n");
245 typedef uint8_t Bits;
246 typedef pair<Bits, Real> Pear;
252 for (Bits a = a0; a < 0xFF; ++a)
254 Real x = BitsToReal<E,P>(&a);
255 Bits b = 0; BitsFromReal<E,P>(x, &b);
256 Real y = BitsToReal<E,P>(&b);
259 Fatal("%x -> %.10lf -> %x -> %.10lf", a, Float(x), b, Float(y));
261 space.push_back(Pear(a, x));
263 space.sort([=](const Pear & a, const Pear & b){return a.second < b.second;});
265 for (list<Pear>::iterator i = space.begin(); i != space.end(); ++i)
267 printf("%u\t%.2x\t%.20lf", i->first, i->first, Float(i->second));
268 if (i != space.begin())
269 printf("\t%f", Float(i->second - prev));
275 //0|011 11|00 0000 0001
277 //0|000 01|00 0000 0000