12 * This "tester" lets us construct Reals out of Floating Points with arbitrary sizes (but must be less than 64 bits currently)
13 * Eventually it will also go the other way
14 * This is originally for conceptual understanding but will ultimately become useful later on.
20 * From http://stackoverflow.com/questions/109023/how-to-count-the-number-of-set-bits-in-a-32-bit-integer
22 long CountBits(long n)
24 unsigned int c; // c accumulates the total bits set in v
26 n &= n - 1; // clear the least significant bit set
31 * Converts data represented as an (s, e, m) floating point number to a Real.
32 * Now actually correct for floats bigger than 8 bits!
33 * Now with 100% less memcpy and 100% more pointer arithmetic!
34 * Now with IEEE style encodings! (How many flops does it take to represent a float this way? TOO MANY!)
35 * Still probably breaks if you are not using an x86 based processor
37 * e = exponent (IEEE ``offset'' encoding)
38 * m = mantissa (IEEE encoding)
40 template <size_t EMAX, size_t PREC, size_t BASE = 2> Real BitsToReal(void * data)
42 if (PREC + EMAX > 62) // we need 1 bit for the sign of the exponent, 1 bit for the sign
43 Fatal("Can't do more than 62 bits (asked for %d + %d = %d)", PREC, EMAX, PREC+EMAX);
45 size_t mbytes = ceil(double(PREC)/8.0);
46 size_t mtrunc = floor(double(PREC)/8.0);
47 uint64_t mantissa = 0L;
48 uint8_t * c = (uint8_t*)(data);
49 for (size_t i = 0; i < mbytes; ++i)
51 mantissa |= *(c++) << (i*8);
53 mantissa &= ~(~0L << PREC);
55 size_t ebytes = ceil(double(EMAX)/8.0);
56 uint64_t exponent = *(c-1) & (~0L << (PREC-8*mtrunc));
57 for (size_t i = 0; i < ebytes; ++i)
59 exponent |= *(c++) << (i+1)*8;
61 exponent = exponent >> (PREC-8*mtrunc);
62 bool sign = exponent & (1LL << EMAX);
63 exponent &= ~(~0L << EMAX);
65 // Floats are defined by
66 // x = (-1)^s * m * B^{e}
68 // In IEEE things get a little crazy though, the mantissa is stored reversed and the digits are summed indexed using negative powers of 2 (not positive)
69 // So to conform with IEEE can't just treat the mantissa as an unsigned integer
71 // Calculate the actual mantissa from the IEEE style encoding of the mantissa (sigh)
72 // This is obviously horribly inefficient but I guess that encoding not designed for ... whatever it is I'm doing here
73 Real M = (exponent != 0);
74 for (size_t i = 1; i < PREC; ++i)
76 if ((mantissa >> PREC-i) & 1)
77 M += Real(1) / powl(BASE, i);
81 //Debug("Mantissa: %x = %lf", mantissa, Float(M));
82 //Debug("Exponent: %x = %u => %lf", exponent, exponent, Float(Real(exponent + 1) - Real(1LL << (EMAX-1))));
83 //Debug("Sign: %x = %u", sign, sign);
86 Real x = M * powl(Real(BASE), Real(exponent + 1)-Real(1LL << (EMAX-1)));
87 return (sign) ? -x : x;
92 * Performs the inverse of BitsToReal
94 * It chooses the normalised representation
96 template <size_t EMAX, size_t PREC, size_t BASE = 2> void BitsFromReal(const Real & x, void * data)
99 Fatal("Can't do more than 62 bits (asked for %d + %d = %d)", PREC, EMAX, PREC+EMAX);
101 uint64_t integer = (uint64_t)(abs(x));
102 uint64_t fraction = 0L;
103 int64_t exponent = 0L;
105 int64_t exp_bias = (1LL << (EMAX-1) - 1);
107 Real f = Real(abs(x)) - Real(integer);
108 //f = 0.375; integer = 12;
109 // Get active fractional bits
110 for (size_t i = 0; i <= PREC && f > 0; ++i)
112 //Debug("%d, %lf", i, Float(f));
115 fraction |= (1LL << (PREC-i));
124 //Debug("Integer: %lx", integer);
125 int offset = 0; while ((integer >> (offset++) != 0) && (offset < 8*sizeof(integer))); --offset;
126 //Debug("Offset: %d", offset);
127 //Debug("Fraction: %lx", fraction);
128 exponent += offset-1;
129 uint64_t mantissa = fraction;
132 //Debug("Mantissa %.2lx", mantissa);
133 mantissa = (integer << PREC) | fraction;
134 //Debug("Mantissa %.2lx", mantissa);
135 mantissa = mantissa >> (offset-1);
136 //Debug("Mantissa %.2lx", mantissa);
137 //mantissa = mantissa >> (offset-1);
141 mantissa = mantissa << 1;
143 //Debug("Mantissa %.2lx", mantissa);
145 //Debug("Exponent: %lx %li", exponent, exponent);
146 if (exponent < -exp_bias)
148 mantissa = mantissa >> (-exp_bias - exponent);
149 exponent = -exp_bias;
151 //Debug("Exponent: %lx %li", exponent, exponent);
152 exponent = exp_bias + exponent;
153 //Real(exponent + 1) - Real(1LL << (EMAX-1))
154 //Debug("Exponent: %lx %li", exponent, exponent);
155 Real M = (exponent != 0);
156 for (size_t i = 1; i < PREC; ++i)
158 if ((mantissa >> PREC-i) & 1)
159 M += Real(1) / powl(BASE, i);
161 //Debug("M is %lf", Float(M));
163 // Now copy the bits in (fun times)
164 uint8_t * c = (uint8_t*)(data);
166 for (i = 0; i < PREC; ++i)
168 *c |= ((mantissa >> i) & 1) << (i%8);
174 for (; i < PREC+EMAX; ++i)
176 *c |= ((exponent >> (i-PREC)) & 1) << (i%8);
182 *c |= ((x < 0) << (i%8));
189 int main(int argc, char ** argv)
191 printf("# Convert custom floats to a Real\n");
192 printf("# a\thex(a)\tReal(a)\tdelta(last2)\n");
194 typedef pair<uint8_t, Real> Pear;
197 for (uint8_t a = a0; a < a0+0xFF; ++a)
199 Real x = BitsToReal<2,5>(&a);
200 uint8_t b = 0; BitsFromReal<2,5>(x, &b);
201 Real y = BitsToReal<2,5>(&b);
204 Warn("%x -> %lf -> %x -> %lf", a, Float(x), b, Float(y));
206 space.push_back(Pear(a, x));
208 space.sort([=](const Pear & a, const Pear & b){return a.second < b.second;});
210 for (list<Pear>::iterator i = space.begin(); i != space.end(); ++i)
212 printf("%u\t%.2x\t%.20lf", i->first, i->first, Float(i->second));
213 if (i != space.begin())
214 printf("\t%f", Float(i->second - prev));