From: Sam Moore Date: Fri, 9 May 2014 12:23:50 +0000 (+0800) Subject: Fix the Floating Point X-Git-Url: https://git.ucc.asn.au/?a=commitdiff_plain;h=f4bba69cebb67c19cfe0725287f9c383b3d852c0;p=ipdf%2Fcode.git Fix the Floating Point Pun intended I had two bugs that cancelled each other out and made a self consistent float that was wrong except for all the ones I compared it to! David helped fix it. --- diff --git a/src/tests/represent.cpp b/src/tests/represent.cpp index e5abef5..c576802 100644 --- a/src/tests/represent.cpp +++ b/src/tests/represent.cpp @@ -70,13 +70,8 @@ template Real BitsToReal(void * data // Calculate the actual mantissa from the IEEE style encoding of the mantissa (sigh) // This is obviously horribly inefficient but I guess that encoding not designed for ... whatever it is I'm doing here - Real M = (exponent != 0); - for (size_t i = 1; i < PREC; ++i) - { - if ((mantissa >> PREC-i) & 1) - M += Real(1) / powl(BASE, i); - } - + // NOW WITH LESS TERRIBLE LOOPING THAT GIVES SLIGHTLY WRONG RESULTS! Order a copy today. + Real M = (((exponent != 0) << PREC) + (uint64_t)(mantissa))/powl(BASE,PREC); //Debug("Mantissa: %x = %lf", mantissa, Float(M)); //Debug("Exponent: %x = %u => %lf", exponent, exponent, Float(Real(exponent + 1) - Real(1LL << (EMAX-1)))); @@ -174,45 +169,51 @@ template void BitsFromReal(Real x, v uint64_t mantissa = 0LL; int64_t biased_exponent = 0LL; - // Combine integer and fraction into mantissa - if (integer == 0) + size_t integer_leading_zeroes = 0; + for (integer_leading_zeroes = 0; integer_leading_zeroes < max_bits; ++integer_leading_zeroes) { - //Debug("0. %lx (%d)", fraction, fraction_leading_zeroes); - // Mantissa must have leading '1'; shift to get leading 1, then shift back to mantissa position - mantissa = (fraction << fraction_leading_zeroes) >> (max_bits - PREC); - biased_exponent -= fraction_leading_zeroes; - // If the unbiased exponent would be negative use a denormalised number instead - if (biased_exponent < -(1LL << (EMAX-1))+1) - { - mantissa = mantissa >> (-(1LL << (EMAX-1))+1 - biased_exponent); - biased_exponent = -(1LL << (EMAX-1))+1; - } + if ((integer >> (max_bits-1-integer_leading_zeroes)) & 1LL) + break; + } + + // 0|011 11|00 0000 0001 + // 0001 + // 0000 0000 0100 + // 10000 0000 0100 + if (integer != 0) + { + mantissa = (integer << (integer_leading_zeroes)); + mantissa |= (fraction >> (max_bits - integer_leading_zeroes)); } else { - - size_t integer_leading_zeroes = 0; - for (integer_leading_zeroes = 0; integer_leading_zeroes < max_bits; ++integer_leading_zeroes) - { - if ((integer >> (max_bits-1-integer_leading_zeroes)) & 1LL) - break; - } - //Debug("%.16lx . %.16lx (%li, %li)", integer,fraction, integer_leading_zeroes, fraction_leading_zeroes); - - mantissa = (integer << (integer_leading_zeroes)); // shift integer part - mantissa |= (fraction >> (max_bits-integer_leading_zeroes)); // append the fraction after it - //Debug("%.16lx (%.16lx << %i)", mantissa, fraction, (fraction_leading_zeroes - max_bits + integer_leading_zeroes)); - mantissa = mantissa >> (max_bits - PREC); // shift back to the mantissa position - biased_exponent = (max_bits - integer_leading_zeroes); // calculate exponent + mantissa = (fraction << (fraction_leading_zeroes)); + } + //Debug("Mantissa with fraction %.16lx (%.16lx >><>?<< %lu)", mantissa, fraction, fraction_leading_zeroes); + mantissa = (mantissa >> (max_bits - PREC - 1)); + //Debug("Mantissa is %.16lx (%.16lx . %.16lx)", mantissa, fraction, integer); + biased_exponent = (integer != 0) ? (max_bits - integer_leading_zeroes) : (-fraction_leading_zeroes); + if (biased_exponent < -(1LL << (EMAX-1))+1) + { + //Debug("Denormalising for glory or death! %li , %li", biased_exponent, -(1LL << (EMAX-1)) - biased_exponent + 1); + mantissa = mantissa >> (-(1LL << (EMAX-1)) - biased_exponent + 1); + biased_exponent = -(1LL << (EMAX-1))+1; } + //Debug("Biased exponent is %li", biased_exponent); // If the unbiased exponent would be non-zero, the IEEE has the leading 1 as implied; remove it and decrease exponent to compensate if (biased_exponent != -(1LL << (EMAX-1))+1) { - mantissa &= ~(1LL << max_bits - PREC); - mantissa = mantissa << 1; + //Debug("Implied 1 in mantissa %.16lx", mantissa); + mantissa &= ~(1LL << (PREC)); + //Debug("Implied 1 in mantissa %.16lx", mantissa); biased_exponent -= 1; } + else + { + mantissa >>= 1; + } + //Debug("%.16lx", mantissa); uint64_t exponent = (uint64_t)(biased_exponent + (1LL << (EMAX-1))-1); // offset the exponent @@ -241,17 +242,21 @@ int main(int argc, char ** argv) printf("# Convert custom floats to a Real\n"); printf("# a\thex(a)\tReal(a)\tdelta(last2)\n"); - typedef pair Pear; + typedef uint16_t Bits; + typedef pair Pear; + const uint8_t E = 5; + const uint8_t P = 10; + list space; - uint16_t a0 = 0x0000; - for (uint16_t a = a0; a < 0xFFFF; ++a) + Bits a0 = 0x0000; + for (Bits a = a0; a < 0xFFFF; ++a) { - Real x = BitsToReal<5,10>(&a); - uint16_t b = 0; BitsFromReal<5,10>(x, &b); - Real y = BitsToReal<5,10>(&b); + Real x = BitsToReal(&a); + Bits b = 0; BitsFromReal(x, &b); + Real y = BitsToReal(&b); if (y != x) { - Fatal("%x -> %lf -> %x -> %lf", a, Float(x), b, Float(y)); + Fatal("%x -> %.10lf -> %x -> %.10lf", a, Float(x), b, Float(y)); } space.push_back(Pear(a, x)); } @@ -266,3 +271,7 @@ int main(int argc, char ** argv) prev = i->second; } } + +//0|011 11|00 0000 0001 + +//0|000 01|00 0000 0000 \ No newline at end of file