Tester from some lecture notes by Kahan

[ipdf/code.git] / src / tests / represent.cpp
diff --git a/src/tests/represent.cpp b/src/tests/represent.cpp

index d7b129e..e5abef5 100644 (file)
--- a/src/tests/represent.cpp
+++ b/src/tests/represent.cpp
@@ -87,100 +87,150 @@ template <size_t EMAX, size_t PREC, size_t BASE = 2> Real BitsToReal(void * data
         return (sign) ? -x : x;
  }
  
+/**
+ * Modifies fraction to contain the fractional part of x
+ * Returns position of the first '1' bit
+ */
+size_t Fraction(Real x, uint64_t & fraction)
+{
+       x = abs(x) - (uint64_t)(abs(x));
+       fraction = 0LL;
+       size_t offset = 0;
+       for (size_t i = 0; i < 8*sizeof(fraction); ++i)
+       {
+               x *= 2;
+               if (x >= 1.0)
+               {
+                       offset = (offset == 0) ? i : offset;
+                       fraction |= (1LL << (8*sizeof(fraction)-1-i));
+                       x -= 1.0;
+               }
+               if (x <= 0.0)
+                       break;
+       }
+       return fraction;
+}
+
+/**
+ * Copy mantissa and exponent into data
+ */
+template <size_t EMAX, size_t PREC, size_t BASE = 2> void BitsFromRepr(bool sign, uint64_t mantissa, uint64_t exponent, void * data)
+{
+       uint8_t * c = (uint8_t*)(data);
+       size_t i = 0;
+       for (i = 0; i < PREC; ++i)
+       {
+               *c |= ((mantissa >> i) & 1) << (i%8);
+               if ((i+1) % 8 == 0) 
+               {
+                       ++c;
+               }
+       }
+       for (; i < PREC+EMAX; ++i)
+       {
+               *c |= ((exponent >> (i-PREC)) & 1) << (i%8);
+               if ((i+1) % 8 == 0) 
+               {
+                       ++c;
+               }
+       }
+       *c |= (sign << (i%8));
+}
+
+
+
  
  /**
   * Performs the inverse of BitsToReal
   * Hopefully
   * It chooses the normalised representation
   */
-template <size_t EMAX, size_t PREC, size_t BASE = 2> void BitsFromReal(const Real & x, void * data)
+template <size_t EMAX, size_t PREC, size_t BASE = 2> void BitsFromReal(Real x, void * data)
  {
         if (PREC + EMAX > 62)
                 Fatal("Can't do more than 62 bits (asked for %d + %d = %d)", PREC, EMAX, PREC+EMAX);
  
         uint64_t integer = (uint64_t)(abs(x));
-       uint64_t fraction = 0L;
-       int64_t exponent = 0L;
-
-       int64_t exp_bias = (1LL << (EMAX-1) - 1);
-
-       Real f = Real(abs(x)) - Real(integer);
-       //f = 0.375; integer = 12;
-       // Get active fractional bits
-       for (size_t i = 0; i <= PREC && f > 0; ++i)
+       uint64_t fraction = 0LL;
+       const size_t max_bits = 8*sizeof(uint64_t);
+       bool sign = (x < 0);
+       x = abs(x) - integer;
+       
+       size_t fraction_leading_zeroes = max_bits;
+       for (size_t i = 0; i < max_bits; ++i)
         {
-               //Debug("%d, %lf", i, Float(f));
-               if (f >= 1)
-               {       
-                       fraction |= (1LL << (PREC-i));
-                       f -= 1.0L;      
-                       //Debug("1");
+               x *= 2;
+               if (x >= 1.0)
+               {
+                       if (fraction_leading_zeroes > i)
+                               fraction_leading_zeroes = i;
+                       x -= 1;
+                       fraction |= (1LL << (max_bits-1-i));
                 }
-               //else
-                       //Debug("0");
-               f *= 2.0L;
+               if (x <= 0.0)
+                       break;
         }
  
-       //Debug("Integer: %lx", integer);
-       int offset = 0; while ((integer >> (offset++) != 0) && (offset < 8*sizeof(integer))); --offset;
-       //Debug("Offset: %d", offset);
-       //Debug("Fraction: %lx", fraction);     
-       exponent += offset-1;
-       uint64_t mantissa = fraction;   
-       if (offset > 0)
+       uint64_t mantissa = 0LL;
+       int64_t biased_exponent = 0LL;
+       
+       // Combine integer and fraction into mantissa
+       if (integer == 0)
         {
-               //Debug("Mantissa %.2lx", mantissa);    
-               mantissa = (integer << PREC) | fraction;
-               //Debug("Mantissa %.2lx", mantissa);    
-               mantissa = mantissa >> (offset-1);
-               //Debug("Mantissa %.2lx", mantissa);    
-               //mantissa = mantissa >> (offset-1);
+               //Debug("0. %lx (%d)", fraction, fraction_leading_zeroes);
+               // Mantissa must have leading '1'; shift to get leading 1, then shift back to mantissa position
+               mantissa = (fraction << fraction_leading_zeroes) >> (max_bits - PREC);
+               biased_exponent -= fraction_leading_zeroes;
+               // If the unbiased exponent would be negative use a denormalised number instead
+               if (biased_exponent < -(1LL << (EMAX-1))+1)
+               {
+                       mantissa = mantissa >> (-(1LL << (EMAX-1))+1 - biased_exponent);
+                       biased_exponent = -(1LL << (EMAX-1))+1;
+               }
         }
         else
         {
-               mantissa = mantissa << 1;
+
+               size_t integer_leading_zeroes = 0;
+               for (integer_leading_zeroes = 0; integer_leading_zeroes < max_bits; ++integer_leading_zeroes)
+               {
+                       if ((integer >> (max_bits-1-integer_leading_zeroes)) & 1LL)
+                               break;
+               }
+               //Debug("%.16lx . %.16lx (%li, %li)", integer,fraction,  integer_leading_zeroes, fraction_leading_zeroes);
+
+               mantissa = (integer << (integer_leading_zeroes)); // shift integer part
+               mantissa |= (fraction >> (max_bits-integer_leading_zeroes)); // append the fraction after it
+               //Debug("%.16lx (%.16lx << %i)", mantissa,  fraction, (fraction_leading_zeroes - max_bits + integer_leading_zeroes));
+               mantissa = mantissa >> (max_bits - PREC); // shift back to the mantissa position
+               biased_exponent = (max_bits - integer_leading_zeroes); // calculate exponent
         }
-       //Debug("Mantissa %.2lx", mantissa);    
  
-       //Debug("Exponent: %lx %li", exponent, exponent);
-       if (exponent < -exp_bias)
+       // If the unbiased exponent would be non-zero, the IEEE has the leading 1 as implied; remove it and decrease exponent to compensate
+       if (biased_exponent != -(1LL << (EMAX-1))+1)
         {
-               mantissa = mantissa >> (-exp_bias - exponent);
-               exponent = -exp_bias;
+               mantissa &= ~(1LL << max_bits - PREC);
+               mantissa = mantissa << 1;
+               biased_exponent -= 1;
         }
-       //Debug("Exponent: %lx %li", exponent, exponent);       
-       exponent = exp_bias + exponent;
-       //Real(exponent + 1) - Real(1LL << (EMAX-1))
-       //Debug("Exponent: %lx %li", exponent, exponent);
+
+       uint64_t exponent = (uint64_t)(biased_exponent + (1LL << (EMAX-1))-1); // offset the exponent
+
+       // Debug
+       /*
         Real M = (exponent != 0);
         for (size_t i = 1; i < PREC; ++i)
         {
                 if ((mantissa >> PREC-i) & 1)
                         M += Real(1) / powl(BASE, i);
         }
-       //Debug("M is %lf", Float(M));
  
-       // Now copy the bits in (fun times)
-       uint8_t * c = (uint8_t*)(data);
-       size_t i = 0;
-       for (i = 0; i < PREC; ++i)
-       {
-               *c |= ((mantissa >> i) & 1) << (i%8);
-               if ((i+1) % 8 == 0) 
-               {
-                       ++c;
-               }
-       }
-       for (; i < PREC+EMAX; ++i)
-       {
-               *c |= ((exponent >> (i-PREC)) & 1) << (i%8);
-               if ((i+1) % 8 == 0) 
-               {
-                       ++c;
-               }
-       }
-       *c |= ((x < 0) << (i%8));
-       
+       Debug("Exponent: %lx %lu -> %li", exponent, exponent, biased_exponent);
+       Debug("Mantissa: %lx -> %f", mantissa, Float(M));
+       Debug("Both: %lx", mantissa | (exponent << (PREC)));
+       */
+       // Now copy the bits in
+       BitsFromRepr<EMAX, PREC, BASE>(sign, mantissa, exponent, data);
         
  }
  
@@ -191,17 +241,17 @@ int main(int argc, char ** argv)
         printf("# Convert custom floats to a Real\n");
         printf("# a\thex(a)\tReal(a)\tdelta(last2)\n");
  
-       typedef pair<uint8_t, Real> Pear;
+       typedef pair<uint16_t, Real> Pear;
         list<Pear> space;
         uint16_t a0 = 0x0000;
-       for (uint16_t a = a0; a < a0+0x00FF; ++a)
+       for (uint16_t a = a0; a < 0xFFFF; ++a)
         {
                 Real x = BitsToReal<5,10>(&a);
                 uint16_t b = 0; BitsFromReal<5,10>(x, &b);
                 Real y = BitsToReal<5,10>(&b);
                 if (y != x)
                 {
-                       Warn("%x -> %lf -> %x -> %lf", a, Float(x), b, Float(y));
+                       Fatal("%x -> %lf -> %x -> %lf", a, Float(x), b, Float(y));
                 }
                 space.push_back(Pear(a, x));
         }