From: Sam Moore Date: Mon, 4 Aug 2014 07:52:58 +0000 (+0800) Subject: CS Lab machines have met their nemesis X-Git-Url: https://git.ucc.asn.au/?a=commitdiff_plain;h=55790e6cc129dcac3b3d001c7f5c74c288642123;p=ipdf%2Fcode.git CS Lab machines have met their nemesis The ongoing adventures of overcomplicated makefile land. David is dictating this commit message by the way, PS: They are i686 and only g++0x from before 0x Tune in next week for another exciting episode (or installment, you, the viewer, decide!) of randomly ifdefing things out! Will our heroes successfully defeat the 32 bit long? Or will they perish in an integer overflow? (I think the project is beginning to affect our minds) --- diff --git a/src/Makefile b/src/Makefile index ce6ac42..94b1f67 100644 --- a/src/Makefile +++ b/src/Makefile @@ -6,15 +6,19 @@ MAIN = main.o OBJ = log.o real.o bezier.o document.o objectrenderer.o view.o screen.o vfpu.o quadtree.o graphicsbuffer.o framebuffer.o shaderprogram.o stb_truetype.o gl_core44.o add_digits_asm.o sub_digits_asm.o mul_digits_asm.o div_digits_asm.o arbint.o LIB_x86_64 = ../contrib/lib/libSDL2-2.0.so.0 -lGL -lgmp LIB_i386 = ../contrib/lib32/libSDL2-2.0.so.0 -lGL -lgmp +LIB_i686 = $(LIB_i386) MAINRPATH_x86_64 = -Wl,-rpath,'$$ORIGIN/../contrib/lib' MAINRPATH_i386 = -Wl,-rpath,'$$ORIGIN/../contrib/lib32' +MAINRPATH_i686 = $(MAINRPATH_i386) TESTRPATH_x86_64 = -Wl,-rpath,'$$ORIGIN/../../contrib/lib' TESTRPATH_i386 = -Wl,-rpath,'$$ORIGIN/../../contrib/lib32' +TESTRPATH_i686 = $(TESTRPATH_i386) OBJPATHS = $(OBJ:%=../obj/%) DEPS := $(OBJPATHS:%.o=%.d) CFLAGS_x86_64 := -I../contrib/include/SDL2 -I`pwd` CFLAGS_i386 := -I../contrib/include32/SDL2 -I`pwd` +CFLAGS_i686 := $(CFLAGS_i386) LIB := $(LIB_$(ARCH)) @@ -54,7 +58,7 @@ movie : $(BIN) ../tools/stream_plot.py # To change that you can run as `make DEFS="REAL=X" tests/` where X is your chosen type # But remember to make clean first. tests/% : tests/%.cpp ../obj/tests/%.o $(LINKOBJ) - $(CXX) -o $@.test $(LINKOBJ) ../obj/$@.o $(LIB) $(TESTRPATH) + $(CXX) $(CFLAGS) -o $@.test $(LINKOBJ) ../obj/$@.o $(LIB) $(TESTRPATH) -include $(DEPS) @@ -65,7 +69,7 @@ runtests : tests/runtests.sh $(BIN) : $(LINKOBJ) ../obj/$(MAIN) echo $(LINKOBJ) @mkdir -p $(dir $@) - $(CXX) -o $(BIN) $(LINKOBJ) ../obj/$(MAIN) $(LIB) $(MAINRPATH) + $(CXX) $(CFLAGS) -o $(BIN) $(LINKOBJ) ../obj/$(MAIN) $(LIB) $(MAINRPATH) -include $(DEPS) @@ -73,7 +77,7 @@ $(BIN) : $(LINKOBJ) ../obj/$(MAIN) @mkdir -p $(dir $@) $(CXX) $(CFLAGS) $(DEF) -c -MMD -o $@ $< -../obj/%_asm.o : %_asm.s main.h +../obj/%_asm.o : %_asm.S main.h @mkdir -p $(dir $@) $(CXX) -c -o $@ $< diff --git a/src/add_digits_asm.S b/src/add_digits_asm.S new file mode 100644 index 0000000..8037e7d --- /dev/null +++ b/src/add_digits_asm.S @@ -0,0 +1,38 @@ +.section .text +.globl add_digits +.type add_digits, @function + +#ifdef __x86_64__ + +# Add two arrays of 64 bit digits, with carry, modifying the first argument +# Address at first argument %rdi is array to add and modify +# Address at second %rsi will be added (not modified) +# Third argument is counter of number of digits +# Result in %rax is the final result in the carry flag +# Exploits the fact that inc and dec do not affect the carry flag +add_digits: + addq $0, %rax + loop: + movq (%rsi), %rax # Temporarily store digit from second array + adcq %rax, (%rdi) # Add digits in second and first array, store in first + dec %rdx # Decrement counter + jz end_loop # We are done + + # Move to next element in the first array + leaq 8(,%rdi,1), %rdi + # Move to next element in the second array + leaq 8(,%rsi,1), %rsi + jmp loop # Repeat + end_loop: + movq $0, %rax + jnc end + movq $1, %rax + end: + ret # We are done + +#else + +add_digits: + ret + +#endif diff --git a/src/add_digits_asm.s b/src/add_digits_asm.s deleted file mode 100644 index 8144acc..0000000 --- a/src/add_digits_asm.s +++ /dev/null @@ -1,29 +0,0 @@ -.section .text -.globl add_digits -.type add_digits, @function - -# Add two arrays of 64 bit digits, with carry, modifying the first argument -# Address at first argument %rdi is array to add and modify -# Address at second %rsi will be added (not modified) -# Third argument is counter of number of digits -# Result in %rax is the final result in the carry flag -# Exploits the fact that inc and dec do not affect the carry flag -add_digits: - addq $0, %rax - loop: - movq (%rsi), %rax # Temporarily store digit from second array - adcq %rax, (%rdi) # Add digits in second and first array, store in first - dec %rdx # Decrement counter - jz end_loop # We are done - - # Move to next element in the first array - leaq 8(,%rdi,1), %rdi - # Move to next element in the second array - leaq 8(,%rsi,1), %rsi - jmp loop # Repeat - end_loop: - movq $0, %rax - jnc end - movq $1, %rax - end: - ret # We are done diff --git a/src/arbint.h b/src/arbint.h index ae0b23e..468785f 100644 --- a/src/arbint.h +++ b/src/arbint.h @@ -25,7 +25,7 @@ namespace IPDF int64_t AsDigit() const { - int64_t digit = (m_digits.size() == 1) ? m_digits[0] : 0x7FFFFFFFFFFFFFFF; + int64_t digit = (m_digits.size() == 1) ? m_digits[0] : 0xBADF00D; return (m_sign) ? -digit : digit; } diff --git a/src/div_digits_asm.S b/src/div_digits_asm.S new file mode 100644 index 0000000..5dd5504 --- /dev/null +++ b/src/div_digits_asm.S @@ -0,0 +1,33 @@ +.section .text +.globl div_digits +.type div_digits, @function + +#ifdef __x86_64__ + +# div_digits(digits, div, size, res) +# divides an arbint in digits by uint64 div into res, returns remainder +# res may alias digits +# digits = rdi, div = rsx, size = rdx, res = rcx, +div_digits: + movq %rdx, %r8 + leaq -8(%rdi,%r8,8), %rdi # We want to point to the end of the buffer (LSB) + leaq -8(%rcx,%r8,8), %rcx # We want to point to the end of the buffer (LSB) + movq $0, %rdx +loop: + movq (%rdi), %rax + divq %rsi # rdx:rax/rsi => rax, rdx:rax%rsi => rdx + movq %rax, (%rcx) + dec %r8 + leaq -8(%rdi), %rdi + leaq -8(%rcx), %rcx + jnz loop +end: + movq %rdx, %rax # return the remainder + ret + +#else + +div_digits: + ret + +#endif diff --git a/src/div_digits_asm.s b/src/div_digits_asm.s deleted file mode 100644 index 1ebf86f..0000000 --- a/src/div_digits_asm.s +++ /dev/null @@ -1,26 +0,0 @@ -.section .text -.globl div_digits -.type div_digits, @function - -# div_digits(digits, div, size, res) -# divides an arbint in digits by uint64 div into res, returns remainder -# res may alias digits -# digits = rdi, div = rsx, size = rdx, res = rcx, -div_digits: - movq %rdx, %r8 - leaq -8(%rdi,%r8,8), %rdi # We want to point to the end of the buffer (LSB) - leaq -8(%rcx,%r8,8), %rcx # We want to point to the end of the buffer (LSB) - movq $0, %rdx -loop: - movq (%rdi), %rax - divq %rsi # rdx:rax/rsi => rax, rdx:rax%rsi => rdx - movq %rax, (%rcx) - dec %r8 - leaq -8(%rdi), %rdi - leaq -8(%rcx), %rcx - jnz loop -end: - movq %rdx, %rax # return the remainder - ret - - diff --git a/src/framebuffer.h b/src/framebuffer.h index 33c5fe1..920cbd2 100644 --- a/src/framebuffer.h +++ b/src/framebuffer.h @@ -1,7 +1,7 @@ #ifndef _FRAMEBUFFER_H #define _FRAMEBUFFER_H -#include +#include "SDL.h" #include "gl_core44.h" diff --git a/src/graphicsbuffer.h b/src/graphicsbuffer.h index 4665345..fd89771 100644 --- a/src/graphicsbuffer.h +++ b/src/graphicsbuffer.h @@ -1,7 +1,7 @@ #ifndef _GRAPHICSBUFFER_H #define _GRAPHICSBUFFER_H -#include +#include "SDL.h" #include "gl_core44.h" diff --git a/src/main.h b/src/main.h index ee78f59..cfc6909 100644 --- a/src/main.h +++ b/src/main.h @@ -118,7 +118,7 @@ inline void MainLoop(Document & doc, const Rect & bounds = Rect(0,0,1,1), const total_real_time += real_frame; total_cpu_time += cpu_frame; total_gpu_time += gpu_frame; if (data_rate > 0 && total_real_time > data_rate*(data_points+1)) { - printf("%lu\t%f\t%f\t%f\t%f\t%f\t%f\n", (uint64_t)frames, total_real_time, total_cpu_time, total_gpu_time, real_frame, cpu_frame, gpu_frame); + printf("%lu\t%f\t%f\t%f\t%f\t%f\t%f\n", (long unsigned int)frames, total_real_time, total_cpu_time, total_gpu_time, real_frame, cpu_frame, gpu_frame); data_points++; } scr.DebugFontPrintF("Rendered frame %lu\n", (uint64_t)frames); diff --git a/src/mul_digits_asm.S b/src/mul_digits_asm.S new file mode 100644 index 0000000..36e3be5 --- /dev/null +++ b/src/mul_digits_asm.S @@ -0,0 +1,40 @@ +.section .text +.globl mul_digits +.type mul_digits, @function + +#ifdef __x86_64__ + +# Multiply an array of 64 bit digits by *one* 64 bit digit, modifies the array in place +mul_digits: + movq %rdx, %rcx # rdx is reserved for mulq, use rcx as counter + movq $0, %r12 # Overflow register + loop: + movq %rsi, %rax # Value to multiply in %rax + mulq (%rdi) # Multiply, stored in %rdx:%rax (ie: we get TWO digits) + + # Add overflow from previous operation + add %r12, %rax + # Upper digit gets saved as next overflow + movq %rdx, %r12 + + # Lower digit goes in current array position + movq %rax, (%rdi) + + dec %rcx # Decrement counter + jz end_loop # We are done + + # Move to next element in the array + leaq 8(,%rdi,1), %rdi + jmp loop # Repeat + + end_loop: + end: + movq %r12, %rax # Return overflow + ret # We are done + +#else + +mul_digits: + ret + +#endif diff --git a/src/mul_digits_asm.s b/src/mul_digits_asm.s deleted file mode 100644 index fb11765..0000000 --- a/src/mul_digits_asm.s +++ /dev/null @@ -1,31 +0,0 @@ -.section .text -.globl mul_digits -.type mul_digits, @function - -# Multiply an array of 64 bit digits by *one* 64 bit digit, modifies the array in place -mul_digits: - movq %rdx, %rcx # rdx is reserved for mulq, use rcx as counter - movq $0, %r12 # Overflow register - loop: - movq %rsi, %rax # Value to multiply in %rax - mulq (%rdi) # Multiply, stored in %rdx:%rax (ie: we get TWO digits) - - # Add overflow from previous operation - add %r12, %rax - # Upper digit gets saved as next overflow - movq %rdx, %r12 - - # Lower digit goes in current array position - movq %rax, (%rdi) - - dec %rcx # Decrement counter - jz end_loop # We are done - - # Move to next element in the array - leaq 8(,%rdi,1), %rdi - jmp loop # Repeat - - end_loop: - end: - movq %r12, %rax # Return overflow - ret # We are done diff --git a/src/objectrenderer.cpp b/src/objectrenderer.cpp index 0770e46..9bbcfd6 100644 --- a/src/objectrenderer.cpp +++ b/src/objectrenderer.cpp @@ -117,9 +117,9 @@ void RectFilledRenderer::RenderUsingCPU(const Objects & objects, const View & vi if (m_indexes[i] < first_obj_id) continue; if (m_indexes[i] >= last_obj_id) continue; PixelBounds bounds(CPURenderBounds(objects.bounds[m_indexes[i]], view, target)); - for (int64_t x = max(0L, bounds.x); x <= min(bounds.x+bounds.w, target.w-1); ++x) + for (int64_t x = max((int64_t)0, bounds.x); x <= min(bounds.x+bounds.w, target.w-1); ++x) { - for (int64_t y = max(0L, bounds.y); y <= min(bounds.y+bounds.h, target.h-1); ++y) + for (int64_t y = max((int64_t)0, bounds.y); y <= min(bounds.y+bounds.h, target.h-1); ++y) { int index = (x+target.w*y)*4; target.pixels[index+0] = 0; @@ -175,9 +175,9 @@ void CircleFilledRenderer::RenderUsingCPU(const Objects & objects, const View & //Debug("Centre is %d, %d", centre_x, centre_y); //Debug("Bounds are %d,%d,%d,%d", bounds.x, bounds.y, bounds.w, bounds.h); //Debug("Windos is %d,%d", target.w, target.h); - for (int64_t x = max(0L, bounds.x); x <= min(bounds.x+bounds.w, target.w-1); ++x) + for (int64_t x = max((int64_t)0, bounds.x); x <= min(bounds.x+bounds.w, target.w-1); ++x) { - for (int64_t y = max(0L, bounds.y); y <= min(bounds.y + bounds.h, target.h-1); ++y) + for (int64_t y = max((int64_t)0, bounds.y); y <= min(bounds.y + bounds.h, target.h-1); ++y) { Real dx(2); dx *= Real(x - centre_x)/Real(bounds.w); Real dy(2); dy *= Real(y - centre_y)/Real(bounds.h); @@ -237,7 +237,7 @@ void BezierRenderer::RenderUsingCPU(const Objects & objects, const View & view, Real x[2]; Real y[2]; control.Evaluate(x[0], y[0], Real(0)); - int64_t blen = max(2L, min(100L, pix_bounds.w)); + int64_t blen = max((int64_t)2, min((int64_t)100, pix_bounds.w)); Real invblen(1); invblen /= blen; Debug("Using %li lines, inverse %f", blen, Double(invblen)); for (int64_t j = 1; j <= blen; ++j) diff --git a/src/real.h b/src/real.h index ecd3852..8021ada 100644 --- a/src/real.h +++ b/src/real.h @@ -40,7 +40,7 @@ namespace IPDF #elif REAL == REAL_LONG_DOUBLE typedef long double Real; #elif REAL == REAL_VFPU - typedef VFPU::Float Real; + typedef VFPU::VFloat Real; inline float Float(const Real & r) {return r.m_value;} inline double Double(const Real & r) {return r.m_value;} #elif REAL == REAL_RATIONAL diff --git a/src/screen.h b/src/screen.h index 2d28db2..85b3c27 100644 --- a/src/screen.h +++ b/src/screen.h @@ -1,7 +1,7 @@ #ifndef _SCREEN_H #define _SCREEN_H -#include +#include "SDL.h" #include diff --git a/src/sub_digits_asm.S b/src/sub_digits_asm.S new file mode 100644 index 0000000..cd4629a --- /dev/null +++ b/src/sub_digits_asm.S @@ -0,0 +1,38 @@ +.section .text +.globl sub_digits +.type sub_digits, @function + +#ifdef __x86_64__ + +# Subtract two arrays of 64 bit digits, with carry, modifying the first argument +# Address at first argument %rdi is array to add and modify +# Address at second %rsi will be added (not modified) +# Third argument is counter of number of digits +# Result in %rax is the final result in the carry flag +# Exploits the fact that inc and dec do not affect the carry flag +sub_digits: + subq $0, %rax # Reset the carry/borrow flag + loop: + movq (%rsi), %rax # Temporarily store digit from second array + sbbq %rax, (%rdi) # Subtract digits in second and first array, store in first + dec %rdx # Decrement counter + jz end_loop # We are done + + # Move to next element in the first array + leaq 8(,%rdi,1), %rdi + # Move to next element in the second array + leaq 8(,%rsi,1), %rsi + jmp loop # Repeat + end_loop: + movq $0, %rax + jnc end + movq $1, %rax + end: + ret # We are done + +#else + +sub_digits: + ret + +#endif diff --git a/src/sub_digits_asm.s b/src/sub_digits_asm.s deleted file mode 100644 index 17d81c1..0000000 --- a/src/sub_digits_asm.s +++ /dev/null @@ -1,29 +0,0 @@ -.section .text -.globl sub_digits -.type sub_digits, @function - -# Subtract two arrays of 64 bit digits, with carry, modifying the first argument -# Address at first argument %rdi is array to add and modify -# Address at second %rsi will be added (not modified) -# Third argument is counter of number of digits -# Result in %rax is the final result in the carry flag -# Exploits the fact that inc and dec do not affect the carry flag -sub_digits: - subq $0, %rax # Reset the carry/borrow flag - loop: - movq (%rsi), %rax # Temporarily store digit from second array - sbbq %rax, (%rdi) # Subtract digits in second and first array, store in first - dec %rdx # Decrement counter - jz end_loop # We are done - - # Move to next element in the first array - leaq 8(,%rdi,1), %rdi - # Move to next element in the second array - leaq 8(,%rsi,1), %rsi - jmp loop # Repeat - end_loop: - movq $0, %rax - jnc end - movq $1, %rax - end: - ret # We are done diff --git a/src/vfpu.cpp b/src/vfpu.cpp index b59fe3e..bf9e9e6 100644 --- a/src/vfpu.cpp +++ b/src/vfpu.cpp @@ -115,8 +115,7 @@ Register Exec(const Register & a, const Register & b, Opcode op, Rmode rmode) { assert(g_running); stringstream s; - //TODO: Make it compile on non C++11 - s << hex << setw(8) << setfill('0') << a.to_ullong() << "\n" << b.to_ullong() << "\n" << setw(1) << op <<"\n" << setw(1) << rmode << "\n"; + s << hex << setw(8) << setfill('0') << a.to_ulong() << "\n" << b.to_ulong() << "\n" << setw(1) << op <<"\n" << setw(1) << rmode << "\n"; string str(s.str()); //Debug("Writing: %s", str.c_str()); @@ -139,7 +138,7 @@ Register Exec(const Register & a, const Register & b, Opcode op, Rmode rmode) stringstream s2; //TODO: Make it compile on non C++11 - s2 << hex << result.to_ullong(); + s2 << hex << result.to_ulong(); //Debug("Result is: %s", s2.str().c_str()); return result; } diff --git a/src/vfpu.h b/src/vfpu.h index 07bcacd..cb7fe50 100644 --- a/src/vfpu.h +++ b/src/vfpu.h @@ -21,10 +21,10 @@ namespace VFPU /** * Wrapper class for floats where operations are done on the VFPU */ - class Float + class VFloat { public: - Float(float f = 0) : m_value(f) + VFloat(float f = 0) : m_value(f) { static bool init = false; if (!init) @@ -33,59 +33,59 @@ namespace VFPU VFPU::Start("flops.vcd"); } } - Float(const Float & cpy) : m_value(cpy.m_value) {} - virtual ~Float() + VFloat(const VFloat & cpy) : m_value(cpy.m_value) {} + virtual ~VFloat() { } - Float & operator+=(const Float & op) + VFloat & operator+=(const VFloat & op) { m_value = Exec(m_value, op.m_value, ADD); return *this; } - Float & operator-=(const Float & op) + VFloat & operator-=(const VFloat & op) { m_value = Exec(m_value, op.m_value, SUB); return *this; } - Float & operator*=(const Float & op) + VFloat & operator*=(const VFloat & op) { m_value = Exec(m_value, op.m_value, MULT); return *this; } - Float & operator/=(const Float & op) + VFloat & operator/=(const VFloat & op) { m_value = Exec(m_value, op.m_value, DIV); return *this; } - Float operator+(const Float & op) const {Float f(*this); f+=op; return f;} - Float operator-(const Float & op) const {Float f(*this); f-=op; return f;} - Float operator*(const Float & op) const {Float f(*this); f*=op; return f;} - Float operator/(const Float & op) const {Float f(*this); f/=op; return f;} + VFloat operator+(const VFloat & op) const {VFloat f(*this); f+=op; return f;} + VFloat operator-(const VFloat & op) const {VFloat f(*this); f-=op; return f;} + VFloat operator*(const VFloat & op) const {VFloat f(*this); f*=op; return f;} + VFloat operator/(const VFloat & op) const {VFloat f(*this); f/=op; return f;} - bool operator==(const Float & op) const + bool operator==(const VFloat & op) const { - Float f(op); + VFloat f(op); f -= *this; return (f.m_value == 0); } - bool operator!=(const Float & op) const {return !this->operator==(op);} - bool operator<(const Float & op) const + bool operator!=(const VFloat & op) const {return !this->operator==(op);} + bool operator<(const VFloat & op) const { - Float f(op); + VFloat f(op); f -= *this; return (f.m_value > 0); } - bool operator<=(const Float & op) const + bool operator<=(const VFloat & op) const { - Float f(op); + VFloat f(op); f -= *this; return (f.m_value >= 0); } - bool operator>(const Float & op) const {return !this->operator<=(op);} - bool operator>=(const Float & op) const {return !this->operator<(op);} + bool operator>(const VFloat & op) const {return !this->operator<=(op);} + bool operator>=(const VFloat & op) const {return !this->operator<(op);} float m_value;