X-Git-Url: https://git.ucc.asn.au/?a=blobdiff_plain;f=Kernel%2Farch%2Fx86%2Flib.c;h=ecc575c1ad240b3c57693025ad1c73437d076430;hb=a20cfd571f504f5c7f2d29516442a12c200441d5;hp=9dc631d0e5cc09a9ced96fe69b4ac08d9a0b47b9;hpb=233370ff3da98c2037f4648d68b72b3b9f776032;p=tpg%2Facess2.git diff --git a/Kernel/arch/x86/lib.c b/Kernel/arch/x86/lib.c index 9dc631d0..ecc575c1 100644 --- a/Kernel/arch/x86/lib.c +++ b/Kernel/arch/x86/lib.c @@ -22,6 +22,7 @@ extern struct sShortSpinlock glThreadListLock; extern int GetCPUNum(void); // === PROTOTYPES == +Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem); Uint64 __udivdi3(Uint64 Num, Uint64 Den); Uint64 __umoddi3(Uint64 Num, Uint64 Den); @@ -45,13 +46,7 @@ int IS_LOCKED(struct sShortSpinlock *Lock) */ int CPU_HAS_LOCK(struct sShortSpinlock *Lock) { - #if STACKED_LOCKS == 1 return Lock->Lock == GetCPUNum() + 1; - #elif STACKED_LOCKS == 2 - return Lock->Lock == Proc_GetCurThread(); - #else - return 0; - #endif } /** @@ -70,32 +65,12 @@ int CPU_HAS_LOCK(struct sShortSpinlock *Lock) void SHORTLOCK(struct sShortSpinlock *Lock) { int v = 1; - #if LOCK_DISABLE_INTS int IF; - #endif - #if STACKED_LOCKS == 1 int cpu = GetCPUNum() + 1; - #elif STACKED_LOCKS == 2 - void *thread = Proc_GetCurThread(); - #endif - #if LOCK_DISABLE_INTS // Save interrupt state __ASM__ ("pushf;\n\tpop %0" : "=r"(IF)); IF &= 0x200; // AND out all but the interrupt flag - #endif - - #if STACKED_LOCKS == 1 - if( Lock->Lock == cpu ) { - Lock->Depth ++; - return ; - } - #elif STACKED_LOCKS == 2 - if( Lock->Lock == thread ) { - Lock->Depth ++; - return ; - } - #endif #if TRACE_LOCKS if( Lock != &glDebug_Lock && Lock != &glThreadListLock ) @@ -105,34 +80,17 @@ void SHORTLOCK(struct sShortSpinlock *Lock) } #endif + __ASM__("cli"); + // Wait for another CPU to release - while(v) { - // CMPXCHG: - // If r/m32 == EAX, set ZF and set r/m32 = r32 - // Else, clear ZF and set EAX = r/m32 - #if STACKED_LOCKS == 1 - __ASM__("lock cmpxchgl %2, (%3)" - : "=a"(v) - : "a"(0), "r"(cpu), "r"(&Lock->Lock) - ); - #elif STACKED_LOCKS == 2 - __ASM__("lock cmpxchgl %2, (%3)" - : "=a"(v) - : "a"(0), "r"(thread), "r"(&Lock->Lock) - ); - #else - __ASM__("xchgl %%eax, (%%edi)":"=a"(v):"a"(1),"D"(&Lock->Lock)); - #endif - - #if LOCK_DISABLE_INTS - if( v ) __ASM__("sti"); // Re-enable interrupts - #endif - } + __ASM__( + "1: lock cmpxchgl %2, (%3)\n\t" + "jnz 1b" + : "=a"(v) + : "a"(0), "r"(cpu), "r"(&Lock->Lock) + ); - #if LOCK_DISABLE_INTS - __ASM__("cli"); Lock->IF = IF; - #endif #if TRACE_LOCKS if( Lock != &glDebug_Lock && Lock != &glThreadListLock ) @@ -149,13 +107,6 @@ void SHORTLOCK(struct sShortSpinlock *Lock) */ void SHORTREL(struct sShortSpinlock *Lock) { - #if STACKED_LOCKS - if( Lock->Depth ) { - Lock->Depth --; - return ; - } - #endif - #if TRACE_LOCKS if( Lock != &glDebug_Lock && Lock != &glThreadListLock ) { @@ -164,7 +115,6 @@ void SHORTREL(struct sShortSpinlock *Lock) } #endif - #if LOCK_DISABLE_INTS // Lock->IF can change anytime once Lock->Lock is zeroed if(Lock->IF) { Lock->Lock = 0; @@ -173,9 +123,6 @@ void SHORTREL(struct sShortSpinlock *Lock) else { Lock->Lock = 0; } - #else - Lock->Lock = 0; - #endif } // === DEBUG IO === @@ -324,10 +271,47 @@ int memcmp(const void *m1, const void *m2, size_t Num) */ void *memcpy(void *Dest, const void *Src, size_t Num) { -// Debug("\nmemcpy:Num=0x%x by %p", Num, __builtin_return_address(0)); - if( ((Uint)Dest & 3) || ((Uint)Src & 3) ) - __asm__ __volatile__ ("rep movsb" :: "D" (Dest), "S" (Src), "c" (Num)); - else { + tVAddr dst = (tVAddr)Dest; + tVAddr src = (tVAddr)Src; + if( (dst & 3) != (src & 3) ) + { + __asm__ __volatile__ ("rep movsb" :: "D" (dst), "S" (src), "c" (Num)); +// Debug("\nmemcpy:Num=0x%x by %p (UA)", Num, __builtin_return_address(0)); + } + #if 1 + else if( Num > 128 && (dst & 15) == (src & 15) ) + { + char tmp[16+15]; // Note, this is a hack to save/restor xmm0 + int count = 16 - (dst & 15); +// Debug("\nmemcpy:Num=0x%x by %p (SSE)", Num, __builtin_return_address(0)); + if( count < 16 ) + { + Num -= count; + __asm__ __volatile__ ("rep movsb" : "=D"(dst),"=S"(src): "0"(dst), "1"(src), "c"(count)); + } + + count = Num / 16; + __asm__ __volatile__ ( + "movdqa 0(%5), %%xmm0;\n\t" + "1:\n\t" + "movdqa 0(%1), %%xmm0;\n\t" + "movdqa %%xmm0, 0(%0);\n\t" + "add $16,%0;\n\t" + "add $16,%1;\n\t" + "loop 1b;\n\t" + "movdqa %%xmm0, 0(%5);\n\t" + : "=r"(dst),"=r"(src) + : "0"(dst), "1"(src), "c"(count), "r" (((tVAddr)tmp+15)&~15) + ); + + count = Num & 15; + if(count) + __asm__ __volatile__ ("rep movsb" :: "D"(dst), "S"(src), "c"(count)); + } + #endif + else + { +// Debug("\nmemcpy:Num=0x%x by %p", Num, __builtin_return_address(0)); __asm__ __volatile__ ( "rep movsl;\n\t" "mov %3, %%ecx;\n\t" @@ -336,6 +320,7 @@ void *memcpy(void *Dest, const void *Src, size_t Num) } return Dest; } + /** * \fn void *memcpyd(void *Dest, const void *Src, size_t Num) * \brief Copy \a Num DWORDs from \a Src to \a Dest @@ -346,9 +331,12 @@ void *memcpyd(void *Dest, const void *Src, size_t Num) return Dest; } +#include "../helpers.h" + +DEF_DIVMOD(64); + Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem) { - Uint64 ret; if( Div < 0x100000000ULL && Num < 0xFFFFFFFF * Div ) { Uint32 rem, ret_32; __asm__ __volatile__( @@ -360,9 +348,7 @@ Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem) return ret_32; } - ret = __udivdi3(Num, Div); - if(Rem) *Rem = __umoddi3(Num, Div); - return ret; + return __divmod64(Num, Div, Rem); } /** @@ -371,11 +357,10 @@ Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem) */ Uint64 __udivdi3(Uint64 Num, Uint64 Den) { - Uint64 P[2]; - Uint64 q = 0; - int i; - - if(Den == 0) __asm__ __volatile__ ("int $0x0"); + if(Den == 0) { + __asm__ __volatile__ ("int $0x0"); + return -1; + } // Common speedups if(Num <= 0xFFFFFFFF && Den <= 0xFFFFFFFF) return (Uint32)Num / (Uint32)Den; @@ -391,46 +376,8 @@ Uint64 __udivdi3(Uint64 Num, Uint64 Den) if(Num < Den) return 0; if(Num < Den*2) return 1; if(Num == Den*2) return 2; - - #if 1 - i = 0; // Shut up - P[0] = Num; - P[1] = Den; - __asm__ __volatile__ ( - "fildq %2\n\t" // Num - "fildq %1\n\t" // Den - "fdivp\n\t" - "fistpq %0" - : "=m" (q) - : "m" (P[0]), "m" (P[1]) - ); - - //Log("%llx / %llx = %llx\n", Num, Den, q); - #else - // Restoring division, from wikipedia - // http://en.wikipedia.org/wiki/Division_(digital) - P[0] = Num; P[1] = 0; - for( i = 64; i--; ) - { - // P <<= 1; - P[1] = (P[1] << 1) | (P[0] >> 63); - P[0] = P[0] << 1; - - // P -= Den << 64 - P[1] -= Den; - - // P >= 0 - if( !(P[1] & (1ULL<<63)) ) { - q |= (Uint64)1 << (63-i); - } - else { - //q |= 0 << (63-i); - P[1] += Den; - } - } - #endif - - return q; + + return __divmod64(Num, Den, NULL); } /** @@ -439,7 +386,11 @@ Uint64 __udivdi3(Uint64 Num, Uint64 Den) */ Uint64 __umoddi3(Uint64 Num, Uint64 Den) { - if(Den == 0) __asm__ __volatile__ ("int $0x0"); // Call Div by Zero Error + Uint64 ret = 0; + if(Den == 0) { + __asm__ __volatile__ ("int $0x0"); // Call Div by Zero Error + return -1; + } if(Den == 1) return 0; // Speed Hacks if(Den == 2) return Num & 1; // Speed Hacks if(Den == 4) return Num & 3; // Speed Hacks @@ -453,7 +404,8 @@ Uint64 __umoddi3(Uint64 Num, Uint64 Den) if(Num >> 32 == 0 && Den >> 32 == 0) return (Uint32)Num % (Uint32)Den; - return Num - __udivdi3(Num, Den) * Den; + __divmod64(Num, Den, &ret); + return ret; }