Kernel - Slight reworks to timer code
[tpg/acess2.git] / Kernel / arch / x86 / lib.c
index 8f8abe6..175f9a5 100644 (file)
@@ -6,6 +6,8 @@
  */
 #include <acess.h>
 #include <threads_int.h>
+#include <arch_int.h>
+#include <hal_proc.h>  // GetCPUNum
 
 #define TRACE_LOCKS    0
 
 // === IMPORTS ===
 #if TRACE_LOCKS
 extern struct sShortSpinlock   glDebug_Lock;
-extern struct sShortSpinlock   glThreadListLock;
+extern tMutex  glPhysAlloc;
+#define TRACE_LOCK_COND        (Lock != &glDebug_Lock && Lock != &glThreadListLock && Lock != &glPhysAlloc.Protector)
+//#define TRACE_LOCK_COND      (Lock != &glDebug_Lock && Lock != &glPhysAlloc.Protector)
 #endif
-extern int     GetCPUNum(void);
 
 // === PROTOTYPES ===
+Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
 Uint64 __udivdi3(Uint64 Num, Uint64 Den);
 Uint64 __umoddi3(Uint64 Num, Uint64 Den);
 
@@ -45,15 +49,20 @@ int IS_LOCKED(struct sShortSpinlock *Lock)
  */
 int CPU_HAS_LOCK(struct sShortSpinlock *Lock)
 {
-       #if STACKED_LOCKS == 1
        return Lock->Lock == GetCPUNum() + 1;
-       #elif STACKED_LOCKS == 2
-       return Lock->Lock == Proc_GetCurThread();
-       #else
-       return 0;
-       #endif
 }
 
+void __AtomicTestSetLoop(Uint *Ptr, Uint Value)
+{
+       __ASM__(
+               "1:\n\t"
+               "xor %%eax, %%eax;\n\t"
+               "lock cmpxchgl %0, (%1);\n\t"
+               "jnz 1b;\n\t"
+               :: "r"(Value), "r"(Ptr)
+               : "eax" // EAX clobbered
+               );
+}
 /**
  * \brief Acquire a Short Spinlock
  * \param Lock Lock pointer
@@ -69,77 +78,33 @@ int CPU_HAS_LOCK(struct sShortSpinlock *Lock)
  */
 void SHORTLOCK(struct sShortSpinlock *Lock)
 {
-        int    v = 1;
-       #if LOCK_DISABLE_INTS
         int    IF;
-       #endif
-       #if STACKED_LOCKS == 1
         int    cpu = GetCPUNum() + 1;
-       #elif STACKED_LOCKS == 2
-       void    *thread = Proc_GetCurThread();
-       #endif
        
-       #if LOCK_DISABLE_INTS
        // Save interrupt state
        __ASM__ ("pushf;\n\tpop %0" : "=r"(IF));
        IF &= 0x200;    // AND out all but the interrupt flag
-       #endif
-       
-       #if STACKED_LOCKS == 1
-       if( Lock->Lock == cpu ) {
-               Lock->Depth ++;
-               return ;
-       }
-       #elif STACKED_LOCKS == 2
-       if( Lock->Lock == thread ) {
-               Lock->Depth ++;
-               return ;
-       }
-       #endif
        
        #if TRACE_LOCKS
-       if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
+       if( TRACE_LOCK_COND )
        {
                //Log_Log("LOCK", "%p locked by %p", Lock, __builtin_return_address(0));
-               Debug("%p obtaining %p (Called by %p)", __builtin_return_address(0), Lock, __builtin_return_address(1));
+               Debug("%i %p obtaining %p (Called by %p)", cpu-1,  __builtin_return_address(0), Lock, __builtin_return_address(1));
        }
        #endif
        
-       // Wait for another CPU to release
-       while(v) {
-               // CMPXCHG:
-               //  If r/m32 == EAX, set ZF and set r/m32 = r32
-               //  Else, clear ZF and set EAX = r/m32
-               #if STACKED_LOCKS == 1
-               __ASM__("lock cmpxchgl %2, (%3)"
-                       : "=a"(v)
-                       : "a"(0), "r"(cpu), "r"(&Lock->Lock)
-                       );
-               #elif STACKED_LOCKS == 2
-               __ASM__("lock cmpxchgl %2, (%3)"
-                       : "=a"(v)
-                       : "a"(0), "r"(thread), "r"(&Lock->Lock)
-                       );
-               #else
-               __ASM__("xchgl %%eax, (%%edi)":"=a"(v):"a"(1),"D"(&Lock->Lock));
-               #endif
-               
-               #if LOCK_DISABLE_INTS
-               if( v ) __ASM__("sti"); // Re-enable interrupts
-               #endif
-       }
-       
-       #if LOCK_DISABLE_INTS
        __ASM__("cli");
+       
+       // Wait for another CPU to release
+       __AtomicTestSetLoop( (Uint*)&Lock->Lock, cpu );
        Lock->IF = IF;
-       #endif
        
        #if TRACE_LOCKS
-       if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
+       if( TRACE_LOCK_COND )
        {
                //Log_Log("LOCK", "%p locked by %p", Lock, __builtin_return_address(0));
-               //Debug("Lock %p locked by %p\t%p", Lock, __builtin_return_address(0), __builtin_return_address(1));
-               Debug("got it");
+               Debug("%i %p locked by %p\t%p", cpu-1, Lock, __builtin_return_address(0), __builtin_return_address(1));
+//             Debug("got it");
        }
        #endif
 }
@@ -149,22 +114,14 @@ void SHORTLOCK(struct sShortSpinlock *Lock)
  */
 void SHORTREL(struct sShortSpinlock *Lock)
 {      
-       #if STACKED_LOCKS
-       if( Lock->Depth ) {
-               Lock->Depth --;
-               return ;
-       }
-       #endif
-       
        #if TRACE_LOCKS
-       if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
+       if( TRACE_LOCK_COND )
        {
                //Log_Log("LOCK", "%p released by %p", Lock, __builtin_return_address(0));
                Debug("Lock %p released by %p\t%p", Lock, __builtin_return_address(0), __builtin_return_address(1));
        }
        #endif
        
-       #if LOCK_DISABLE_INTS
        // Lock->IF can change anytime once Lock->Lock is zeroed
        if(Lock->IF) {
                Lock->Lock = 0;
@@ -173,9 +130,6 @@ void SHORTREL(struct sShortSpinlock *Lock)
        else {
                Lock->Lock = 0;
        }
-       #else
-       Lock->Lock = 0;
-       #endif
 }
 
 // === DEBUG IO ===
@@ -324,9 +278,47 @@ int memcmp(const void *m1, const void *m2, size_t Num)
  */
 void *memcpy(void *Dest, const void *Src, size_t Num)
 {
-       if( ((Uint)Dest & 3) || ((Uint)Src & 3) )
-               __asm__ __volatile__ ("rep movsb" :: "D" (Dest), "S" (Src), "c" (Num));
-       else {
+       tVAddr  dst = (tVAddr)Dest;
+       tVAddr  src = (tVAddr)Src;
+       if( (dst & 3) != (src & 3) )
+       {
+               __asm__ __volatile__ ("rep movsb" :: "D" (dst), "S" (src), "c" (Num));
+//             Debug("\nmemcpy:Num=0x%x by %p (UA)", Num, __builtin_return_address(0));
+       }
+       #if 1
+       else if( Num > 128 && (dst & 15) == (src & 15) )
+       {
+       char    tmp[16+15];     // Note, this is a hack to save/restore xmm0
+                int    count = 16 - (dst & 15);
+//             Debug("\nmemcpy:Num=0x%x by %p (SSE)", Num, __builtin_return_address(0));
+               if( count < 16 )
+               {
+                       Num -= count;
+                       __asm__ __volatile__ ("rep movsb" : "=D"(dst),"=S"(src): "0"(dst), "1"(src), "c"(count));
+               }
+               
+               count = Num / 16;
+               __asm__ __volatile__ (
+                       "movdqa 0(%5), %%xmm0;\n\t"
+                       "1:\n\t"
+                       "movdqa 0(%1), %%xmm0;\n\t"
+                       "movdqa %%xmm0, 0(%0);\n\t"
+                       "add $16,%0;\n\t"
+                       "add $16,%1;\n\t"
+                       "loop 1b;\n\t"
+                       "movdqa %%xmm0, 0(%5);\n\t"
+                       : "=r"(dst),"=r"(src)
+                       : "0"(dst), "1"(src), "c"(count), "r" (((tVAddr)tmp+15)&~15)
+                       );
+
+               count = Num & 15;
+               if(count)
+                       __asm__ __volatile__ ("rep movsb" :: "D"(dst), "S"(src), "c"(count));
+       }
+       #endif
+       else
+       {
+//             Debug("\nmemcpy:Num=0x%x by %p", Num, __builtin_return_address(0));
                __asm__ __volatile__ (
                        "rep movsl;\n\t"
                        "mov %3, %%ecx;\n\t"
@@ -335,6 +327,7 @@ void *memcpy(void *Dest, const void *Src, size_t Num)
        }
        return Dest;
 }
+
 /**
  * \fn void *memcpyd(void *Dest, const void *Src, size_t Num)
  * \brief Copy \a Num DWORDs from \a Src to \a Dest
@@ -345,9 +338,12 @@ void *memcpyd(void *Dest, const void *Src, size_t Num)
        return Dest;
 }
 
+#include "../helpers.h"
+
+DEF_DIVMOD(64);
+
 Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem)
 {
-       Uint64  ret;
        if( Div < 0x100000000ULL && Num < 0xFFFFFFFF * Div ) {
                Uint32  rem, ret_32;
                __asm__ __volatile__(
@@ -359,9 +355,7 @@ Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem)
                return ret_32;
        }
 
-       ret = __udivdi3(Num, Div);
-       if(Rem) *Rem = __umoddi3(Num, Div);
-       return ret;
+       return __divmod64(Num, Div, Rem);
 }
 
 /**
@@ -370,11 +364,10 @@ Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem)
  */
 Uint64 __udivdi3(Uint64 Num, Uint64 Den)
 {
-       Uint64  P[2];
-       Uint64  q = 0;
-        int    i;
-       
-       if(Den == 0)    __asm__ __volatile__ ("int $0x0");
+       if(Den == 0) {
+               __asm__ __volatile__ ("int $0x0");
+               return -1;
+       }
        // Common speedups
        if(Num <= 0xFFFFFFFF && Den <= 0xFFFFFFFF)
                return (Uint32)Num / (Uint32)Den;
@@ -390,46 +383,8 @@ Uint64 __udivdi3(Uint64 Num, Uint64 Den)
        if(Num < Den)   return 0;
        if(Num < Den*2) return 1;
        if(Num == Den*2)        return 2;
-       
-       #if 1
-       i = 0;  // Shut up
-       P[0] = Num;
-       P[1] = Den;
-       __asm__ __volatile__ (
-               "fildq %2\n\t"  // Num
-               "fildq %1\n\t"  // Den
-               "fdivp\n\t"
-               "fistpq %0"
-               : "=m" (q)
-               : "m" (P[0]), "m" (P[1])
-               );
-               
-       //Log("%llx / %llx = %llx\n", Num, Den, q);
-       #else
-       // Restoring division, from wikipedia
-       // http://en.wikipedia.org/wiki/Division_(digital)
-       P[0] = Num;     P[1] = 0;
-       for( i = 64; i--; )
-       {
-               // P <<= 1;
-               P[1] = (P[1] << 1) | (P[0] >> 63);
-               P[0] = P[0] << 1;
-               
-               // P -= Den << 64
-               P[1] -= Den;
-               
-               // P >= 0
-               if( !(P[1] & (1ULL<<63)) ) {
-                       q |= (Uint64)1 << (63-i);
-               }
-               else {
-                       //q |= 0 << (63-i);
-                       P[1] += Den;
-               }
-       }
-       #endif
-       
-       return q;
+
+       return __divmod64(Num, Den, NULL);
 }
 
 /**
@@ -438,7 +393,11 @@ Uint64 __udivdi3(Uint64 Num, Uint64 Den)
  */
 Uint64 __umoddi3(Uint64 Num, Uint64 Den)
 {
-       if(Den == 0)    __asm__ __volatile__ ("int $0x0");      // Call Div by Zero Error
+       Uint64  ret = 0;
+       if(Den == 0) {
+               __asm__ __volatile__ ("int $0x0");      // Call Div by Zero Error
+               return -1;
+       }
        if(Den == 1)    return 0;       // Speed Hacks
        if(Den == 2)    return Num & 1; // Speed Hacks
        if(Den == 4)    return Num & 3; // Speed Hacks
@@ -452,7 +411,8 @@ Uint64 __umoddi3(Uint64 Num, Uint64 Den)
        if(Num >> 32 == 0 && Den >> 32 == 0)
                return (Uint32)Num % (Uint32)Den;
        
-       return Num - __udivdi3(Num, Den) * Den;
+       __divmod64(Num, Den, &ret);
+       return ret;
 }
 
 

UCC git Repository :: git.ucc.asn.au