/*
- * AcessOS Microkernel Version
- * lib.c
+ * Acess2
+ *
+ * arch/x86/lib.c
+ * - General arch-specific stuff
*/
#include <acess.h>
#include <threads_int.h>
// === IMPORTS ===
#if TRACE_LOCKS
extern struct sShortSpinlock glDebug_Lock;
+extern struct sShortSpinlock glThreadListLock;
#endif
extern int GetCPUNum(void);
// === PROTOTYPES ==
+Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
Uint64 __udivdi3(Uint64 Num, Uint64 Den);
Uint64 __umoddi3(Uint64 Num, Uint64 Den);
*/
int CPU_HAS_LOCK(struct sShortSpinlock *Lock)
{
// Returns non-zero when the lock word holds this CPU's owner tag.
// SHORTLOCK stores GetCPUNum() + 1 in Lock->Lock, and SHORTREL writes 0 to
// release, so 0 never matches an owner.
- #if STACKED_LOCKS == 1
return Lock->Lock == GetCPUNum() + 1;
- #elif STACKED_LOCKS == 2
- return Lock->Lock == Proc_GetCurThread();
- #else
- return 0;
- #endif
}
/**
void SHORTLOCK(struct sShortSpinlock *Lock)
{
int v = 1;
- #if LOCK_DISABLE_INTS
int IF;
- #endif
- #if STACKED_LOCKS == 1
int cpu = GetCPUNum() + 1;
- #elif STACKED_LOCKS == 2
- void *thread = Proc_GetCurThread();
- #endif
- #if LOCK_DISABLE_INTS
// Save interrupt state
__ASM__ ("pushf;\n\tpop %0" : "=r"(IF));
IF &= 0x200; // AND out all but the interrupt flag
- #endif
- #if STACKED_LOCKS == 1
- if( Lock->Lock == cpu ) {
- Lock->Depth ++;
- return ;
- }
- #elif STACKED_LOCKS == 2
- if( Lock->Lock == thread ) {
- Lock->Depth ++;
- return ;
+ #if TRACE_LOCKS
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
+ {
+ //Log_Log("LOCK", "%p locked by %p", Lock, __builtin_return_address(0));
+ Debug("%p obtaining %p (Called by %p)", __builtin_return_address(0), Lock, __builtin_return_address(1));
}
#endif
+ __ASM__("cli");
+
// Wait for another CPU to release
- while(v) {
- // CMPXCHG:
- // If r/m32 == EAX, set ZF and set r/m32 = r32
- // Else, clear ZF and set EAX = r/m32
- #if STACKED_LOCKS == 1
- __ASM__("lock cmpxchgl %2, (%3)"
- : "=a"(v)
- : "a"(0), "r"(cpu), "r"(&Lock->Lock)
- );
- #elif STACKED_LOCKS == 2
- __ASM__("lock cmpxchgl %2, (%3)"
- : "=a"(v)
- : "a"(0), "r"(thread), "r"(&Lock->Lock)
- );
- #else
- __ASM__("xchgl %%eax, (%%edi)":"=a"(v):"a"(1),"D"(&Lock->Lock));
- #endif
-
- #if LOCK_DISABLE_INTS
- if( v ) __ASM__("sti"); // Re-enable interrupts
- #endif
- }
+ __ASM__(
+ "1: lock cmpxchgl %2, (%3)\n\t"
+ "jnz 1b"
+ : "=a"(v)
+ : "a"(0), "r"(cpu), "r"(&Lock->Lock)
+ );
- #if LOCK_DISABLE_INTS
- __ASM__("cli");
Lock->IF = IF;
- #endif
#if TRACE_LOCKS
- if( Lock != &glDebug_Lock )
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
{
//Log_Log("LOCK", "%p locked by %p", Lock, __builtin_return_address(0));
- LogF("Lock %p locked by %p\n", Lock, __builtin_return_address(0));
+ //Debug("Lock %p locked by %p\t%p", Lock, __builtin_return_address(0), __builtin_return_address(1));
+ Debug("got it");
}
#endif
}
*/
void SHORTREL(struct sShortSpinlock *Lock)
{
// Releases a lock by writing 0 to Lock->Lock; Lock->IF is read before the
// lock word is cleared.
- #if STACKED_LOCKS
- if( Lock->Depth ) {
- Lock->Depth --;
- return ;
- }
- #endif
-
#if TRACE_LOCKS
- if( Lock != &glDebug_Lock )
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
{
//Log_Log("LOCK", "%p released by %p", Lock, __builtin_return_address(0));
- LogF("Lock %p released by %p\n", Lock, __builtin_return_address(0));
+ Debug("Lock %p released by %p\t%p", Lock, __builtin_return_address(0), __builtin_return_address(1));
}
#endif
- #if LOCK_DISABLE_INTS
// Lock->IF can change anytime once Lock->Lock is zeroed
// NOTE(review): as shown, the first arm has no closing brace before `else`
// and both arms are identical; lines appear to be missing here (presumably
// the first arm also re-enables interrupts) - confirm against the full file.
if(Lock->IF) {
Lock->Lock = 0;
else {
Lock->Lock = 0;
}
- #else
- Lock->Lock = 0;
- #endif
}
// === DEBUG IO ===
outb(GDB_SERIAL_PORT + 3, 0x03); // 8 bits, no parity, one stop bit (8N1)
outb(GDB_SERIAL_PORT + 2, 0xC7); // Enable FIFO with 14-byte threshold and clear it
outb(GDB_SERIAL_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
- gbDebug_SerialSetup = 1;
+ gbGDB_SerialSetup = 1;
}
while( (inb(GDB_SERIAL_PORT + 5) & 0x20) == 0 );
outb(GDB_SERIAL_PORT, ch);
outb(GDB_SERIAL_PORT + 3, 0x03); // 8 bits, no parity, one stop bit
outb(GDB_SERIAL_PORT + 2, 0xC7); // Enable FIFO with 14-byte threshold and clear it
outb(GDB_SERIAL_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
- gbDebug_SerialSetup = 1;
+ gbGDB_SerialSetup = 1;
}
while( (inb(GDB_SERIAL_PORT + 5) & 1) == 0) ;
return inb(GDB_SERIAL_PORT);
*/
void *memcpy(void *Dest, const void *Src, size_t Num)
{
- if( ((Uint)Dest & 3) || ((Uint)Src & 3) )
- __asm__ __volatile__ ("rep movsb" :: "D" (Dest), "S" (Src), "c" (Num));
- else {
+ // Copies Num bytes from Src to Dest and returns Dest. Three strategies:
+ //  - different alignment mod 4: plain byte copy
+ //  - more than 128 bytes and same alignment mod 16: 16-byte SSE copy
+ //  - otherwise: dword copy
+ // Every asm statement lists the registers it changes as outputs and
+ // clobbers "memory", so the compiler does not assume EDI/ESI/ECX or the
+ // copied bytes are unchanged afterwards.
+ tVAddr dst = (tVAddr)Dest;
+ tVAddr src = (tVAddr)Src;
+ if( (dst & 3) != (src & 3) )
+ {
+ __asm__ __volatile__ ("rep movsb" : "+D" (dst), "+S" (src), "+c" (Num) :: "memory");
+// Debug("\nmemcpy:Num=0x%x by %p (UA)", Num, __builtin_return_address(0));
+ }
+ #if 1
+ else if( Num > 128 && (dst & 15) == (src & 15) )
+ {
+ char tmp[16+15]; // Note, this is a hack to save/restore xmm0
+ int count = 16 - (dst & 15);
+// Debug("\nmemcpy:Num=0x%x by %p (SSE)", Num, __builtin_return_address(0));
+ // Byte-copy up to the next 16-byte boundary (skipped when already aligned)
+ if( count < 16 )
+ {
+ Num -= count;
+ __asm__ __volatile__ ("rep movsb" : "=D"(dst),"=S"(src),"=c"(count): "0"(dst), "1"(src), "2"(count) : "memory");
+ }
+
+ count = Num / 16;
+ // AT&T operand order is source, destination: xmm0 is first stored to the
+ // aligned scratch slot inside tmp, used as the copy register, and
+ // reloaded from the slot once the loop finishes.
+ __asm__ __volatile__ (
+ "movdqa %%xmm0, 0(%[save]);\n\t"
+ "1:\n\t"
+ "movdqa 0(%1), %%xmm0;\n\t"
+ "movdqa %%xmm0, 0(%0);\n\t"
+ "add $16,%0;\n\t"
+ "add $16,%1;\n\t"
+ "loop 1b;\n\t"
+ "movdqa 0(%[save]), %%xmm0;\n\t"
+ : "=r"(dst),"=r"(src),"=c"(count)
+ : "0"(dst), "1"(src), "2"(count), [save] "r" (((tVAddr)tmp+15)&~15)
+ : "memory", "cc"
+ );
+
+ // Trailing bytes that do not fill a 16-byte chunk
+ count = Num & 15;
+ if(count)
+ __asm__ __volatile__ ("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) :: "memory");
+ }
+ #endif
+ else
+ {
+// Debug("\nmemcpy:Num=0x%x by %p", Num, __builtin_return_address(0));
__asm__ __volatile__ (
"rep movsl;\n\t"
"mov %3, %%ecx;\n\t"
}
return Dest;
}
+
/**
* \fn void *memcpyd(void *Dest, const void *Src, size_t Num)
* \brief Copy \a Num DWORDs from \a Src to \a Dest
return Dest;
}
+#include "../helpers.h"
+
+DEF_DIVMOD(64);
+
+/**
+ * \brief Unsigned 64-bit division returning quotient and remainder
+ * \param Num	Dividend
+ * \param Div	Divisor
+ * \param Rem	Receives the remainder; may be NULL
+ * \return Quotient of Num / Div
+ */
+Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem)
+{
+ // Fast path: a single 64-by-32 `div` when the divisor fits in 32 bits and
+ // the quotient is guaranteed to fit too (Num < 0xFFFFFFFF * Div), so the
+ // instruction cannot fault on overflow. Div == 0 fails this test and is
+ // passed on to __divmod64.
+ if( Div < 0x100000000ULL && Num < 0xFFFFFFFF * Div ) {
+ Uint32 rem, ret_32;
+ // The divisor is narrowed explicitly so the register operand matches
+ // the 32-bit instruction.
+ __asm__ __volatile__(
+ "divl %4"
+ : "=a" (ret_32), "=d" (rem)
+ : "a" ( (Uint32)(Num & 0xFFFFFFFF) ), "d" ((Uint32)(Num >> 32)), "r" ((Uint32)Div)
+ );
+ if(Rem) *Rem = rem;
+ return ret_32;
+ }
+
+ // General case (presumably generated by DEF_DIVMOD(64) - confirm)
+ return __divmod64(Num, Div, Rem);
+}
+
/**
* \fn Uint64 __udivdi3(Uint64 Num, Uint64 Den)
* \brief Divide two 64-bit integers
*/
Uint64 __udivdi3(Uint64 Num, Uint64 Den)
{
- Uint64 P[2];
- Uint64 q = 0;
- int i;
-
- if(Den == 0) __asm__ __volatile__ ("int $0x0");
// Zero divisor: raise interrupt 0, and return all-ones if execution continues
+ if(Den == 0) {
+ __asm__ __volatile__ ("int $0x0");
+ return -1;
+ }
// Common speedups
// Both operands fit in 32 bits: a native 32-bit divide is enough
if(Num <= 0xFFFFFFFF && Den <= 0xFFFFFFFF)
return (Uint32)Num / (Uint32)Den;
// Quotients of 0, 1 and 2 are answered by comparison alone
if(Num < Den) return 0;
if(Num < Den*2) return 1;
if(Num == Den*2) return 2;
-
- #if 1
- i = 0; // Shut up
- P[0] = Num;
- P[1] = Den;
- __asm__ __volatile__ (
- "fildq %2\n\t" // Num
- "fildq %1\n\t" // Den
- "fdivp\n\t"
- "fistpq %0"
- : "=m" (q)
- : "m" (P[0]), "m" (P[1])
- );
-
- //Log("%llx / %llx = %llx\n", Num, Den, q);
- #else
- // Restoring division, from wikipedia
- // http://en.wikipedia.org/wiki/Division_(digital)
- P[0] = Num; P[1] = 0;
- for( i = 64; i--; )
- {
- // P <<= 1;
- P[1] = (P[1] << 1) | (P[0] >> 63);
- P[0] = P[0] << 1;
-
- // P -= Den << 64
- P[1] -= Den;
-
- // P >= 0
- if( !(P[1] & (1ULL<<63)) ) {
- q |= (Uint64)1 << (63-i);
- }
- else {
- //q |= 0 << (63-i);
- P[1] += Den;
- }
- }
- #endif
-
- return q;
+
// General case: hand off to __divmod64; NULL because the remainder is not needed
+ return __divmod64(Num, Den, NULL);
}
/**
*/
Uint64 __umoddi3(Uint64 Num, Uint64 Den)
{
// Returns the remainder of Num / Den for unsigned 64-bit operands.
- if(Den == 0) __asm__ __volatile__ ("int $0x0"); // Call Div by Zero Error
+ Uint64 ret = 0;
// Zero divisor: raise interrupt 0, and return all-ones if execution continues
+ if(Den == 0) {
+ __asm__ __volatile__ ("int $0x0"); // Call Div by Zero Error
+ return -1;
+ }
if(Den == 1) return 0; // Speed Hacks
if(Den == 2) return Num & 1; // Speed Hacks
if(Den == 4) return Num & 3; // Speed Hacks
// Both operands have a zero upper half: a native 32-bit modulo is enough
if(Num >> 32 == 0 && Den >> 32 == 0)
return (Uint32)Num % (Uint32)Den;
- return Num - __udivdi3(Num, Den) * Den;
// General case: __divmod64 writes the remainder into ret
+ __divmod64(Num, Den, &ret);
+ return ret;
}
-Uint16 LittleEndian16(Uint16 Val)
-{
- return Val;
-}
-Uint16 BigEndian16(Uint16 Val)
-{
- return ((Val&0xFF)<<8) | ((Val>>8)&0xFF);
-}
-Uint32 LittleEndian32(Uint32 Val)
-{
- return Val;
-}
-Uint32 BigEndian32(Uint32 Val)
-{
- return ((Val&0xFF)<<24) | ((Val&0xFF00)<<8) | ((Val>>8)&0xFF00) | ((Val>>24)&0xFF);
-}
// --- EXPORTS ---
EXPORT(memcpy); EXPORT(memset);
EXPORT(outb); EXPORT(outw); EXPORT(outd);
EXPORT(__udivdi3); EXPORT(__umoddi3);
-EXPORT(LittleEndian16); EXPORT(BigEndian16);
-EXPORT(LittleEndian32); EXPORT(BigEndian32);
-
EXPORT(SHORTLOCK);
EXPORT(SHORTREL);
EXPORT(IS_LOCKED);