/*
- * AcessOS Microkernel Version
- * lib.c
+ * Acess2
+ *
+ * arch/x86/lib.c
+ * - General arch-specific stuff
*/
#include <acess.h>
+#include <threads_int.h>
+
+#define TRACE_LOCKS 0
+
+#define DEBUG_TO_E9 1
+#define DEBUG_TO_SERIAL 1
+#define SERIAL_PORT 0x3F8
+#define GDB_SERIAL_PORT 0x2F8
+
+// === IMPRORTS ===
+#if TRACE_LOCKS
+extern struct sShortSpinlock glDebug_Lock;
+extern struct sShortSpinlock glThreadListLock;
+#endif
+extern int GetCPUNum(void);
+
+// === PROTOTYPES ==
+Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
+Uint64 __udivdi3(Uint64 Num, Uint64 Den);
+Uint64 __umoddi3(Uint64 Num, Uint64 Den);
+
+// === GLOBALS ===
+ int gbDebug_SerialSetup = 0;
+ int gbGDB_SerialSetup = 0;
// === CODE ===
-void Spinlock(int *lock)
+/**
+ * \brief Determine if a short spinlock is locked
+ * \param Lock Lock pointer
+ */
+int IS_LOCKED(struct sShortSpinlock *Lock)
+{
+ return !!Lock->Lock;
+}
+
+/**
+ * \brief Check if the current CPU has the lock
+ * \param Lock Lock pointer
+ */
+int CPU_HAS_LOCK(struct sShortSpinlock *Lock)
+{
+ return Lock->Lock == GetCPUNum() + 1;
+}
+
+/**
+ * \brief Acquire a Short Spinlock
+ * \param Lock Lock pointer
+ *
+ * This type of mutex should only be used for very short sections of code,
+ * or in places where a Mutex_* would be overkill, such as appending
+ * an element to linked list (usually two assignement lines in C)
+ *
+ * \note This type of lock halts interrupts, so ensure that no timing
+ * functions are called while it is held. As a matter of fact, spend as
+ * little time as possible with this lock held
+ * \note If \a STACKED_LOCKS is set, this type of spinlock can be nested
+ */
+void SHORTLOCK(struct sShortSpinlock *Lock)
{
int v = 1;
- while(v) __asm__ __volatile__ ("lock xchgl %%eax, (%%edi)":"=a"(v):"a"(1),"D"(lock));
+ int IF;
+ int cpu = GetCPUNum() + 1;
+
+ // Save interrupt state
+ __ASM__ ("pushf;\n\tpop %0" : "=r"(IF));
+ IF &= 0x200; // AND out all but the interrupt flag
+
+ #if TRACE_LOCKS
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
+ {
+ //Log_Log("LOCK", "%p locked by %p", Lock, __builtin_return_address(0));
+ Debug("%p obtaining %p (Called by %p)", __builtin_return_address(0), Lock, __builtin_return_address(1));
+ }
+ #endif
+
+ __ASM__("cli");
+
+ // Wait for another CPU to release
+ __ASM__(
+ "1: lock cmpxchgl %2, (%3)\n\t"
+ "jnz 1b"
+ : "=a"(v)
+ : "a"(0), "r"(cpu), "r"(&Lock->Lock)
+ );
+
+ Lock->IF = IF;
+
+ #if TRACE_LOCKS
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
+ {
+ //Log_Log("LOCK", "%p locked by %p", Lock, __builtin_return_address(0));
+ //Debug("Lock %p locked by %p\t%p", Lock, __builtin_return_address(0), __builtin_return_address(1));
+ Debug("got it");
+ }
+ #endif
+}
+/**
+ * \brief Release a short lock
+ * \param Lock Lock pointer
+ */
+void SHORTREL(struct sShortSpinlock *Lock)
+{
+ #if TRACE_LOCKS
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock )
+ {
+ //Log_Log("LOCK", "%p released by %p", Lock, __builtin_return_address(0));
+ Debug("Lock %p released by %p\t%p", Lock, __builtin_return_address(0), __builtin_return_address(1));
+ }
+ #endif
+
+ // Lock->IF can change anytime once Lock->Lock is zeroed
+ if(Lock->IF) {
+ Lock->Lock = 0;
+ __ASM__ ("sti");
+ }
+ else {
+ Lock->Lock = 0;
+ }
+}
+
+// === DEBUG IO ===
+#if USE_GDB_STUB
+int putDebugChar(char ch)
+{
+ if(!gbGDB_SerialSetup) {
+ outb(GDB_SERIAL_PORT + 1, 0x00); // Disable all interrupts
+ outb(GDB_SERIAL_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
+ outb(GDB_SERIAL_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
+ outb(GDB_SERIAL_PORT + 1, 0x00); // (base is (hi byte)
+ outb(GDB_SERIAL_PORT + 3, 0x03); // 8 bits, no parity, one stop bit (8N1)
+ outb(GDB_SERIAL_PORT + 2, 0xC7); // Enable FIFO with 14-byte threshold and clear it
+ outb(GDB_SERIAL_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
+ gbGDB_SerialSetup = 1;
+ }
+ while( (inb(GDB_SERIAL_PORT + 5) & 0x20) == 0 );
+ outb(GDB_SERIAL_PORT, ch);
+ return 0;
+}
+int getDebugChar(void)
+{
+ if(!gbGDB_SerialSetup) {
+ outb(GDB_SERIAL_PORT + 1, 0x00); // Disable all interrupts
+ outb(GDB_SERIAL_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
+ outb(GDB_SERIAL_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
+ outb(GDB_SERIAL_PORT + 1, 0x00); // (hi byte)
+ outb(GDB_SERIAL_PORT + 3, 0x03); // 8 bits, no parity, one stop bit
+ outb(GDB_SERIAL_PORT + 2, 0xC7); // Enable FIFO with 14-byte threshold and clear it
+ outb(GDB_SERIAL_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
+ gbGDB_SerialSetup = 1;
+ }
+ while( (inb(GDB_SERIAL_PORT + 5) & 1) == 0) ;
+ return inb(GDB_SERIAL_PORT);
+}
+#endif /* USE_GDB_STUB */
+
+void Debug_PutCharDebug(char ch)
+{
+ #if DEBUG_TO_E9
+ __asm__ __volatile__ ( "outb %%al, $0xe9" :: "a"(((Uint8)ch)) );
+ #endif
+
+ #if DEBUG_TO_SERIAL
+ if(!gbDebug_SerialSetup) {
+ outb(SERIAL_PORT + 1, 0x00); // Disable all interrupts
+ outb(SERIAL_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
+ outb(SERIAL_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
+ outb(SERIAL_PORT + 1, 0x00); // (hi byte)
+ outb(SERIAL_PORT + 3, 0x03); // 8 bits, no parity, one stop bit
+ outb(SERIAL_PORT + 2, 0xC7); // Enable FIFO with 14-byte threshold and clear it
+ outb(SERIAL_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
+ gbDebug_SerialSetup = 1;
+ }
+ while( (inb(SERIAL_PORT + 5) & 0x20) == 0 );
+ outb(SERIAL_PORT, ch);
+ #endif
}
-void Release(int *lock)
+void Debug_PutStringDebug(const char *String)
{
- __asm__ __volatile__ ("lock andl $0, (%0)"::"r"(lock));
+ while(*String)
+ Debug_PutCharDebug(*String++);
}
// === IO Commands ===
*/
int memcmp(const void *m1, const void *m2, size_t Num)
{
+ const Uint8 *d1 = m1;
+ const Uint8 *d2 = m2;
+ if( Num == 0 ) return 0; // No bytes are always identical
+
while(Num--)
{
- if(*(Uint8*)m1 != *(Uint8*)m2) break;
- m1 ++;
- m2 ++;
+ if(*d1 != *d2)
+ return *d1 - *d2;
+ d1 ++;
+ d2 ++;
}
- return *(Uint8*)m1 - *(Uint8*)m2;
+ return 0;
}
/**
*/
void *memcpy(void *Dest, const void *Src, size_t Num)
{
- if( ((Uint)Dest & 3) || ((Uint)Src & 3) )
- __asm__ __volatile__ ("rep movsb" :: "D" (Dest), "S" (Src), "c" (Num));
- else {
+ tVAddr dst = (tVAddr)Dest;
+ tVAddr src = (tVAddr)Src;
+ if( (dst & 3) != (src & 3) )
+ {
+ __asm__ __volatile__ ("rep movsb" :: "D" (dst), "S" (src), "c" (Num));
+// Debug("\nmemcpy:Num=0x%x by %p (UA)", Num, __builtin_return_address(0));
+ }
+ #if 1
+ else if( Num > 128 && (dst & 15) == (src & 15) )
+ {
+ char tmp[16+15]; // Note, this is a hack to save/restor xmm0
+ int count = 16 - (dst & 15);
+// Debug("\nmemcpy:Num=0x%x by %p (SSE)", Num, __builtin_return_address(0));
+ if( count < 16 )
+ {
+ Num -= count;
+ __asm__ __volatile__ ("rep movsb" : "=D"(dst),"=S"(src): "0"(dst), "1"(src), "c"(count));
+ }
+
+ count = Num / 16;
+ __asm__ __volatile__ (
+ "movdqa 0(%5), %%xmm0;\n\t"
+ "1:\n\t"
+ "movdqa 0(%1), %%xmm0;\n\t"
+ "movdqa %%xmm0, 0(%0);\n\t"
+ "add $16,%0;\n\t"
+ "add $16,%1;\n\t"
+ "loop 1b;\n\t"
+ "movdqa %%xmm0, 0(%5);\n\t"
+ : "=r"(dst),"=r"(src)
+ : "0"(dst), "1"(src), "c"(count), "r" (((tVAddr)tmp+15)&~15)
+ );
+
+ count = Num & 15;
+ if(count)
+ __asm__ __volatile__ ("rep movsb" :: "D"(dst), "S"(src), "c"(count));
+ }
+ #endif
+ else
+ {
+// Debug("\nmemcpy:Num=0x%x by %p", Num, __builtin_return_address(0));
__asm__ __volatile__ (
"rep movsl;\n\t"
"mov %3, %%ecx;\n\t"
}
return Dest;
}
+
/**
* \fn void *memcpyd(void *Dest, const void *Src, size_t Num)
* \brief Copy \a Num DWORDs from \a Src to \a Dest
return Dest;
}
+#include "../helpers.h"
+
+DEF_DIVMOD(64);
+
+Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem)
+{
+ if( Div < 0x100000000ULL && Num < 0xFFFFFFFF * Div ) {
+ Uint32 rem, ret_32;
+ __asm__ __volatile__(
+ "div %4"
+ : "=a" (ret_32), "=d" (rem)
+ : "a" ( (Uint32)(Num & 0xFFFFFFFF) ), "d" ((Uint32)(Num >> 32)), "r" (Div)
+ );
+ if(Rem) *Rem = rem;
+ return ret_32;
+ }
+
+ return __divmod64(Num, Div, Rem);
+}
+
/**
* \fn Uint64 __udivdi3(Uint64 Num, Uint64 Den)
* \brief Divide two 64-bit integers
*/
Uint64 __udivdi3(Uint64 Num, Uint64 Den)
{
- Uint64 P[2];
- Uint64 q = 0;
- int i;
-
- if(Den == 0) __asm__ __volatile__ ("int $0x0");
+ if(Den == 0) {
+ __asm__ __volatile__ ("int $0x0");
+ return -1;
+ }
// Common speedups
if(Num <= 0xFFFFFFFF && Den <= 0xFFFFFFFF)
return (Uint32)Num / (Uint32)Den;
if(Num < Den) return 0;
if(Num < Den*2) return 1;
if(Num == Den*2) return 2;
-
- // Restoring division, from wikipedia
- // http://en.wikipedia.org/wiki/Division_(digital)
- P[0] = Num; P[1] = 0;
- for( i = 64; i--; )
- {
- // P <<= 1;
- P[1] = (P[1] << 1) | (P[0] >> 63);
- P[0] = P[0] << 1;
-
- // P -= Den << 64
- P[1] -= Den;
-
- // P >= 0
- if( !(P[1] & (1ULL<<63)) ) {
- q |= (Uint64)1 << (63-i);
- }
- else {
- //q |= 0 << (63-i);
- P[1] += Den;
- }
- }
-
- return q;
+
+ return __divmod64(Num, Den, NULL);
}
/**
*/
Uint64 __umoddi3(Uint64 Num, Uint64 Den)
{
- if(Den == 0) __asm__ __volatile__ ("int $0x0"); // Call Div by Zero Error
+ Uint64 ret = 0;
+ if(Den == 0) {
+ __asm__ __volatile__ ("int $0x0"); // Call Div by Zero Error
+ return -1;
+ }
if(Den == 1) return 0; // Speed Hacks
if(Den == 2) return Num & 1; // Speed Hacks
if(Den == 4) return Num & 3; // Speed Hacks
if(Num >> 32 == 0 && Den >> 32 == 0)
return (Uint32)Num % (Uint32)Den;
- return Num - __udivdi3(Num, Den) * Den;
+ __divmod64(Num, Den, &ret);
+ return ret;
}
-Uint16 LittleEndian16(Uint16 Val)
-{
- return Val;
-}
-Uint16 BigEndian16(Uint16 Val)
-{
- return ((Val&0xFF)<<8) | ((Val>>8)&0xFF);
-}
-Uint32 LittleEndian32(Uint32 Val)
-{
- return Val;
-}
-Uint32 BigEndian32(Uint32 Val)
-{
- return ((Val&0xFF)<<24) | ((Val&0xFF00)<<8) | ((Val>>8)&0xFF00) | ((Val>>24)&0xFF);
-}
// --- EXPORTS ---
EXPORT(memcpy); EXPORT(memset);
EXPORT(outb); EXPORT(outw); EXPORT(outd);
EXPORT(__udivdi3); EXPORT(__umoddi3);
-EXPORT(LittleEndian16); EXPORT(BigEndian16);
-EXPORT(LittleEndian32); EXPORT(BigEndian32);
+EXPORT(SHORTLOCK);
+EXPORT(SHORTREL);
+EXPORT(IS_LOCKED);