/*
- * AcessOS Microkernel Version
- * lib.c
+ * Acess2
+ *
+ * arch/x86/lib.c
+ * - General arch-specific stuff
*/
-#include <common.h>
+#include <acess.h>
+#include <threads_int.h>
+
+#define TRACE_LOCKS 0 // Non-zero: SHORTLOCK/SHORTREL report through Debug(), except for glDebug_Lock and glThreadListLock
+
+#define DEBUG_TO_E9 1 // Non-zero: Debug_PutCharDebug writes each byte to I/O port 0xE9
+#define DEBUG_TO_SERIAL 1 // Non-zero: Debug_PutCharDebug also writes each byte to the UART at SERIAL_PORT
+#define SERIAL_PORT 0x3F8 // I/O base used by Debug_PutCharDebug
+#define GDB_SERIAL_PORT 0x2F8 // I/O base used by putDebugChar/getDebugChar
+
+// === IMPORTS ===
+#if TRACE_LOCKS
+extern struct sShortSpinlock glDebug_Lock; // Excluded from lock tracing in SHORTLOCK/SHORTREL
+extern struct sShortSpinlock glThreadListLock; // Excluded from lock tracing in SHORTLOCK/SHORTREL
+#endif
+extern int GetCPUNum(void); // Used as GetCPUNum() + 1 for the lock owner ID when STACKED_LOCKS == 1
+
+// === PROTOTYPES ===
+Uint64 __udivdi3(Uint64 Num, Uint64 Den); // Defined later in this file; declared early for DivMod64U
+Uint64 __umoddi3(Uint64 Num, Uint64 Den); // Defined later in this file; declared early for DivMod64U
+
+// === GLOBALS ===
+ int gbDebug_SerialSetup = 0; // Set to 1 once Debug_PutCharDebug has programmed SERIAL_PORT
+ int gbGDB_SerialSetup = 0; // Set to 1 once putDebugChar/getDebugChar has programmed GDB_SERIAL_PORT
// === CODE ===
-void Spinlock(int *lock)
+/**
+ * \brief Determine if a short spinlock is locked
+ * \param Lock Lock pointer
+ * \return 1 if Lock->Lock is non-zero, 0 otherwise
+ *
+ * Only reports that the lock is held; it does not identify the holder
+ * (see CPU_HAS_LOCK for that).
+ */
+int IS_LOCKED(struct sShortSpinlock *Lock)
+{
+ return !!Lock->Lock; // Collapse any non-zero owner value to 1
+}
+
+/**
+ * \brief Check if the current CPU has the lock
+ * \param Lock Lock pointer
+ * \return Non-zero if Lock->Lock equals the caller's owner ID, else 0
+ *
+ * The owner ID depends on the build-time \a STACKED_LOCKS setting:
+ * - 1: GetCPUNum() + 1
+ * - 2: Proc_GetCurThread()
+ * - any other value: ownership is not tracked and 0 is always returned
+ */
+int CPU_HAS_LOCK(struct sShortSpinlock *Lock)
+{
+ #if STACKED_LOCKS == 1
+ return Lock->Lock == GetCPUNum() + 1; // Same "CPU number + 1" ID that SHORTLOCK stores, so 0 stays free for "unlocked"
+ #elif STACKED_LOCKS == 2
+ return Lock->Lock == Proc_GetCurThread(); // Owner is identified by the current thread pointer
+ #else
+ return 0; // No owner is recorded in this configuration
+ #endif
+}
+
+/**
+ * \brief Acquire a Short Spinlock
+ * \param Lock Lock pointer
+ *
+ * This type of mutex should only be used for very short sections of code,
+ * or in places where a Mutex_* would be overkill, such as appending
+ * an element to linked list (usually two assignment lines in C)
+ *
+ * \note This type of lock halts interrupts, so ensure that no timing
+ * functions are called while it is held. As a matter of fact, spend as
+ * little time as possible with this lock held
+ * \note If \a STACKED_LOCKS is set, this type of spinlock can be nested
+ * (a repeat acquire by the current owner only increments Lock->Depth)
+ *
+ * Build-time switches used here:
+ * - \a STACKED_LOCKS == 1: the owner ID stored in Lock->Lock is GetCPUNum() + 1
+ * - \a STACKED_LOCKS == 2: the owner ID stored in Lock->Lock is Proc_GetCurThread()
+ * - otherwise: Lock->Lock is exchanged with the constant 1
+ * - \a LOCK_DISABLE_INTS: the caller's interrupt flag is recorded in Lock->IF
+ *   and "cli" is executed once the lock has been obtained
+ * - \a TRACE_LOCKS: attempts and successes are reported through Debug()
+ *
+ * NOTE(review): with LOCK_DISABLE_INTS, "cli" is only issued after the spin
+ * loop has exited, and the retry path executes "sti" regardless of the saved
+ * flag. There appears to be a window in which the lock is held with
+ * interrupts enabled - confirm this is intended.
+ * NOTE(review): the asm statements write Lock->Lock but list neither a memory
+ * output nor a "memory" clobber; whether that is safe depends on how __ASM__
+ * is defined (not visible here).
+ */
+void SHORTLOCK(struct sShortSpinlock *Lock)
{
int v = 1;
- while(v) __asm__ __volatile__ ("lock xchgl %%eax, (%%edi)":"=a"(v):"a"(1),"D"(lock));
+ #if LOCK_DISABLE_INTS
+ int IF; // Caller's interrupt flag (EFLAGS bit 0x200), captured below
+ #endif
+ #if STACKED_LOCKS == 1
+ int cpu = GetCPUNum() + 1; // Owner ID; +1 keeps 0 free to mean "unlocked"
+ #elif STACKED_LOCKS == 2
+ void *thread = Proc_GetCurThread(); // Owner ID is the current thread pointer
+ #endif
+
+ #if LOCK_DISABLE_INTS
+ // Save interrupt state
+ __ASM__ ("pushf;\n\tpop %0" : "=r"(IF)); // Read the flags register via the stack
+ IF &= 0x200; // AND out all but the interrupt flag
+ #endif
+
+ #if STACKED_LOCKS == 1
+ if( Lock->Lock == cpu ) { // Already held by this CPU: count the nesting and return
+ Lock->Depth ++;
+ return ;
+ }
+ #elif STACKED_LOCKS == 2
+ if( Lock->Lock == thread ) { // Already held by this thread: count the nesting and return
+ Lock->Depth ++;
+ return ;
+ }
+ #endif
+
+ #if TRACE_LOCKS
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock ) // These two locks are never traced
+ {
+ //Log_Log("LOCK", "%p locked by %p", Lock, __builtin_return_address(0));
+ Debug("%p obtaining %p (Called by %p)", __builtin_return_address(0), Lock, __builtin_return_address(1));
+ }
+ #endif
+
+ // Wait for another CPU to release
+ while(v) { // v starts at 1, so at least one attempt is always made
+ // CMPXCHG:
+ // If r/m32 == EAX, set ZF and set r/m32 = r32
+ // Else, clear ZF and set EAX = r/m32
+ #if STACKED_LOCKS == 1
+ __ASM__("lock cmpxchgl %2, (%3)" // With EAX = 0: a free lock (0) becomes 'cpu' and v stays 0; otherwise v receives the current owner
+ : "=a"(v)
+ : "a"(0), "r"(cpu), "r"(&Lock->Lock)
+ );
+ #elif STACKED_LOCKS == 2
+ __ASM__("lock cmpxchgl %2, (%3)" // With EAX = 0: a free lock (0) becomes 'thread' and v stays 0; otherwise v receives the current owner
+ : "=a"(v)
+ : "a"(0), "r"(thread), "r"(&Lock->Lock)
+ );
+ #else
+ __ASM__("xchgl %%eax, (%%edi)":"=a"(v):"a"(1),"D"(&Lock->Lock)); // Store 1 and fetch the previous value; a previous 0 means the lock was free
+ #endif
+
+ #if LOCK_DISABLE_INTS
+ if( v ) __ASM__("sti"); // Re-enable interrupts
+ #endif
+ }
+
+ #if LOCK_DISABLE_INTS
+ __ASM__("cli"); // Interrupts stay off until SHORTREL
+ Lock->IF = IF; // Remembered so SHORTREL can decide whether to "sti"
+ #endif
+
+ #if TRACE_LOCKS
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock ) // These two locks are never traced
+ {
+ //Log_Log("LOCK", "%p locked by %p", Lock, __builtin_return_address(0));
+ //Debug("Lock %p locked by %p\t%p", Lock, __builtin_return_address(0), __builtin_return_address(1));
+ Debug("got it");
+ }
+ #endif
+}
+/**
+ * \brief Release a short lock
+ * \param Lock Lock pointer
+ *
+ * When \a STACKED_LOCKS is set and Lock->Depth is non-zero, one nesting level
+ * is removed and the lock stays held. Otherwise Lock->Lock is set to 0 and,
+ * under \a LOCK_DISABLE_INTS, interrupts are re-enabled with "sti" if
+ * Lock->IF (stored by SHORTLOCK) is non-zero.
+ *
+ * \note No check is made that the caller actually holds the lock.
+ */
+void SHORTREL(struct sShortSpinlock *Lock)
+{
+ #if STACKED_LOCKS
+ if( Lock->Depth ) { // A nested acquire is outstanding: unwind one level only
+ Lock->Depth --;
+ return ;
+ }
+ #endif
+
+ #if TRACE_LOCKS
+ if( Lock != &glDebug_Lock && Lock != &glThreadListLock ) // These two locks are never traced
+ {
+ //Log_Log("LOCK", "%p released by %p", Lock, __builtin_return_address(0));
+ Debug("Lock %p released by %p\t%p", Lock, __builtin_return_address(0), __builtin_return_address(1));
+ }
+ #endif
+
+ #if LOCK_DISABLE_INTS
+ // Lock->IF can change anytime once Lock->Lock is zeroed
+ if(Lock->IF) { // So Lock->IF is tested before the lock is released
+ Lock->Lock = 0;
+ __ASM__ ("sti"); // Interrupts were enabled when the lock was taken
+ }
+ else {
+ Lock->Lock = 0; // Interrupts were already off at acquire time: leave them off
+ }
+ #else
+ Lock->Lock = 0;
+ #endif
+}
+
+// === DEBUG IO ===
+#if USE_GDB_STUB
+int putDebugChar(char ch) // Write one byte to the UART at GDB_SERIAL_PORT; always returns 0
+{
+ if(!gbGDB_SerialSetup) { // First use: program the UART once (flag is shared with getDebugChar)
+ outb(GDB_SERIAL_PORT + 1, 0x00); // Disable all interrupts
+ outb(GDB_SERIAL_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
+ outb(GDB_SERIAL_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
+ outb(GDB_SERIAL_PORT + 1, 0x00); // (hi byte)
+ outb(GDB_SERIAL_PORT + 3, 0x03); // 8 bits, no parity, one stop bit (8N1)
+ outb(GDB_SERIAL_PORT + 2, 0xC7); // Enable FIFO with 14-byte threshold and clear it
+ outb(GDB_SERIAL_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set (NOTE(review): "DSR" is probably meant to be DTR - confirm against the UART datasheet)
+ gbGDB_SerialSetup = 1;
+ }
+ while( (inb(GDB_SERIAL_PORT + 5) & 0x20) == 0 ); // Busy-wait until bit 0x20 of the register at base+5 is set (presumably "transmitter ready")
+ outb(GDB_SERIAL_PORT, ch);
+ return 0;
+}
+int getDebugChar(void) // Block until a byte is available on the UART at GDB_SERIAL_PORT and return it
+{
+ if(!gbGDB_SerialSetup) { // First use: program the UART once (flag is shared with putDebugChar)
+ outb(GDB_SERIAL_PORT + 1, 0x00); // Disable all interrupts
+ outb(GDB_SERIAL_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
+ outb(GDB_SERIAL_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
+ outb(GDB_SERIAL_PORT + 1, 0x00); // (hi byte)
+ outb(GDB_SERIAL_PORT + 3, 0x03); // 8 bits, no parity, one stop bit
+ outb(GDB_SERIAL_PORT + 2, 0xC7); // Enable FIFO with 14-byte threshold and clear it
+ outb(GDB_SERIAL_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set (NOTE(review): "DSR" is probably meant to be DTR - confirm against the UART datasheet)
+ gbGDB_SerialSetup = 1;
+ }
+ while( (inb(GDB_SERIAL_PORT + 5) & 1) == 0) ; // Busy-wait until bit 0 of the register at base+5 is set (presumably "data ready")
+ return inb(GDB_SERIAL_PORT);
+}
+#endif /* USE_GDB_STUB */
+
+void Debug_PutCharDebug(char ch) // Emit one debug byte to port 0xE9 and/or the UART at SERIAL_PORT, as selected by DEBUG_TO_E9 / DEBUG_TO_SERIAL
+{
+ #if DEBUG_TO_E9
+ __asm__ __volatile__ ( "outb %%al, $0xe9" :: "a"(((Uint8)ch)) ); // Port 0xE9 is presumably an emulator debug console
+ #endif
+
+ #if DEBUG_TO_SERIAL
+ if(!gbDebug_SerialSetup) { // First use: program the UART once
+ outb(SERIAL_PORT + 1, 0x00); // Disable all interrupts
+ outb(SERIAL_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
+ outb(SERIAL_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
+ outb(SERIAL_PORT + 1, 0x00); // (hi byte)
+ outb(SERIAL_PORT + 3, 0x03); // 8 bits, no parity, one stop bit
+ outb(SERIAL_PORT + 2, 0xC7); // Enable FIFO with 14-byte threshold and clear it
+ outb(SERIAL_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set (NOTE(review): "DSR" is probably meant to be DTR - confirm against the UART datasheet)
+ gbDebug_SerialSetup = 1;
+ }
+ while( (inb(SERIAL_PORT + 5) & 0x20) == 0 ); // Busy-wait until bit 0x20 of the register at base+5 is set (presumably "transmitter ready")
+ outb(SERIAL_PORT, ch);
+ #endif
}
-void Release(int *lock)
+void Debug_PutStringDebug(const char *String) // Send a NUL-terminated string to the debug output, one byte at a time
{
- __asm__ __volatile__ ("lock andl $0, (%0)"::"r"(lock));
+ while(*String) // Stop at the terminating NUL (which is not sent)
+ Debug_PutCharDebug(*String++);
}
// === IO Commands ===
}
/**
- * \fn void *memset(void *Dest, int Val, Uint Num)
- * \brief Do a byte set of Dest
+ * \fn void *memset(void *Dest, int Val, size_t Num)
+ * \brief Do a byte granularity set of Dest
+ * \param Dest Start of the region to fill
+ * \param Val Fill value; only the low 8 bits are used
+ * \param Num Number of bytes to fill
+ * \return \a Dest
+ *
+ * The low byte of \a Val is replicated into all four bytes of a 32-bit word,
+ * Num/4 dwords are stored with "rep stosl", and the remaining Num&3 bytes are
+ * stored with "rep stosb".
+ *
+ * NOTE(review): the asm advances EDI and overwrites ECX, yet both are bound
+ * as input-only operands, and no "memory" clobber is given. GCC's extended
+ * asm rules do not permit modifying input-only operands - confirm, and
+ * consider read-write ("+D"/"+c") operands.
*/
-void *memset(void *Dest, int Val, Uint Num)
+void *memset(void *Dest, int Val, size_t Num)
{
- __asm__ __volatile__ ("rep stosb" :: "D" (Dest), "a" (Val), "c" (Num));
+ Uint32 val = Val&0xFF; // Keep only the fill byte
+ val |= val << 8; // Replicate it into the low 16 bits
+ val |= val << 16; // ... and then into all 32 bits
+ __asm__ __volatile__ (
+ "rep stosl;\n\t" // Store Num/4 dwords
+ "mov %3, %%ecx;\n\t" // Reload the count with the Num&3 leftover bytes
+ "rep stosb" // Store the tail; AL holds the same fill byte
+ :: "D" (Dest), "a" (val), "c" (Num/4), "r" (Num&3));
return Dest;
}
/**
- * \fn void *memsetd(void *Dest, Uint Val, Uint Num)
+ * \brief Set double words
+ * \param Dest Start of the region to fill
+ * \param Val 32-bit value stored into every element
+ * \param Num Number of 32-bit elements (not bytes) to store
+ * \return \a Dest
+ *
+ * Implemented as a single "rep stosl" with \a Num as the repeat count.
*/
-void *memsetd(void *Dest, Uint Val, Uint Num)
+void *memsetd(void *Dest, Uint32 Val, size_t Num)
{
__asm__ __volatile__ ("rep stosl" :: "D" (Dest), "a" (Val), "c" (Num));
return Dest;
}
+/**
+ * \fn int memcmp(const void *m1, const void *m2, size_t Num)
+ * \brief Compare two pieces of memory
+ * \param m1 First buffer
+ * \param m2 Second buffer
+ * \param Num Number of bytes to compare
+ * \return 0 if the first \a Num bytes are equal; otherwise the difference
+ * (byte from \a m1 minus byte from \a m2, both read as Uint8) at the
+ * first position where they differ
+ */
+int memcmp(const void *m1, const void *m2, size_t Num)
+{
+ const Uint8 *d1 = m1;
+ const Uint8 *d2 = m2;
+ if( Num == 0 ) return 0; // No bytes are always identical
+
+ while(Num--) // Walk both buffers until a mismatch or Num bytes have been checked
+ {
+ if(*d1 != *d2)
+ return *d1 - *d2; // Sign tells which buffer holds the larger byte
+ d1 ++;
+ d2 ++;
+ }
+ return 0; // Every byte matched
+}
/**
- * \fn void *memcpy(void *Dest, void *Src, Uint Num)
+ * \fn void *memcpy(void *Dest, const void *Src, size_t Num)
+ * \brief Copy \a Num bytes from \a Src to \a Dest
+ * \param Dest Destination buffer
+ * \param Src Source buffer
+ * \param Num Number of bytes to copy
+ * \return \a Dest
+ *
+ * If either pointer is not 4-byte aligned the whole copy is done with
+ * "rep movsb". Otherwise Num/4 dwords are copied with "rep movsl" and the
+ * remaining Num&3 bytes with "rep movsb".
+ *
+ * NOTE(review): the asm advances EDI/ESI and overwrites ECX, yet all are
+ * bound as input-only operands, and no "memory" clobber is given. GCC's
+ * extended asm rules do not permit modifying input-only operands - confirm,
+ * and consider read-write operands.
*/
-void *memcpy(void *Dest, void *Src, Uint Num)
+void *memcpy(void *Dest, const void *Src, size_t Num)
{
- __asm__ __volatile__ ("rep movsb" :: "D" (Dest), "S" (Src), "c" (Num));
+ if( ((Uint)Dest & 3) || ((Uint)Src & 3) ) // Either pointer misaligned: plain byte copy
+ __asm__ __volatile__ ("rep movsb" :: "D" (Dest), "S" (Src), "c" (Num));
+ else {
+ __asm__ __volatile__ (
+ "rep movsl;\n\t" // Copy Num/4 dwords
+ "mov %3, %%ecx;\n\t" // Reload the count with the Num&3 leftover bytes
+ "rep movsb" // Copy the tail
+ :: "D" (Dest), "S" (Src), "c" (Num/4), "r" (Num&3));
+ }
return Dest;
}
/**
- * \fn void *memcpyd(void *Dest, void *Src, Uint Num)
+ * \fn void *memcpyd(void *Dest, const void *Src, size_t Num)
+ * \brief Copy \a Num DWORDs from \a Src to \a Dest
+ * \param Dest Destination buffer
+ * \param Src Source buffer
+ * \param Num Number of 32-bit elements (not bytes) to copy
+ * \return \a Dest
+ *
+ * Implemented as a single "rep movsl" with \a Num as the repeat count.
*/
-void *memcpyd(void *Dest, void *Src, Uint Num)
+void *memcpyd(void *Dest, const void *Src, size_t Num)
{
__asm__ __volatile__ ("rep movsl" :: "D" (Dest), "S" (Src), "c" (Num));
return Dest;
}
+Uint64 DivMod64U(Uint64 Num, Uint64 Div, Uint64 *Rem) // Unsigned 64-bit divide: returns Num / Div and, when Rem is non-NULL, stores Num % Div in *Rem
+{
+ Uint64 ret;
+ if( Div < 0x100000000ULL && Num < 0xFFFFFFFF * Div ) { // Fast path: divisor fits in 32 bits and the quotient is below 0xFFFFFFFF, so it fits the 32-bit result of one "div"
+ Uint32 rem, ret_32;
+ __asm__ __volatile__(
+ "div %4" // Divides EDX:EAX by operand 4; quotient is read from EAX, remainder from EDX
+ : "=a" (ret_32), "=d" (rem)
+ : "a" ( (Uint32)(Num & 0xFFFFFFFF) ), "d" ((Uint32)(Num >> 32)), "r" (Div) // NOTE(review): Div is a Uint64 bound to "r"; the guard keeps it below 2^32, but an explicit (Uint32) cast may be safer - confirm
+ );
+ if(Rem) *Rem = rem;
+ return ret_32;
+ }
+
+ ret = __udivdi3(Num, Div); // General path; also reached for Div == 0, which __udivdi3 turns into "int $0x0"
+ if(Rem) *Rem = __umoddi3(Num, Div); // Remainder is only computed when the caller asked for it
+ return ret;
+}
+
/**
* \fn Uint64 __udivdi3(Uint64 Num, Uint64 Den)
* \brief Divide two 64-bit integers
+ * \param Num Dividend
+ * \param Den Divisor (zero executes "int $0x0")
+ * \return Quotient \a Num / \a Den
+ *
+ * After a series of shortcut checks the quotient is computed on the x87 FPU
+ * (the "#if 1" branch). The restoring-division loop in the "#else" branch is
+ * currently compiled out.
+ *
+ * NOTE(review): the FPU path loads the operands with "fildq" and stores the
+ * result with "fistpq". These are presumably signed 64-bit conversions, and
+ * the store presumably rounds per the FPU control word rather than
+ * truncating - confirm results for operands >= 2^63 and for quotients with
+ * a fractional part.
+ * NOTE(review): "Den*2" in the shortcut checks can wrap for Den >= 2^63.
+ * NOTE(review): in the disabled loop, bit (63-i) is set while i counts down
+ * from 63, so the first quotient bit produced lands in bit 0 - verify the
+ * bit order before enabling that branch.
*/
Uint64 __udivdi3(Uint64 Num, Uint64 Den)
{
- Uint64 ret = 0;
+ Uint64 P[2]; // FPU path: in-memory operands {Num, Den}; disabled path: 128-bit working value {low, high}
+ Uint64 q = 0; // Quotient
+ int i; // Loop counter for the disabled path only
- if(Den == 0) __asm__ __volatile__ ("int $0x0"); // Call Div by Zero Error
- if(Den == 1) return Num; // Speed Hacks
+ if(Den == 0) __asm__ __volatile__ ("int $0x0"); // Raise interrupt 0 on division by zero
+ // Common speedups
+ if(Num <= 0xFFFFFFFF && Den <= 0xFFFFFFFF) // Both fit in 32 bits: use a native 32-bit divide
+ return (Uint32)Num / (Uint32)Den;
+ if(Den == 1) return Num;
if(Den == 2) return Num >> 1; // Speed Hacks
if(Den == 4) return Num >> 2; // Speed Hacks
if(Den == 8) return Num >> 3; // Speed Hacks
if(Den == 32) return Num >> 5; // Speed Hacks
if(Den == 1024) return Num >> 10; // Speed Hacks
if(Den == 2048) return Num >> 11; // Speed Hacks
+ if(Den == 4096) return Num >> 12; // Power-of-two divisor: shift instead of dividing
+ if(Num < Den) return 0; // Quotient is 0
+ if(Num < Den*2) return 1; // Den <= Num < 2*Den
+ if(Num == Den*2) return 2; // Exactly twice the divisor
- if(Num >> 32 == 0 && Den >> 32 == 0)
- return (Uint32)Num / (Uint32)Den;
-
- //Log("__udivdi3: (Num={0x%x:%x}, Den={0x%x:%x})",
- // Num>>32, Num&0xFFFFFFFF,
- // Den>>32, Den&0xFFFFFFFF);
-
- while(Num > Den) {
- ret ++;
- Num -= Den;
+ #if 1 // 1: the x87 path below is compiled and the loop under #else is not
+ i = 0; // Presumably assigned only to silence an unused-variable warning
+ P[0] = Num;
+ P[1] = Den;
+ __asm__ __volatile__ (
+ "fildq %2\n\t" // Operand 2 is P[1], i.e. Den
+ "fildq %1\n\t" // Operand 1 is P[0], i.e. Num
+ "fdivp\n\t" // NOTE(review): which operand is the dividend depends on how the assembler encodes operand-less fdivp - confirm
+ "fistpq %0" // Store the result as a 64-bit integer into q
+ : "=m" (q)
+ : "m" (P[0]), "m" (P[1])
+ );
+
+ //Log("%llx / %llx = %llx\n", Num, Den, q);
+ #else
+ // Restoring division, from wikipedia
+ // http://en.wikipedia.org/wiki/Division_(digital)
+ P[0] = Num; P[1] = 0;
+ for( i = 64; i--; )
+ {
+ // P <<= 1;
+ P[1] = (P[1] << 1) | (P[0] >> 63);
+ P[0] = P[0] << 1;
+
+ // P -= Den << 64
+ P[1] -= Den;
+
+ // P >= 0
+ if( !(P[1] & (1ULL<<63)) ) {
+ q |= (Uint64)1 << (63-i);
+ }
+ else {
+ //q |= 0 << (63-i);
+ P[1] += Den; // Subtraction went negative: restore the previous partial remainder
+ }
}
- return ret;
+ #endif
+
+ return q;
}
/**
if(Den == 32) return Num & 31; // Speed Hacks
if(Den == 1024) return Num & 1023; // Speed Hacks
if(Den == 2048) return Num & 2047; // Speed Hacks
+ if(Den == 4096) return Num & 4095; // Speed Hacks
if(Num >> 32 == 0 && Den >> 32 == 0)
return (Uint32)Num % (Uint32)Den;
- while(Num > Den)
- Num -= Den;
- return Num;
+ return Num - __udivdi3(Num, Den) * Den;
}
+
// --- EXPORTS ---
EXPORT(memcpy); EXPORT(memset);
+EXPORT(memcmp); // Byte-wise compare defined in this file
//EXPORT(memcpyw); EXPORT(memsetw);
EXPORT(memcpyd); EXPORT(memsetd);
EXPORT(inb); EXPORT(inw); EXPORT(ind);
EXPORT(outb); EXPORT(outw); EXPORT(outd);
EXPORT(__udivdi3); EXPORT(__umoddi3);
+
+EXPORT(SHORTLOCK); // Short spinlock acquire
+EXPORT(SHORTREL); // Short spinlock release
+EXPORT(IS_LOCKED); // NOTE(review): CPU_HAS_LOCK and DivMod64U are not exported here - confirm that is intended