Kernel/x86_64 - Separated task switching from timer interrupt
[tpg/acess2.git] / Kernel / arch / x86_64 / mm_virt.c
index ae8784b..2c91aed 100644 (file)
@@ -11,6 +11,7 @@
 
 // === CONSTANTS ===
 #define PHYS_BITS      52      // TODO: Move out
+#define VIRT_BITS      48      // Virtual address width; MM_GetPageEntryPtr masks indices with BITMASK(VIRT_BITS-n)
 
 #define PML4_SHIFT     39      // Addr >> 39 is the index handed to PAGEMAPLVL4()
 #define PDP_SHIFT      30      // Addr >> 30 is the index handed to PAGEDIRPTR()
 #define PTAB_SHIFT     12      // Addr >> 12 is the index handed to PAGETABLE()
 
 // Physical-address field of an entry. The ## pastes both halves into the
 // single literal 0x7FFFFFFFFFFFF000 (bits 12..62).
 #define        PADDR_MASK      0x7FFFFFFF##FFFFF000
 // Index masks: (2^n)-1 for n = 36, 27, 18 and 9. The replacement lines swap the
 // (Uint) cast for a long long constant; the mask values themselves are unchanged.
-#define PAGE_MASK      (((Uint)1 << 36)-1)
-#define TABLE_MASK     (((Uint)1 << 27)-1)
-#define PDP_MASK       (((Uint)1 << 18)-1)
-#define PML4_MASK      (((Uint)1 << 9)-1)
+#define PAGE_MASK      ((1LL << 36)-1) // Applied by PAGETABLE()/TMPTABLE()
+#define TABLE_MASK     ((1LL << 27)-1) // Applied by PAGEDIR()/TMPDIR()
+#define PDP_MASK       ((1LL << 18)-1) // Applied by PAGEDIRPTR()/TMPDIRPTR()
+#define PML4_MASK      ((1LL << 9)-1)  // Applied by PAGEMAPLVL4()/TMPMAPLVL4()
 
 // Flag bits of a paging entry
 #define        PF_PRESENT      0x001   // Bit 0; tested at every level before an entry is used
 #define        PF_WRITE        0x002   // Bit 1; set by MM_PageFault once a COW page becomes private
 #define        PF_USER         0x004   // Bit 2; added to entries for addresses below 0x800000000000
-#define        PF_LARGE        0x000
+#define        PF_LARGE        0x080   // Bit 7; the old value 0x000 made every `& PF_LARGE` test false
+#define        PF_GLOBAL       0x100   // Bit 8; NOTE(review): presumably the x86 "global" bit - no use is visible in this diff
 #define        PF_COW          0x200   // Bit 9; checked and cleared by MM_PageFault
 #define        PF_PAGED        0x400   // Bit 10
 #define        PF_NX           0x80000000##00000000    // Bit 63 (## pastes the halves into one literal)
 
 // === MACROS ===
 // Each accessor yields an lvalue for one 64-bit paging entry, reached through
 // the mapping at MM_FRACTAL_BASE. Callers index them with a shifted virtual
 // address: PAGETABLE(Addr>>12), PAGEDIR(Addr>>21), PAGEDIRPTR(Addr>>30),
 // PAGEMAPLVL4(Addr>>39).
-#define PAGETABLE(idx) (*((tPAddr*)MM_FRACTAL_BASE+((idx)&PAGE_MASK)))
+#define PAGETABLE(idx) (*((Uint64*)MM_FRACTAL_BASE+((idx)&PAGE_MASK)))
 #define PAGEDIR(idx)   PAGETABLE((MM_FRACTAL_BASE>>12)+((idx)&TABLE_MASK))
 #define PAGEDIRPTR(idx)        PAGEDIR((MM_FRACTAL_BASE>>21)+((idx)&PDP_MASK))
 #define PAGEMAPLVL4(idx)       PAGEDIRPTR((MM_FRACTAL_BASE>>30)+((idx)&PML4_MASK))
 
 // Same accessors for the address space attached at MM_TMPFRAC_BASE. TMPCR3()
 // is the PML4 entry of the current space that selects it (MM_Clone stores
 // `ret | 3` there and clears it again before returning).
 #define TMPCR3()       PAGEMAPLVL4(MM_TMPFRAC_BASE>>39)
-#define TMPTABLE(idx)  (*((tPAddr*)MM_TMPFRAC_BASE+((idx)&PAGE_MASK)))
+#define TMPTABLE(idx)  (*((Uint64*)MM_TMPFRAC_BASE+((idx)&PAGE_MASK)))
 #define TMPDIR(idx)    PAGETABLE((MM_TMPFRAC_BASE>>12)+((idx)&TABLE_MASK))
 #define TMPDIRPTR(idx) PAGEDIR((MM_TMPFRAC_BASE>>21)+((idx)&PDP_MASK))
 #define TMPMAPLVL4(idx)        PAGEDIRPTR((MM_TMPFRAC_BASE>>30)+((idx)&PML4_MASK))
@@ -53,6 +55,7 @@
 // === IMPORTS ===
 extern void    Error_Backtrace(Uint IP, Uint BP);
 extern tPAddr  gInitialPML4[512];
+extern void    Threads_SegFault(tVAddr Addr);
 
 // === PROTOTYPES ===
 void   MM_InitVirt(void);
@@ -72,7 +75,7 @@ tMutex        glMM_TempFractalLock;
 // === CODE ===
 // MM_InitVirt - after this change the body is empty: the MM_DumpTables(0, -1L)
 // call that dumped the whole address range is disabled (commented out) rather
 // than removed.
 void MM_InitVirt(void)
 {
-       MM_DumpTables(0, -1L);
+//     MM_DumpTables(0, -1L);
 }
 
 void MM_FinishVirtualInit(void)
@@ -86,24 +89,35 @@ void MM_FinishVirtualInit(void)
 void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
 {
        // TODO: Implement Copy-on-Write
-       #if 0
-       if( gaPageDir  [Addr>>22] & PF_PRESENT
-        && gaPageTable[Addr>>12] & PF_PRESENT
-        && gaPageTable[Addr>>12] & PF_COW )
+       #if 1
+       if( PAGEMAPLVL4(Addr>>39) & PF_PRESENT
+        && PAGEDIRPTR (Addr>>30) & PF_PRESENT
+        && PAGEDIR    (Addr>>21) & PF_PRESENT
+        && PAGETABLE  (Addr>>12) & PF_PRESENT
+        && PAGETABLE  (Addr>>12) & PF_COW )
        {
                tPAddr  paddr;
-               if(MM_GetRefCount( gaPageTable[Addr>>12] & PADDR_MASK ) == 1)
+               if(MM_GetRefCount( PAGETABLE(Addr>>12) & PADDR_MASK ) == 1)
                {
-                       gaPageTable[Addr>>12] &= ~PF_COW;
-                       gaPageTable[Addr>>12] |= PF_PRESENT|PF_WRITE;
+                       PAGETABLE(Addr>>12) &= ~PF_COW;
+                       PAGETABLE(Addr>>12) |= PF_PRESENT|PF_WRITE;
                }
                else
                {
                        //Log("MM_PageFault: COW - MM_DuplicatePage(0x%x)", Addr);
-                       paddr = MM_DuplicatePage( Addr );
-                       MM_DerefPhys( gaPageTable[Addr>>12] & PADDR_MASK );
-                       gaPageTable[Addr>>12] &= PF_USER;
-                       gaPageTable[Addr>>12] |= paddr|PF_PRESENT|PF_WRITE;
+                       paddr = MM_AllocPhys();
+                       if( !paddr ) {
+                               Threads_SegFault(Addr);
+                               return ;
+                       }
+                       {
+                               void    *tmp = (void*)MM_MapTemp(paddr);
+                               memcpy( tmp, (void*)(Addr & ~0xFFF), 0x1000 );
+                               MM_FreeTemp( (tVAddr)tmp );
+                       }
+                       MM_DerefPhys( PAGETABLE(Addr>>12) & PADDR_MASK );
+                       PAGETABLE(Addr>>12) &= PF_USER;
+                       PAGETABLE(Addr>>12) |= paddr|PF_PRESENT|PF_WRITE;
                }
                
                INVLPG( Addr & ~0xFFF );
@@ -113,16 +127,15 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
        
        // If it was a user, tell the thread handler
        if(ErrorCode & 4) {
-               Warning("%s %s %s memory%s",
-                       (ErrorCode&4?"User":"Kernel"),
+               Warning("User %s %s memory%s",
                        (ErrorCode&2?"write to":"read from"),
                        (ErrorCode&1?"bad/locked":"non-present"),
                        (ErrorCode&16?" (Instruction Fetch)":"")
                        );
-               Warning("User Pagefault: Instruction at %04x:%08x accessed %p",
+               Warning("User Pagefault: Instruction at %04x:%p accessed %p",
                        Regs->CS, Regs->RIP, Addr);
                __asm__ __volatile__ ("sti");   // Restart IRQs
-//             Threads_SegFault(Addr);
+               Threads_SegFault(Addr);
                return ;
        }
        
@@ -133,8 +146,7 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
                Warning("Reserved Bits Trashed!");
        else
        {
-               Warning("%s %s %s memory%s",
-                       (ErrorCode&4?"User":"Kernel"),
+               Warning("Kernel %s %s memory%s",
                        (ErrorCode&2?"write to":"read from"),
                        (ErrorCode&1?"bad/locked":"non-present"),
                        (ErrorCode&16?" (Instruction Fetch)":"")
@@ -158,7 +170,7 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
 void MM_DumpTables(tVAddr Start, tVAddr End)
 {
        #define CANOICAL(addr)  ((addr)&0x800000000000?(addr)|0xFFFF000000000000:(addr))
-       const tPAddr    CHANGEABLE_BITS = 0xFF8;
+       const tPAddr    CHANGEABLE_BITS = ~(PF_PRESENT|PF_WRITE|PF_USER|PF_COW|PF_PAGED) & 0xFFF;
        const tPAddr    MASK = ~CHANGEABLE_BITS;        // Physical address and access bits
        tVAddr  rangeStart = 0;
        tPAddr  expected = CHANGEABLE_BITS;     // CHANGEABLE_BITS is used because it's not a vaild value
@@ -184,15 +196,15 @@ void MM_DumpTables(tVAddr Start, tVAddr End)
                //Debug("&PAGETABLE(%i page) = %p", page, &PAGETABLE(page));
                
                // End of a range
-               if(
-                       !(PAGEMAPLVL4(page>>27) & PF_PRESENT)
-               ||      !(PAGEDIRPTR(page>>18) & PF_PRESENT)
-               ||      !(PAGEDIR(page>>9) & PF_PRESENT)
-               ||  !(PAGETABLE(page) & PF_PRESENT)
-               ||  (PAGETABLE(page) & MASK) != expected)
+               if(!(PAGEMAPLVL4(page>>27) & PF_PRESENT)
+               || !(PAGEDIRPTR(page>>18) & PF_PRESENT)
+               || !(PAGEDIR(page>>9) & PF_PRESENT)
+               || !(PAGETABLE(page) & PF_PRESENT)
+               || (PAGETABLE(page) & MASK) != expected)
                {                       
-                       if(expected != CHANGEABLE_BITS) {
-                               Log("%016llx => %013llx : 0x%6llx (%c%c%c%c)",
+                       if(expected != CHANGEABLE_BITS)
+                       {
+                               Log("%016llx => %13llx : 0x%6llx (%c%c%c%c)",
                                        CANOICAL(rangeStart),
                                        PAGETABLE(rangeStart>>12) & PADDR_MASK,
                                        curPos - rangeStart,
@@ -231,7 +243,7 @@ void MM_DumpTables(tVAddr Start, tVAddr End)
        }
        
        if(expected != CHANGEABLE_BITS) {
-               Log("%016llx => %013llx : 0x%6llx (%c%c%c%c)",
+               Log("%016llx => %13llx : 0x%6llx (%c%c%c%c)",
                        CANOICAL(rangeStart),
                        PAGETABLE(rangeStart>>12) & PADDR_MASK,
                        curPos - rangeStart,
@@ -262,6 +274,8 @@ int MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage,
        const int       nADDR_SIZES = sizeof(ADDR_SIZES)/sizeof(ADDR_SIZES[0]);
         int    i;
        
+       #define BITMASK(bits)   ( (1LL << (bits))-1 )
+
        if( bTemp )
        {
                pmlevels[3] = &TMPTABLE(0);     // Page Table
@@ -272,9 +286,9 @@ int MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage,
        else
        {
                pmlevels[3] = (void*)MM_FRACTAL_BASE;   // Page Table
-               pmlevels[2] = &pmlevels[3][(MM_FRACTAL_BASE>>12)&PAGE_MASK];    // PDIR
-               pmlevels[1] = &pmlevels[2][(MM_FRACTAL_BASE>>21)&TABLE_MASK];   // PDPT
-               pmlevels[0] = &pmlevels[1][(MM_FRACTAL_BASE>>30)&PDP_MASK];     // PML4
+               pmlevels[2] = &pmlevels[3][(MM_FRACTAL_BASE>>12)&BITMASK(VIRT_BITS-12)];        // PDIR
+               pmlevels[1] = &pmlevels[2][(MM_FRACTAL_BASE>>21)&BITMASK(VIRT_BITS-21)];        // PDPT
+               pmlevels[0] = &pmlevels[1][(MM_FRACTAL_BASE>>30)&BITMASK(VIRT_BITS-30)];        // PML4
        }
        
        // Mask address
@@ -294,11 +308,15 @@ int MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage,
                if( !(pmlevels[i][Addr >> ADDR_SIZES[i]] & 1) )
                {
                        if( !bAllocate )        return -4;      // If allocation is not requested, error
-                       tmp = MM_AllocPhys();
-                       if(!tmp)        return -2;
+                       if( !(tmp = MM_AllocPhys()) )   return -2;
                        pmlevels[i][Addr >> ADDR_SIZES[i]] = tmp | 3;
+                       if( Addr < 0x800000000000 )
+                               pmlevels[i][Addr >> ADDR_SIZES[i]] |= PF_USER;
                        INVLPG( &pmlevels[i+1][ (Addr>>ADDR_SIZES[i])*512 ] );
                        memset( &pmlevels[i+1][ (Addr>>ADDR_SIZES[i])*512 ], 0, 0x1000 );
+                       LOG("Init PML%i ent 0x%x %p with %P", 4 - i,
+                               Addr>>ADDR_SIZES[i],
+                               (Addr>>ADDR_SIZES[i])<<ADDR_SIZES[i], tmp);
                }
                // Catch large pages
                else if( pmlevels[i][Addr >> ADDR_SIZES[i]] & PF_LARGE )
@@ -336,7 +354,10 @@ int MM_MapEx(tVAddr VAddr, tPAddr PAddr, BOOL bTemp, BOOL bLarge)
        if( *ent & 1 )  LEAVE_RET('i', 0);
        
        *ent = PAddr | 3;
-       
+
+       if( VAddr < 0x800000000000 )
+               *ent |= PF_USER;
+
        INVLPG( VAddr );
 
        LEAVE('i', 1);  
@@ -446,6 +467,8 @@ tPAddr MM_GetPhysAddr(tVAddr Addr)
        ret = MM_GetPageEntryPtr(Addr, 0, 0, 0, &ptr);
        if( ret < 0 )   return 0;
        
+       if( !(*ptr & 1) )       return 0;
+       
        return (*ptr & PADDR_MASK) | (Addr & 0xFFF);
 }
 
@@ -634,13 +657,29 @@ tVAddr MM_AllocDMA(int Pages, int MaxBits, tPAddr *PhysAddr)
 // --- Temporary Mappings ---
 // Map the physical page PAddr into the first unused slot of the temporary
 // mapping window [MM_TMPMAP_BASE, MM_TMPMAP_END).
 // Returns the virtual address of the slot used, or 0 when no slot could be used.
 // Release the mapping with MM_FreeTemp().
 // NOTE(review): MM_MapEx issues INVLPG after writing an entry; this path does
 // not - confirm a stale translation cannot exist for a reused slot.
 tVAddr MM_MapTemp(tPAddr PAddr)
 {
-       Log_KernelPanic("MM", "TODO: Implement MM_MapTemp");
+       const int max_slots = (MM_TMPMAP_END - MM_TMPMAP_BASE) / PAGE_SIZE;     // One slot per page of the window
+       tVAddr  ret = MM_TMPMAP_BASE;
+        int    i;
+       
+       for( i = 0; i < max_slots; i ++, ret += PAGE_SIZE )
+       {
+               tPAddr  *ent;
+               if( MM_GetPageEntryPtr( ret, 0, 1, 0, &ent) < 0 ) {     // bTemp=0, bAllocate=1, bLargePage=0
+                       continue ;      // Lookup failed - try the next slot
+               }
+
+               if( *ent & 1 )  // Bit 0 (PF_PRESENT) set: slot already in use
+                       continue ;
+
+               *ent = PAddr | 3;       // 3 = PF_PRESENT|PF_WRITE
+               return ret;
+       }
        return 0;       // Every slot was skipped
 }
 
 // Release a temporary mapping created by MM_MapTemp() by handing its virtual
 // address to MM_Deallocate().
 // NOTE(review): MM_Deallocate is not visible here. If it also dereferences or
 // frees the backing physical page, callers that keep using that page afterwards
 // (e.g. the copy-on-write path in MM_PageFault) would be affected - verify.
 void MM_FreeTemp(tVAddr VAddr)
 {
-       Log_KernelPanic("MM", "TODO: Implement MM_FreeTemp");
+       MM_Deallocate(VAddr);
        return ;
 }
 
@@ -650,8 +689,8 @@ tPAddr MM_Clone(void)
 {
        tPAddr  ret;
         int    i;
-       tVAddr  kstackbase = Proc_GetCurThread()->KernelStack - KERNEL_STACK_SIZE + 0x1000;
-       
+       tVAddr  kstackbase;
+
        // #1 Create a copy of the PML4
        ret = MM_AllocPhys();
        if(!ret)        return 0;
@@ -661,8 +700,6 @@ tPAddr MM_Clone(void)
        TMPCR3() = ret | 3;
        INVLPG_ALL();
        
-//     Log_KernelPanic("MM", "TODO: Implement MM_Clone");
-       
        // #3 Set Copy-On-Write to all user pages
        for( i = 0; i < 256; i ++)
        {
@@ -683,32 +720,53 @@ tPAddr MM_Clone(void)
                // 320 0xFFFFA....      - Kernel Stacks
                if( i == 320 )  continue;
                // 508 0xFFFFFE0..      - Fractal mapping
-               if( i == 509 )  continue;
+               if( i == 508 )  continue;
                // 509 0xFFFFFE8..      - Temp fractal mapping
-               if( i == 510 )  continue;
+               if( i == 509 )  continue;
+               
+               TMPMAPLVL4(i) = PAGEMAPLVL4(i);
+               if( TMPMAPLVL4(i) & 1 )
+                       MM_RefPhys( TMPMAPLVL4(i) & PADDR_MASK );
        }
        
        // #5 Set fractal mapping
-       TMPMAPLVL4(509) = ret | 3;
-       TMPMAPLVL4(510) = 0;    // Temp
+       TMPMAPLVL4(508) = ret | 3;
+       TMPMAPLVL4(509) = 0;    // Temp
        
        // #6 Create kernel stack
-       TMPMAPLVL4(320) = 0;
-       for( i = 0; i < KERNEL_STACK_SIZE/0x1000-1; i ++ )
+       //  tThread->KernelStack is the top
+       //  There is 1 guard page below the stack
+       kstackbase = Proc_GetCurThread()->KernelStack - KERNEL_STACK_SIZE;
+
+//     Log("MM_Clone: kstackbase = %p", kstackbase);
+       
+       TMPMAPLVL4(MM_KSTACK_BASE >> PML4_SHIFT) = 0;
+       for( i = 1; i < KERNEL_STACK_SIZE/0x1000; i ++ )
        {
                tPAddr  phys = MM_AllocPhys();
                tVAddr  tmpmapping;
                MM_MapEx(kstackbase+i*0x1000, phys, 1, 0);
                
+               Log_Debug("MM", "MM_Clone: Cloning stack page %p from %P to %P",
+                       kstackbase+i*0x1000, MM_GetPhysAddr( kstackbase+i*0x1000 ), phys
+                       );
                tmpmapping = MM_MapTemp(phys);
-               memcpy((void*)tmpmapping, (void*)(kstackbase+i*0x1000), 0x1000);
+               if( MM_GetPhysAddr( kstackbase+i*0x1000 ) )
+                       memcpy((void*)tmpmapping, (void*)(kstackbase+i*0x1000), 0x1000);
+               else
+                       memset((void*)tmpmapping, 0, 0x1000);
+//             if( i == 0xF )
+//                     Debug_HexDump("MM_Clone: *tmpmapping = ", (void*)tmpmapping, 0x1000);
                MM_FreeTemp(tmpmapping);
        }
        
+//     MAGIC_BREAK();
+
        // #7 Return
        TMPCR3() = 0;
        INVLPG_ALL();
        Mutex_Release(&glMM_TempFractalLock);
+//     Log("MM_Clone: RETURN %P", ret);
        return ret;
 }
 
@@ -791,7 +849,7 @@ void MM_ClearUser(void)
        }
 }
 
-tVAddr MM_NewWorkerStack(void)
+tVAddr MM_NewWorkerStack(void *StackData, size_t StackSize)
 {
        tVAddr  ret;
         int    i;
@@ -803,7 +861,9 @@ tVAddr MM_NewWorkerStack(void)
        // #2 Scan for a free stack address < 2^47
        for(ret = 0x100000; ret < (1ULL << 47); ret += KERNEL_STACK_SIZE)
        {
-               if( MM_GetPhysAddr(ret) == 0 )  break;
+               tPAddr  *ptr;
+               if( MM_GetPageEntryPtr(ret, 1, 0, 0, &ptr) == 0 )       break;
+               if( !(*ptr & 1) )       break;
        }
        if( ret >= (1ULL << 47) ) {
                Mutex_Release(&glMM_TempFractalLock);
@@ -822,6 +882,19 @@ tVAddr MM_NewWorkerStack(void)
                }
                MM_MapEx(ret + i*0x1000, phys, 1, 0);
        }
+
+       if( StackSize > 0x1000 ) {
+               Log_Error("MM", "MM_NewWorkerStack: StackSize(0x%x) > 0x1000, cbf handling", StackSize);
+       }
+       else {
+               tPAddr  *ptr, paddr;
+               tVAddr  tmp_addr;
+               MM_GetPageEntryPtr(ret + i*0x1000, 1, 0, 0, &ptr);
+               paddr = *ptr & ~0xFFF;
+               tmp_addr = MM_MapTemp(paddr);
+               memcpy( (void*)(tmp_addr + (0x1000 - StackSize)), StackData, StackSize );
+               MM_FreeTemp(tmp_addr);
+       }
        
        Mutex_Release(&glMM_TempFractalLock);
        
@@ -837,11 +910,11 @@ tVAddr MM_NewKStack(void)
        Uint    i;
        for( ; base < MM_KSTACK_TOP; base += KERNEL_STACK_SIZE )
        {
-               if(MM_GetPhysAddr(base) != 0)
+               if(MM_GetPhysAddr(base+KERNEL_STACK_SIZE-0x1000) != 0)
                        continue;
                
                //Log("MM_NewKStack: Found one at %p", base + KERNEL_STACK_SIZE);
-               for( i = 0; i < KERNEL_STACK_SIZE; i += 0x1000)
+               for( i = 0x1000; i < KERNEL_STACK_SIZE; i += 0x1000)
                {
                        if( !MM_Allocate(base+i) )
                        {

UCC git Repository :: git.ucc.asn.au