Kernel/x86_64 - Implemented COW, fixed PMM bugs
[tpg/acess2.git] / Kernel / arch / x86_64 / mm_virt.c
index 5252ea3..a8d890a 100644 (file)
@@ -27,7 +27,8 @@
 #define        PF_PRESENT      0x001
 #define        PF_WRITE        0x002
 #define        PF_USER         0x004
-#define        PF_LARGE        0x000
+#define        PF_LARGE        0x080
+#define        PF_GLOBAL       0x100
 #define        PF_COW          0x200
 #define        PF_PAGED        0x400
 #define        PF_NX           0x80000000##00000000
 extern void    Error_Backtrace(Uint IP, Uint BP);
 extern tPAddr  gInitialPML4[512];
 extern void    Threads_SegFault(tVAddr Addr);
+extern char    _UsertextBase[];
 
 // === PROTOTYPES ===
 void   MM_InitVirt(void);
 //void MM_FinishVirtualInit(void);
-void   MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs);
+void   MM_int_ClonePageEnt( Uint64 *Ent, void *NextLevel, tVAddr Addr, int bTable );
+ int   MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs);
 void   MM_DumpTables(tVAddr Start, tVAddr End);
  int   MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage, tPAddr **Pointer);
  int   MM_MapEx(tVAddr VAddr, tPAddr PAddr, BOOL bTemp, BOOL bLarge);
@@ -74,7 +77,8 @@ tMutex        glMM_TempFractalLock;
 // === CODE ===
 void MM_InitVirt(void)
 {
-       MM_DumpTables(0, -1L);
+       Log_Debug("MMVirt", "&PAGEMAPLVL4(0) = %p", &PAGEMAPLVL4(0));   // log the address at which PML4 entry 0 is accessed
+//     MM_DumpTables(0, -1L);
 }
 
 void MM_FinishVirtualInit(void)
@@ -83,33 +87,106 @@ void MM_FinishVirtualInit(void)
 }
 
 /**
+ * \brief Resolve copy-on-write for one paging entry, cloning the page it references if that page is shared
+ * \param Ent  Pointer to the entry in the PML4/PDP/PD/PT
+ * \param NextLevel    Pointer to contents of the entry (source of the 0x1000-byte copy)
+ * \param Addr Address passed to Threads_SegFault() if no physical page can be allocated
+ * \param bTable       Non-zero if the contents are a table: present children are referenced, writable ones marked COW
+ */
+void MM_int_ClonePageEnt( Uint64 *Ent, void *NextLevel, tVAddr Addr, int bTable )
+{
+       tPAddr  curpage = *Ent & PADDR_MASK; // physical page currently referenced by the entry
+       if( MM_GetRefCount( curpage ) <= 0 ) {  // a COW entry must still hold at least one reference
+               Log_KernelPanic("MMVirt", "Page %P still marked COW, but unreferenced", curpage);
+       }
+//     Log_Debug("MM_Virt", "%P refcount %i", curpage, MM_GetRefCount( curpage ));
+       if( MM_GetRefCount( curpage ) == 1 )    // sole reference: make the entry writable in place, no copy
+       {
+               *Ent &= ~PF_COW;
+               *Ent |= PF_PRESENT|PF_WRITE;
+//             Log_Debug("MMVirt", "COW ent at %p (%p), last (%P)", Ent, NextLevel, curpage);
+       }
+       else    // page is shared: copy it into a freshly allocated physical page
+       {
+               void    *tmp;
+               tPAddr  paddr;
+               
+               if( !(paddr = MM_AllocPhys()) ) {       // allocation failed: hand Addr to Threads_SegFault and give up
+                       Threads_SegFault(Addr);
+                       return ;
+               }
+
+               ASSERT(paddr != curpage);
+                       
+               tmp = (void*)MM_MapTemp(paddr); // copy the old contents through a temporary mapping of the new page
+               memcpy( tmp, NextLevel, 0x1000 );
+               MM_FreeTemp( (tVAddr)tmp );
+               
+//             Log_Debug("MMVirt", "COW ent at %p (%p) from %P to %P", Ent, NextLevel, curpage, paddr);
+
+               MM_DerefPhys( curpage );        // this entry no longer uses the old page
+               *Ent &= PF_USER;        // keep only the USER flag; the old address and other flags are dropped
+               *Ent |= paddr|PF_PRESENT|PF_WRITE;      // install the new page as present and writable
+       }
+       INVLPG( (tVAddr)NextLevel );    // issued on both paths, for the address the contents are accessed through
+       
+       // Mark COW on pages: each present child gains a reference; writable children become read-only + COW
+       if(bTable) 
+       {
+               Uint64  *dp = NextLevel;
+                int    i;
+               for( i = 0; i < 512; i ++ )
+               {
+                       if( !(dp[i] & PF_PRESENT) )     continue;       // skip empty slots
+                       MM_RefPhys( dp[i] & PADDR_MASK );       // NOTE(review): also runs on the refcount==1 path, where no copy was made - confirm the extra reference is intended
+                       if( dp[i] & PF_WRITE ) {
+                               dp[i] &= ~PF_WRITE;
+                               dp[i] |= PF_COW;
+                       }
+               }
+       }
+}
+
+/**
  * \brief Called on a page fault
  */
-void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
+int MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
 {
        // TODO: Implement Copy-on-Write
-       #if 0
-       if( gaPageDir  [Addr>>22] & PF_PRESENT
-        && gaPageTable[Addr>>12] & PF_PRESENT
-        && gaPageTable[Addr>>12] & PF_COW )
+       #if 1
+       if( PAGEMAPLVL4(Addr>>39) & PF_PRESENT
+        && PAGEDIRPTR (Addr>>30) & PF_PRESENT
+        && PAGEDIR    (Addr>>21) & PF_PRESENT
+        && PAGETABLE  (Addr>>12) & PF_PRESENT )
        {
-               tPAddr  paddr;
-               if(MM_GetRefCount( gaPageTable[Addr>>12] & PADDR_MASK ) == 1)
+               // PML4 Entry
+               if( PAGEMAPLVL4(Addr>>39) & PF_COW )
                {
-                       gaPageTable[Addr>>12] &= ~PF_COW;
-                       gaPageTable[Addr>>12] |= PF_PRESENT|PF_WRITE;
+                       tPAddr  *dp = &PAGEDIRPTR((Addr>>39)*512);
+                       MM_int_ClonePageEnt( &PAGEMAPLVL4(Addr>>39), dp, Addr, 1 );
+//                     MM_DumpTables(Addr>>39 << 39, (((Addr>>39) + 1) << 39) - 1);
                }
-               else
+               // PDP Entry
+               if( PAGEDIRPTR(Addr>>30) & PF_COW )
                {
-                       //Log("MM_PageFault: COW - MM_DuplicatePage(0x%x)", Addr);
-                       paddr = MM_DuplicatePage( Addr );
-                       MM_DerefPhys( gaPageTable[Addr>>12] & PADDR_MASK );
-                       gaPageTable[Addr>>12] &= PF_USER;
-                       gaPageTable[Addr>>12] |= paddr|PF_PRESENT|PF_WRITE;
+                       tPAddr  *dp = &PAGEDIR( (Addr>>30)*512 );
+                       MM_int_ClonePageEnt( &PAGEDIRPTR(Addr>>30), dp, Addr, 1 );
+//                     MM_DumpTables(Addr>>30 << 30, (((Addr>>30) + 1) << 30) - 1);
+               }
+               // PD Entry
+               if( PAGEDIR(Addr>>21) & PF_COW )
+               {
+                       tPAddr  *dp = &PAGETABLE( (Addr>>21)*512 );
+                       MM_int_ClonePageEnt( &PAGEDIR(Addr>>21), dp, Addr, 1 );
+//                     MM_DumpTables(Addr>>21 << 21, (((Addr>>21) + 1) << 21) - 1);
+               }
+               // PT Entry
+               if( PAGETABLE(Addr>>12) & PF_COW )
+               {
+                       MM_int_ClonePageEnt( &PAGETABLE(Addr>>12), (void*)(Addr & ~0xFFF), Addr, 0 );
+                       INVLPG( Addr & ~0xFFF );
+                       return 0;
                }
-               
-               INVLPG( Addr & ~0xFFF );
-               return;
        }
        #endif
        
@@ -124,7 +201,7 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
                        Regs->CS, Regs->RIP, Addr);
                __asm__ __volatile__ ("sti");   // Restart IRQs
                Threads_SegFault(Addr);
-               return ;
+               return 0;
        }
        
        // Kernel #PF
@@ -146,10 +223,8 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
        Error_Backtrace(Regs->RIP, Regs->RBP);
        
        MM_DumpTables(0, -1);
-       
-       __asm__ __volatile__ ("cli");
-       for( ;; )
-               HALT();
+
+       return 1;       
 }
 
 /**
@@ -192,7 +267,7 @@ void MM_DumpTables(tVAddr Start, tVAddr End)
                {                       
                        if(expected != CHANGEABLE_BITS)
                        {
-                               Log("%016llx => %013llx : 0x%6llx (%c%c%c%c)",
+                               Log("%016llx => %13llx : 0x%6llx (%c%c%c%c)",
                                        CANOICAL(rangeStart),
                                        PAGETABLE(rangeStart>>12) & PADDR_MASK,
                                        curPos - rangeStart,
@@ -206,19 +281,16 @@ void MM_DumpTables(tVAddr Start, tVAddr End)
                        if( !(PAGEMAPLVL4(page>>27) & PF_PRESENT) ) {
                                page += (1 << 27) - 1;
                                curPos += (1L << 39) - 0x1000;
-                               //Debug("pml4 ent unset (page = 0x%x now)", page);
                                continue;
                        }
                        if( !(PAGEDIRPTR(page>>18) & PF_PRESENT) ) {
                                page += (1 << 18) - 1;
                                curPos += (1L << 30) - 0x1000;
-                               //Debug("pdp ent unset (page = 0x%x now)", page);
                                continue;
                        }
                        if( !(PAGEDIR(page>>9) & PF_PRESENT) ) {
                                page += (1 << 9) - 1;
                                curPos += (1L << 21) - 0x1000;
-                               //Debug("pd ent unset (page = 0x%x now)", page);
                                continue;
                        }
                        if( !(PAGETABLE(page) & PF_PRESENT) )   continue;
@@ -231,7 +303,7 @@ void MM_DumpTables(tVAddr Start, tVAddr End)
        }
        
        if(expected != CHANGEABLE_BITS) {
-               Log("%016llx => %013llx : 0x%6llx (%c%c%c%c)",
+               Log("%016llx => %13llx : 0x%6llx (%c%c%c%c)",
                        CANOICAL(rangeStart),
                        PAGETABLE(rangeStart>>12) & PADDR_MASK,
                        curPos - rangeStart,
@@ -373,7 +445,7 @@ void MM_Unmap(tVAddr VAddr)
        if( !(PAGEDIRPTR(VAddr >> 30) & 1) )    return ;
        // Check Page Dir
        if( !(PAGEDIR(VAddr >> 21) & 1) )       return ;
-       
+
        PAGETABLE(VAddr >> PTAB_SHIFT) = 0;
        INVLPG( VAddr );
 }
@@ -572,6 +644,7 @@ tVAddr MM_MapHWPages(tPAddr PAddr, Uint Number)
                        ret -= 0x1000;
                        PAddr -= 0x1000;
                        MM_Map(ret, PAddr);
+                       MM_RefPhys(PAddr);
                }
                
                return ret;
@@ -589,6 +662,7 @@ void MM_UnmapHWPages(tVAddr VAddr, Uint Number)
 //     Log_KernelPanic("MM", "TODO: Implement MM_UnmapHWPages");
        while( Number -- )
        {
+               MM_DerefPhys( MM_GetPhysAddr(VAddr) );
                MM_Unmap(VAddr);
                VAddr += 0x1000;
        }
@@ -617,10 +691,7 @@ tVAddr MM_AllocDMA(int Pages, int MaxBits, tPAddr *PhysAddr)
                phys = MM_AllocPhys();
                *PhysAddr = phys;
                ret = MM_MapHWPages(phys, 1);
-               if(ret == 0) {
-                       MM_DerefPhys(phys);
-                       return 0;
-               }
+               MM_DerefPhys(phys);
                return ret;
        }
        
@@ -631,10 +702,11 @@ tVAddr MM_AllocDMA(int Pages, int MaxBits, tPAddr *PhysAddr)
        
        // Allocated successfully, now map
        ret = MM_MapHWPages(phys, Pages);
+       // MapHWPages references the pages, so deref them back down to 1
+       for(;Pages--;phys+=0x1000)
+               MM_DerefPhys(phys);
        if( ret == 0 ) {
                // If it didn't map, free then return 0
-               for(;Pages--;phys+=0x1000)
-                       MM_DerefPhys(phys);
                return 0;
        }
        
@@ -660,6 +732,8 @@ tVAddr MM_MapTemp(tPAddr PAddr)
                        continue ;
 
                *ent = PAddr | 3;
+               MM_RefPhys(PAddr);
+               INVLPG(ret);
                return ret;
        }
        return 0;
@@ -679,12 +753,6 @@ tPAddr MM_Clone(void)
         int    i;
        tVAddr  kstackbase;
 
-       // tThread->KernelStack is the top
-       // There is 1 guard page below the stack
-       kstackbase = Proc_GetCurThread()->KernelStack - KERNEL_STACK_SIZE + 0x1000;
-
-       Log("MM_Clone: kstackbase = %p", kstackbase);
-       
        // #1 Create a copy of the PML4
        ret = MM_AllocPhys();
        if(!ret)        return 0;
@@ -699,9 +767,11 @@ tPAddr MM_Clone(void)
        {
                TMPMAPLVL4(i) = PAGEMAPLVL4(i);
 //             Log_Debug("MM", "TMPMAPLVL4(%i) = 0x%016llx", i, TMPMAPLVL4(i));
-               if( TMPMAPLVL4(i) & 1 )
-               {
-                       MM_RefPhys( TMPMAPLVL4(i) & PADDR_MASK );
+               if( !(TMPMAPLVL4(i) & PF_PRESENT) )     continue ;
+               
+               MM_RefPhys( TMPMAPLVL4(i) & PADDR_MASK );
+               
+               if( TMPMAPLVL4(i) & PF_WRITE ) {
                        TMPMAPLVL4(i) |= PF_COW;
                        TMPMAPLVL4(i) &= ~PF_WRITE;
                }
@@ -727,16 +797,30 @@ tPAddr MM_Clone(void)
        TMPMAPLVL4(508) = ret | 3;
        TMPMAPLVL4(509) = 0;    // Temp
        
-       // #6 Create kernel stack (-1 to account for the guard)
-       TMPMAPLVL4(320) = 0;
-       for( i = 0; i < KERNEL_STACK_SIZE/0x1000-1; i ++ )
+       // #6 Create kernel stack
+       //  tThread->KernelStack is the top
+       //  There is 1 guard page below the stack
+       kstackbase = Proc_GetCurThread()->KernelStack - KERNEL_STACK_SIZE;
+
+       Log("MM_Clone: kstackbase = %p", kstackbase);
+       
+       TMPMAPLVL4(MM_KSTACK_BASE >> PML4_SHIFT) = 0;
+       for( i = 1; i < KERNEL_STACK_SIZE/0x1000; i ++ )
        {
                tPAddr  phys = MM_AllocPhys();
                tVAddr  tmpmapping;
                MM_MapEx(kstackbase+i*0x1000, phys, 1, 0);
                
+               Log_Debug("MM", "MM_Clone: Cloning stack page %p from %P to %P",
+                       kstackbase+i*0x1000, MM_GetPhysAddr( kstackbase+i*0x1000 ), phys
+                       );
                tmpmapping = MM_MapTemp(phys);
-               memcpy((void*)tmpmapping, (void*)(kstackbase+i*0x1000), 0x1000);
+               if( MM_GetPhysAddr( kstackbase+i*0x1000 ) )
+                       memcpy((void*)tmpmapping, (void*)(kstackbase+i*0x1000), 0x1000);
+               else
+                       memset((void*)tmpmapping, 0, 0x1000);
+//             if( i == 0xF )
+//                     Debug_HexDump("MM_Clone: *tmpmapping = ", (void*)tmpmapping, 0x1000);
                MM_FreeTemp(tmpmapping);
        }
        
@@ -746,7 +830,7 @@ tPAddr MM_Clone(void)
        TMPCR3() = 0;
        INVLPG_ALL();
        Mutex_Release(&glMM_TempFractalLock);
-       Log("MM_Clone: RETURN %P\n", ret);
+//     Log("MM_Clone: RETURN %P", ret);
        return ret;
 }
 
@@ -829,7 +913,7 @@ void MM_ClearUser(void)
        }
 }
 
-tVAddr MM_NewWorkerStack(void)
+tVAddr MM_NewWorkerStack(void *StackData, size_t StackSize)
 {
        tVAddr  ret;
         int    i;
@@ -841,7 +925,9 @@ tVAddr MM_NewWorkerStack(void)
        // #2 Scan for a free stack addresss < 2^47
        for(ret = 0x100000; ret < (1ULL << 47); ret += KERNEL_STACK_SIZE)
        {
-               if( MM_GetPhysAddr(ret) == 0 )  break;
+               tPAddr  *ptr;
+               if( MM_GetPageEntryPtr(ret, 1, 0, 0, &ptr) <= 0 )       break;
+               if( !(*ptr & 1) )       break;
        }
        if( ret >= (1ULL << 47) ) {
                Mutex_Release(&glMM_TempFractalLock);
@@ -860,6 +946,19 @@ tVAddr MM_NewWorkerStack(void)
                }
                MM_MapEx(ret + i*0x1000, phys, 1, 0);
        }
+
+       if( StackSize > 0x1000 ) {
+               Log_Error("MM", "MM_NewWorkerStack: StackSize(0x%x) > 0x1000, cbf handling", StackSize);
+       }
+       else {
+               tPAddr  *ptr, paddr;
+               tVAddr  tmp_addr;
+               MM_GetPageEntryPtr(ret + i*0x1000, 1, 0, 0, &ptr);
+               paddr = *ptr & ~0xFFF;
+               tmp_addr = MM_MapTemp(paddr);
+               memcpy( (void*)(tmp_addr + (0x1000 - StackSize)), StackData, StackSize );
+               MM_FreeTemp(tmp_addr);
+       }
        
        Mutex_Release(&glMM_TempFractalLock);
        
@@ -875,11 +974,11 @@ tVAddr MM_NewKStack(void)
        Uint    i;
        for( ; base < MM_KSTACK_TOP; base += KERNEL_STACK_SIZE )
        {
-               if(MM_GetPhysAddr(base) != 0)
+               if(MM_GetPhysAddr(base+KERNEL_STACK_SIZE-0x1000) != 0)
                        continue;
                
                //Log("MM_NewKStack: Found one at %p", base + KERNEL_STACK_SIZE);
-               for( i = 0; i < KERNEL_STACK_SIZE; i += 0x1000)
+               for( i = 0x1000; i < KERNEL_STACK_SIZE; i += 0x1000)
                {
                        if( !MM_Allocate(base+i) )
                        {

UCC git Repository :: git.ucc.asn.au