X-Git-Url: https://git.ucc.asn.au/?a=blobdiff_plain;f=Kernel%2Farch%2Fx86_64%2Fmm_virt.c;h=a8d890a1547641d07157d9f34ebf312dadd7bb88;hb=f086aa018b58f23bc15fbee7b2c648e35bb7dc1c;hp=40d4eb17a4985c4cc153fc848ed3463c7062ea6a;hpb=2ebf89999759fc9d1ece6f98dfd439170995bb28;p=tpg%2Facess2.git diff --git a/Kernel/arch/x86_64/mm_virt.c b/Kernel/arch/x86_64/mm_virt.c index 40d4eb17..a8d890a1 100644 --- a/Kernel/arch/x86_64/mm_virt.c +++ b/Kernel/arch/x86_64/mm_virt.c @@ -11,6 +11,7 @@ // === CONSTANTS === #define PHYS_BITS 52 // TODO: Move out +#define VIRT_BITS 48 #define PML4_SHIFT 39 #define PDP_SHIFT 30 @@ -18,15 +19,16 @@ #define PTAB_SHIFT 12 #define PADDR_MASK 0x7FFFFFFF##FFFFF000 -#define PAGE_MASK (((Uint)1 << 36)-1) -#define TABLE_MASK (((Uint)1 << 27)-1) -#define PDP_MASK (((Uint)1 << 18)-1) -#define PML4_MASK (((Uint)1 << 9)-1) +#define PAGE_MASK ((1LL << 36)-1) +#define TABLE_MASK ((1LL << 27)-1) +#define PDP_MASK ((1LL << 18)-1) +#define PML4_MASK ((1LL << 9)-1) #define PF_PRESENT 0x001 #define PF_WRITE 0x002 #define PF_USER 0x004 -#define PF_LARGE 0x000 +#define PF_LARGE 0x080 +#define PF_GLOBAL 0x100 #define PF_COW 0x200 #define PF_PAGED 0x400 #define PF_NX 0x80000000##00000000 @@ -53,11 +55,14 @@ // === IMPORTS === extern void Error_Backtrace(Uint IP, Uint BP); extern tPAddr gInitialPML4[512]; +extern void Threads_SegFault(tVAddr Addr); +extern char _UsertextBase[]; // === PROTOTYPES === void MM_InitVirt(void); //void MM_FinishVirtualInit(void); -void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs); +void MM_int_ClonePageEnt( Uint64 *Ent, void *NextLevel, tVAddr Addr, int bTable ); + int MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs); void MM_DumpTables(tVAddr Start, tVAddr End); int MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage, tPAddr **Pointer); int MM_MapEx(tVAddr VAddr, tPAddr PAddr, BOOL bTemp, BOOL bLarge); @@ -72,7 +77,8 @@ tMutex glMM_TempFractalLock; // === CODE === void MM_InitVirt(void) { - MM_DumpTables(0, -1L); + Log_Debug("MMVirt", "&PAGEMAPLVL4(0) = %p", &PAGEMAPLVL4(0)); +// MM_DumpTables(0, -1L); } void MM_FinishVirtualInit(void) @@ -81,49 +87,121 @@ void MM_FinishVirtualInit(void) } /** + * \brief Clone a page from an entry + * \param Ent Pointer to the entry in the PML4/PDP/PD/PT + * \param NextLevel Pointer to contents of the entry + * \param Addr Dest address + * \note Used in COW + */ +void MM_int_ClonePageEnt( Uint64 *Ent, void *NextLevel, tVAddr Addr, int bTable ) +{ + tPAddr curpage = *Ent & PADDR_MASK; + if( MM_GetRefCount( curpage ) <= 0 ) { + Log_KernelPanic("MMVirt", "Page %P still marked COW, but unreferenced", curpage); + } +// Log_Debug("MM_Virt", "%P refcount %i", curpage, MM_GetRefCount( curpage )); + if( MM_GetRefCount( curpage ) == 1 ) + { + *Ent &= ~PF_COW; + *Ent |= PF_PRESENT|PF_WRITE; +// Log_Debug("MMVirt", "COW ent at %p (%p), last (%P)", Ent, NextLevel, curpage); + } + else + { + void *tmp; + tPAddr paddr; + + if( !(paddr = MM_AllocPhys()) ) { + Threads_SegFault(Addr); + return ; + } + + ASSERT(paddr != curpage); + + tmp = (void*)MM_MapTemp(paddr); + memcpy( tmp, NextLevel, 0x1000 ); + MM_FreeTemp( (tVAddr)tmp ); + +// Log_Debug("MMVirt", "COW ent at %p (%p) from %P to %P", Ent, NextLevel, curpage, paddr); + + MM_DerefPhys( curpage ); + *Ent &= PF_USER; + *Ent |= paddr|PF_PRESENT|PF_WRITE; + } + INVLPG( (tVAddr)NextLevel ); + + // Mark COW on pages + if(bTable) + { + Uint64 *dp = NextLevel; + int i; + for( i = 0; i < 512; i ++ ) + { + if( !(dp[i] & PF_PRESENT) ) continue; + MM_RefPhys( dp[i] & 
PADDR_MASK ); + if( dp[i] & PF_WRITE ) { + dp[i] &= ~PF_WRITE; + dp[i] |= PF_COW; + } + } + } +} + +/* * \brief Called on a page fault */ -void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs) +int MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs) { // TODO: Implement Copy-on-Write - #if 0 - if( gaPageDir [Addr>>22] & PF_PRESENT - && gaPageTable[Addr>>12] & PF_PRESENT - && gaPageTable[Addr>>12] & PF_COW ) + #if 1 + if( PAGEMAPLVL4(Addr>>39) & PF_PRESENT + && PAGEDIRPTR (Addr>>30) & PF_PRESENT + && PAGEDIR (Addr>>21) & PF_PRESENT + && PAGETABLE (Addr>>12) & PF_PRESENT ) { - tPAddr paddr; - if(MM_GetRefCount( gaPageTable[Addr>>12] & PADDR_MASK ) == 1) + // PML4 Entry + if( PAGEMAPLVL4(Addr>>39) & PF_COW ) { - gaPageTable[Addr>>12] &= ~PF_COW; - gaPageTable[Addr>>12] |= PF_PRESENT|PF_WRITE; + tPAddr *dp = &PAGEDIRPTR((Addr>>39)*512); + MM_int_ClonePageEnt( &PAGEMAPLVL4(Addr>>39), dp, Addr, 1 ); +// MM_DumpTables(Addr>>39 << 39, (((Addr>>39) + 1) << 39) - 1); } - else + // PDP Entry + if( PAGEDIRPTR(Addr>>30) & PF_COW ) { - //Log("MM_PageFault: COW - MM_DuplicatePage(0x%x)", Addr); - paddr = MM_DuplicatePage( Addr ); - MM_DerefPhys( gaPageTable[Addr>>12] & PADDR_MASK ); - gaPageTable[Addr>>12] &= PF_USER; - gaPageTable[Addr>>12] |= paddr|PF_PRESENT|PF_WRITE; + tPAddr *dp = &PAGEDIR( (Addr>>30)*512 ); + MM_int_ClonePageEnt( &PAGEDIRPTR(Addr>>30), dp, Addr, 1 ); +// MM_DumpTables(Addr>>30 << 30, (((Addr>>30) + 1) << 30) - 1); + } + // PD Entry + if( PAGEDIR(Addr>>21) & PF_COW ) + { + tPAddr *dp = &PAGETABLE( (Addr>>21)*512 ); + MM_int_ClonePageEnt( &PAGEDIR(Addr>>21), dp, Addr, 1 ); +// MM_DumpTables(Addr>>21 << 21, (((Addr>>21) + 1) << 21) - 1); + } + // PT Entry + if( PAGETABLE(Addr>>12) & PF_COW ) + { + MM_int_ClonePageEnt( &PAGETABLE(Addr>>12), (void*)(Addr & ~0xFFF), Addr, 0 ); + INVLPG( Addr & ~0xFFF ); + return 0; } - - INVLPG( Addr & ~0xFFF ); - return; } #endif // If it was a user, tell the thread handler if(ErrorCode & 4) { - Warning("%s %s %s memory%s", - (ErrorCode&4?"User":"Kernel"), + Warning("User %s %s memory%s", (ErrorCode&2?"write to":"read from"), (ErrorCode&1?"bad/locked":"non-present"), (ErrorCode&16?" (Instruction Fetch)":"") ); - Warning("User Pagefault: Instruction at %04x:%08x accessed %p", + Warning("User Pagefault: Instruction at %04x:%p accessed %p", Regs->CS, Regs->RIP, Addr); __asm__ __volatile__ ("sti"); // Restart IRQs -// Threads_SegFault(Addr); - return ; + Threads_SegFault(Addr); + return 0; } // Kernel #PF @@ -133,8 +211,7 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs) Warning("Reserved Bits Trashed!"); else { - Warning("%s %s %s memory%s", - (ErrorCode&4?"User":"Kernel"), + Warning("Kernel %s %s memory%s", (ErrorCode&2?"write to":"read from"), (ErrorCode&1?"bad/locked":"non-present"), (ErrorCode&16?" 
(Instruction Fetch)":"") @@ -146,10 +223,8 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs) Error_Backtrace(Regs->RIP, Regs->RBP); MM_DumpTables(0, -1); - - __asm__ __volatile__ ("cli"); - for( ;; ) - HALT(); + + return 1; } /** @@ -190,8 +265,9 @@ void MM_DumpTables(tVAddr Start, tVAddr End) || !(PAGETABLE(page) & PF_PRESENT) || (PAGETABLE(page) & MASK) != expected) { - if(expected != CHANGEABLE_BITS) { - Log("%016llx => %013llx : 0x%6llx (%c%c%c%c)", + if(expected != CHANGEABLE_BITS) + { + Log("%016llx => %13llx : 0x%6llx (%c%c%c%c)", CANOICAL(rangeStart), PAGETABLE(rangeStart>>12) & PADDR_MASK, curPos - rangeStart, @@ -205,19 +281,16 @@ void MM_DumpTables(tVAddr Start, tVAddr End) if( !(PAGEMAPLVL4(page>>27) & PF_PRESENT) ) { page += (1 << 27) - 1; curPos += (1L << 39) - 0x1000; - //Debug("pml4 ent unset (page = 0x%x now)", page); continue; } if( !(PAGEDIRPTR(page>>18) & PF_PRESENT) ) { page += (1 << 18) - 1; curPos += (1L << 30) - 0x1000; - //Debug("pdp ent unset (page = 0x%x now)", page); continue; } if( !(PAGEDIR(page>>9) & PF_PRESENT) ) { page += (1 << 9) - 1; curPos += (1L << 21) - 0x1000; - //Debug("pd ent unset (page = 0x%x now)", page); continue; } if( !(PAGETABLE(page) & PF_PRESENT) ) continue; @@ -230,7 +303,7 @@ void MM_DumpTables(tVAddr Start, tVAddr End) } if(expected != CHANGEABLE_BITS) { - Log("%016llx => %013llx : 0x%6llx (%c%c%c%c)", + Log("%016llx => %13llx : 0x%6llx (%c%c%c%c)", CANOICAL(rangeStart), PAGETABLE(rangeStart>>12) & PADDR_MASK, curPos - rangeStart, @@ -261,6 +334,8 @@ int MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage, const int nADDR_SIZES = sizeof(ADDR_SIZES)/sizeof(ADDR_SIZES[0]); int i; + #define BITMASK(bits) ( (1LL << (bits))-1 ) + if( bTemp ) { pmlevels[3] = &TMPTABLE(0); // Page Table @@ -271,9 +346,9 @@ int MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage, else { pmlevels[3] = (void*)MM_FRACTAL_BASE; // Page Table - pmlevels[2] = &pmlevels[3][(MM_FRACTAL_BASE>>12)&PAGE_MASK]; // PDIR - pmlevels[1] = &pmlevels[2][(MM_FRACTAL_BASE>>21)&TABLE_MASK]; // PDPT - pmlevels[0] = &pmlevels[1][(MM_FRACTAL_BASE>>30)&PDP_MASK]; // PML4 + pmlevels[2] = &pmlevels[3][(MM_FRACTAL_BASE>>12)&BITMASK(VIRT_BITS-12)]; // PDIR + pmlevels[1] = &pmlevels[2][(MM_FRACTAL_BASE>>21)&BITMASK(VIRT_BITS-21)]; // PDPT + pmlevels[0] = &pmlevels[1][(MM_FRACTAL_BASE>>30)&BITMASK(VIRT_BITS-30)]; // PML4 } // Mask address @@ -293,11 +368,15 @@ int MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage, if( !(pmlevels[i][Addr >> ADDR_SIZES[i]] & 1) ) { if( !bAllocate ) return -4; // If allocation is not requested, error - tmp = MM_AllocPhys(); - if(!tmp) return -2; + if( !(tmp = MM_AllocPhys()) ) return -2; pmlevels[i][Addr >> ADDR_SIZES[i]] = tmp | 3; + if( Addr < 0x800000000000 ) + pmlevels[i][Addr >> ADDR_SIZES[i]] |= PF_USER; INVLPG( &pmlevels[i+1][ (Addr>>ADDR_SIZES[i])*512 ] ); memset( &pmlevels[i+1][ (Addr>>ADDR_SIZES[i])*512 ], 0, 0x1000 ); + LOG("Init PML%i ent 0x%x %p with %P", 4 - i, + Addr>>ADDR_SIZES[i], + (Addr>>ADDR_SIZES[i])<> ADDR_SIZES[i]] & PF_LARGE ) @@ -335,7 +414,10 @@ int MM_MapEx(tVAddr VAddr, tPAddr PAddr, BOOL bTemp, BOOL bLarge) if( *ent & 1 ) LEAVE_RET('i', 0); *ent = PAddr | 3; - + + if( VAddr < 0x800000000000 ) + *ent |= PF_USER; + INVLPG( VAddr ); LEAVE('i', 1); @@ -363,7 +445,7 @@ void MM_Unmap(tVAddr VAddr) if( !(PAGEDIRPTR(VAddr >> 30) & 1) ) return ; // Check Page Dir if( !(PAGEDIR(VAddr >> 21) & 1) ) return ; - + PAGETABLE(VAddr >> PTAB_SHIFT) = 0; INVLPG( 
VAddr ); } @@ -562,6 +644,7 @@ tVAddr MM_MapHWPages(tPAddr PAddr, Uint Number) ret -= 0x1000; PAddr -= 0x1000; MM_Map(ret, PAddr); + MM_RefPhys(PAddr); } return ret; @@ -579,6 +662,7 @@ void MM_UnmapHWPages(tVAddr VAddr, Uint Number) // Log_KernelPanic("MM", "TODO: Implement MM_UnmapHWPages"); while( Number -- ) { + MM_DerefPhys( MM_GetPhysAddr(VAddr) ); MM_Unmap(VAddr); VAddr += 0x1000; } @@ -607,10 +691,7 @@ tVAddr MM_AllocDMA(int Pages, int MaxBits, tPAddr *PhysAddr) phys = MM_AllocPhys(); *PhysAddr = phys; ret = MM_MapHWPages(phys, 1); - if(ret == 0) { - MM_DerefPhys(phys); - return 0; - } + MM_DerefPhys(phys); return ret; } @@ -621,10 +702,11 @@ tVAddr MM_AllocDMA(int Pages, int MaxBits, tPAddr *PhysAddr) // Allocated successfully, now map ret = MM_MapHWPages(phys, Pages); + // MapHWPages references the pages, so deref them back down to 1 + for(;Pages--;phys+=0x1000) + MM_DerefPhys(phys); if( ret == 0 ) { // If it didn't map, free then return 0 - for(;Pages--;phys+=0x1000) - MM_DerefPhys(phys); return 0; } @@ -650,6 +732,8 @@ tVAddr MM_MapTemp(tPAddr PAddr) continue ; *ent = PAddr | 3; + MM_RefPhys(PAddr); + INVLPG(ret); return ret; } return 0; @@ -669,12 +753,6 @@ tPAddr MM_Clone(void) int i; tVAddr kstackbase; - // tThread->KernelStack is the top - // There is 1 guard page below the stack - kstackbase = Proc_GetCurThread()->KernelStack - KERNEL_STACK_SIZE + 0x1000; - - Log("MM_Clone: kstackbase = %p", kstackbase); - // #1 Create a copy of the PML4 ret = MM_AllocPhys(); if(!ret) return 0; @@ -689,9 +767,11 @@ tPAddr MM_Clone(void) { TMPMAPLVL4(i) = PAGEMAPLVL4(i); // Log_Debug("MM", "TMPMAPLVL4(%i) = 0x%016llx", i, TMPMAPLVL4(i)); - if( TMPMAPLVL4(i) & 1 ) - { - MM_RefPhys( TMPMAPLVL4(i) & PADDR_MASK ); + if( !(TMPMAPLVL4(i) & PF_PRESENT) ) continue ; + + MM_RefPhys( TMPMAPLVL4(i) & PADDR_MASK ); + + if( TMPMAPLVL4(i) & PF_WRITE ) { TMPMAPLVL4(i) |= PF_COW; TMPMAPLVL4(i) &= ~PF_WRITE; } @@ -704,9 +784,9 @@ tPAddr MM_Clone(void) // 320 0xFFFFA.... - Kernel Stacks if( i == 320 ) continue; // 509 0xFFFFFE0.. - Fractal mapping - if( i == 509 ) continue; + if( i == 508 ) continue; // 510 0xFFFFFE8.. 
- Temp fractal mapping - if( i == 510 ) continue; + if( i == 509 ) continue; TMPMAPLVL4(i) = PAGEMAPLVL4(i); if( TMPMAPLVL4(i) & 1 ) @@ -714,27 +794,43 @@ tPAddr MM_Clone(void) } // #5 Set fractal mapping - TMPMAPLVL4(509) = ret | 3; - TMPMAPLVL4(510) = 0; // Temp + TMPMAPLVL4(508) = ret | 3; + TMPMAPLVL4(509) = 0; // Temp - // #6 Create kernel stack (-1 to account for the guard) - TMPMAPLVL4(320) = 0; - for( i = 0; i < KERNEL_STACK_SIZE/0x1000-1; i ++ ) + // #6 Create kernel stack + // tThread->KernelStack is the top + // There is 1 guard page below the stack + kstackbase = Proc_GetCurThread()->KernelStack - KERNEL_STACK_SIZE; + + Log("MM_Clone: kstackbase = %p", kstackbase); + + TMPMAPLVL4(MM_KSTACK_BASE >> PML4_SHIFT) = 0; + for( i = 1; i < KERNEL_STACK_SIZE/0x1000; i ++ ) { tPAddr phys = MM_AllocPhys(); tVAddr tmpmapping; MM_MapEx(kstackbase+i*0x1000, phys, 1, 0); + Log_Debug("MM", "MM_Clone: Cloning stack page %p from %P to %P", + kstackbase+i*0x1000, MM_GetPhysAddr( kstackbase+i*0x1000 ), phys + ); tmpmapping = MM_MapTemp(phys); - memcpy((void*)tmpmapping, (void*)(kstackbase+i*0x1000), 0x1000); + if( MM_GetPhysAddr( kstackbase+i*0x1000 ) ) + memcpy((void*)tmpmapping, (void*)(kstackbase+i*0x1000), 0x1000); + else + memset((void*)tmpmapping, 0, 0x1000); +// if( i == 0xF ) +// Debug_HexDump("MM_Clone: *tmpmapping = ", (void*)tmpmapping, 0x1000); MM_FreeTemp(tmpmapping); } +// MAGIC_BREAK(); + // #7 Return TMPCR3() = 0; INVLPG_ALL(); Mutex_Release(&glMM_TempFractalLock); - Log("MM_Clone: RETURN %P\n", ret); +// Log("MM_Clone: RETURN %P", ret); return ret; } @@ -817,7 +913,7 @@ void MM_ClearUser(void) } } -tVAddr MM_NewWorkerStack(void) +tVAddr MM_NewWorkerStack(void *StackData, size_t StackSize) { tVAddr ret; int i; @@ -829,7 +925,9 @@ tVAddr MM_NewWorkerStack(void) // #2 Scan for a free stack addresss < 2^47 for(ret = 0x100000; ret < (1ULL << 47); ret += KERNEL_STACK_SIZE) { - if( MM_GetPhysAddr(ret) == 0 ) break; + tPAddr *ptr; + if( MM_GetPageEntryPtr(ret, 1, 0, 0, &ptr) <= 0 ) break; + if( !(*ptr & 1) ) break; } if( ret >= (1ULL << 47) ) { Mutex_Release(&glMM_TempFractalLock); @@ -848,6 +946,19 @@ tVAddr MM_NewWorkerStack(void) } MM_MapEx(ret + i*0x1000, phys, 1, 0); } + + if( StackSize > 0x1000 ) { + Log_Error("MM", "MM_NewWorkerStack: StackSize(0x%x) > 0x1000, cbf handling", StackSize); + } + else { + tPAddr *ptr, paddr; + tVAddr tmp_addr; + MM_GetPageEntryPtr(ret + i*0x1000, 1, 0, 0, &ptr); + paddr = *ptr & ~0xFFF; + tmp_addr = MM_MapTemp(paddr); + memcpy( (void*)(tmp_addr + (0x1000 - StackSize)), StackData, StackSize ); + MM_FreeTemp(tmp_addr); + } Mutex_Release(&glMM_TempFractalLock); @@ -863,11 +974,11 @@ tVAddr MM_NewKStack(void) Uint i; for( ; base < MM_KSTACK_TOP; base += KERNEL_STACK_SIZE ) { - if(MM_GetPhysAddr(base) != 0) + if(MM_GetPhysAddr(base+KERNEL_STACK_SIZE-0x1000) != 0) continue; //Log("MM_NewKStack: Found one at %p", base + KERNEL_STACK_SIZE); - for( i = 0; i < KERNEL_STACK_SIZE; i += 0x1000) + for( i = 0x1000; i < KERNEL_STACK_SIZE; i += 0x1000) { if( !MM_Allocate(base+i) ) {
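
For reference, the core of this change is the new MM_int_ClonePageEnt() helper, which resolves a copy-on-write fault at any paging level (PML4/PDP/PD/PT). The sketch below is not part of the diff itself; it only restates the leaf-page half of that logic in standalone form. It assumes the helpers and constants used in the diff (MM_GetRefCount, MM_AllocPhys, MM_MapTemp, MM_FreeTemp, MM_DerefPhys, INVLPG, memcpy, the PF_* flags and PADDR_MASK) behave as defined elsewhere in this file, and the function name cow_resolve_leaf is hypothetical.

/*
 * Illustrative sketch of leaf-page COW resolution (mirrors MM_int_ClonePageEnt
 * with bTable == 0). Returns 0 on success, or -1 if no frame could be
 * allocated, in which case the caller raises a segfault as the diff does.
 */
static int cow_resolve_leaf(Uint64 *Ent, tVAddr Addr)
{
	tPAddr	curpage = *Ent & PADDR_MASK;
	
	if( MM_GetRefCount(curpage) == 1 )
	{
		// Sole remaining owner: drop the COW marker and restore write access
		*Ent &= ~PF_COW;
		*Ent |= PF_PRESENT|PF_WRITE;
	}
	else
	{
		// Frame is shared: copy it into a fresh frame and reference that instead
		tPAddr	newpage;
		void	*tmp;
		if( !(newpage = MM_AllocPhys()) )
			return -1;
		tmp = (void*)MM_MapTemp(newpage);
		// The old frame is still mapped (read-only) at the faulting address
		memcpy(tmp, (void*)(Addr & ~0xFFF), 0x1000);
		MM_FreeTemp( (tVAddr)tmp );
		MM_DerefPhys(curpage);
		*Ent &= PF_USER;	// Keep only the user bit of the old entry
		*Ent |= newpage|PF_PRESENT|PF_WRITE;
	}
	INVLPG( Addr & ~0xFFF );
	return 0;
}

The refcount == 1 fast path is what keeps repeated faults on the same region cheap: once every other holder of the frame has exited or broken its own COW mapping, the last holder simply reclaims write access without copying.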