/* $NetBSD: pmap.c,v 1.117 2022/03/20 18:56:29 andvar Exp $ */

/*-
 * Copyright (c) 2011 Reinoud Zandijk
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.117 2022/03/20 18:56:29 andvar Exp $");

#include "opt_memsize.h"
#include "opt_kmempages.h"
#include "opt_misc.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/kmem.h>
#include <sys/malloc.h>
#include <sys/pool.h>

#include <machine/thunk.h>
#include <machine/machdep.h>
#include <machine/pcb.h>

#include <uvm/uvm.h>

struct pv_entry {
	struct pv_entry	*pv_next;
	pmap_t		 pv_pmap;
	uintptr_t	 pv_ppn;	/* physical page number */
	uintptr_t	 pv_lpn;	/* logical page number */
	vm_prot_t	 pv_prot;	/* logical protection */
	uint8_t		 pv_mmap_ppl;	/* programmed protection */
	uint8_t		 pv_vflags;	/* per mapping flags */
#define PV_WIRED	0x01		/* wired mapping */
#define PV_UNMANAGED	0x02		/* entered by pmap_kenter_ */
	uint8_t		 pv_pflags;	/* per phys page flags */
#define PV_REFERENCED	0x01
#define PV_MODIFIED	0x02
};

#define PMAP_L2_SIZE	PAGE_SIZE
#define PMAP_L2_NENTRY	(PMAP_L2_SIZE / sizeof(struct pv_entry *))

struct pmap_l2 {
	struct pv_entry *pm_l2[PMAP_L2_NENTRY];
};

struct pmap {
	int	pm_count;
	int	pm_flags;
#define PM_ACTIVE 0x01
	struct pmap_statistics	 pm_stats;
	struct pmap_l2		**pm_l1;
};

/*
 * pv_table is a list of pv_entry structs completely spanning the total
 * memory.  It is indexed by physical page number.  Each entry is daisy
 * chained with the pv_entry records for every use of the page in all the
 * pmaps.
 *
 * kernel_pm_entries contains all kernel L2 pages for its complete map.
 */
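/*
 * Illustrative sketch (values hypothetical): a physical page mapped by two
 * pmaps has its first mapping recorded directly in pv_table[ppn]; further
 * mappings are chained off it with pv_alloc()'ed entries:
 *
 *	pv_table[ppn] (pmap A, lpn 12) -> pv (pmap B, lpn 57) -> NULL
 *
 * The per-physical-page flags (pv_pflags) are tracked in the head entry
 * only; the per-mapping state (pv_prot, pv_vflags, pv_mmap_ppl) lives in
 * each link.
 */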
static struct pv_entry **kernel_pm_entries;
static struct pv_entry  *pv_table;	/* physical pages info (direct mapped) */
static struct pv_entry **tlb;		/* current tlb mappings (direct mapped) */

static struct pmap	 pmap_kernel_store;
struct pmap * const	 kernel_pmap_ptr = &pmap_kernel_store;

static pmap_t active_pmap = NULL;

static char mem_name[20] = "";
static int  mem_fh;

static int	phys_npages = 0;
static int	pm_nentries = 0;
static int	pm_nl1 = 0;
static int	pm_l1_size = 0;
static uint64_t	pm_entries_size = 0;

static void *pm_tmp_p0;
static void *pm_tmp_p1;

static struct pool pmap_pool;
static struct pool pmap_pventry_pool;

/* forwards */
void		pmap_bootstrap(void);
static void	pmap_page_activate(struct pv_entry *pv);
static void	pmap_page_deactivate(struct pv_entry *pv);
static void	pv_update(struct pv_entry *pv);
static void	pmap_update_page(uintptr_t ppn);
bool		pmap_fault(pmap_t pmap, vaddr_t va, vm_prot_t *atype);

static struct pv_entry *pv_get(pmap_t pmap, uintptr_t ppn, uintptr_t lpn);
static struct pv_entry *pv_alloc(void);
static void		pv_free(struct pv_entry *pv);
static void		pmap_deferred_init(void);

extern void setup_signal_handlers(void);

/* exposed (to the signal handler, for example) */
vaddr_t kmem_k_start, kmem_k_end;
vaddr_t kmem_kvm_start, kmem_kvm_end;
vaddr_t kmem_user_start, kmem_user_end;
vaddr_t kmem_kvm_cur_start, kmem_kvm_cur_end;

/* bookkeeping counters */
int num_pv_entries = 0;
int num_pmaps = 0;

#define SPARSE_MEMFILE

void
pmap_bootstrap(void)
{
	struct pmap *pmap;
	paddr_t DRAM_cfg;
	paddr_t fpos, file_len;
	paddr_t kernel_fpos, pv_fpos, tlb_fpos, pm_l1_fpos, pm_fpos;
	paddr_t wlen;
	paddr_t barrier_len;
	paddr_t pv_table_size;
	vaddr_t free_start, free_end;
	paddr_t pa;
	vaddr_t va;
	size_t kmem_k_length, written;
	uintptr_t pg, l1;
	void *addr;
	int err;

	extern void _start(void);	/* start of kernel */
	extern int etext;		/* end of kernel text */
	extern int edata;		/* end of the init. data segment */
	extern int end;			/* end of bss */

	vaddr_t vm_min_addr;

	vm_min_addr = thunk_get_vm_min_address();
	vm_min_addr = vm_min_addr < PAGE_SIZE ? PAGE_SIZE : vm_min_addr;

	thunk_printf_debug("Information retrieved from system and elf image\n");
	thunk_printf_debug("min VM address at %p\n", (void *) vm_min_addr);
	thunk_printf_debug("start kernel at %p\n", _start);
	thunk_printf_debug("  end kernel at %p\n", &etext);
	thunk_printf_debug("  end of init. data at %p\n", &edata);
	thunk_printf_debug("1st end of data at %p\n", &end);
	thunk_printf_debug("CUR end data at %p\n", thunk_sbrk(0));
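
	/*
	 * Rough virtual address space layout as computed below (low to
	 * high), with a 2 MB barrier between the areas:
	 *
	 *	kmem_user_start .. kmem_user_end  userland (---/R--/RW-/RWX)
	 *	  <barrier>
	 *	kmem_kvm_start .. kmem_kvm_end    kernel virtual memory; the
	 *	                                  first two pages are reserved
	 *	                                  as pmap scratch space
	 *	                                  (pm_tmp_p0 / pm_tmp_p1)
	 *	  <barrier>
	 *	kmem_k_start .. kmem_k_end        kernel text (R-X)
	 */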
	barrier_len = 2 * 1024 * 1024;

	/* calculate kernel section (R-X) */
	kmem_k_start = (vaddr_t) PAGE_SIZE * (atop(_start));
	kmem_k_end   = (vaddr_t) PAGE_SIZE * (atop(&etext) + 1);
	kmem_k_length = kmem_k_end - kmem_k_start;

	/* calculate total available memory space & available pages */
	DRAM_cfg = (vaddr_t) TEXTADDR;
	physmem  = DRAM_cfg / PAGE_SIZE;

	/* kvm at the top */
	kmem_kvm_end   = kmem_k_start - barrier_len;
	kmem_kvm_start = kmem_kvm_end - KVMSIZE;

	/* allow some pmap scratch space */
	pm_tmp_p0 = (void *) (kmem_kvm_start);
	pm_tmp_p1 = (void *) (kmem_kvm_start + PAGE_SIZE);
	kmem_kvm_start += 2*PAGE_SIZE;

	/* claim an area for userland (---/R--/RW-/RWX) */
	kmem_user_start = vm_min_addr;
	kmem_user_end   = kmem_kvm_start - barrier_len;

	/* print summary */
	aprint_verbose("\nMemory summary\n");
	aprint_verbose("\tkmem_user_start\t%p\n", (void *) kmem_user_start);
	aprint_verbose("\tkmem_user_end\t%p\n", (void *) kmem_user_end);
	aprint_verbose("\tkmem_k_start\t%p\n", (void *) kmem_k_start);
	aprint_verbose("\tkmem_k_end\t%p\n", (void *) kmem_k_end);
	aprint_verbose("\tkmem_kvm_start\t%p\n", (void *) kmem_kvm_start);
	aprint_verbose("\tkmem_kvm_end\t%p\n", (void *) kmem_kvm_end);
	aprint_verbose("\tDRAM_cfg\t%10d\n", (int) DRAM_cfg);
	aprint_verbose("\tkvmsize\t\t%10d\n", (int) KVMSIZE);
	aprint_verbose("\tuser_len\t%10d\n",
	    (int) (kmem_user_end - kmem_user_start));
	aprint_verbose("\n\n");

	/* make critical assertions before modifying anything */
	if (sizeof(struct pcb) > USPACE) {
		panic("sizeof(struct pcb) is %d bytes too big for USPACE. "
		    "Please adjust TRAPSTACKSIZE calculation",
		    (int) (USPACE - sizeof(struct pcb)));
	}
	if (TRAPSTACKSIZE < 4*PAGE_SIZE) {
		panic("TRAPSTACKSIZE is too small, please increase UPAGES");
	}
	if (sizeof(struct pmap_l2) > PAGE_SIZE) {
		panic("struct pmap_l2 bigger than one page?\n");
	}

	/* protect user memory UVM area (---) */
	err = thunk_munmap((void *) kmem_user_start,
	    kmem_k_start - kmem_user_start);
	if (err)
		panic("pmap_bootstrap: userland uvm space protection "
		    "failed (%d)\n", thunk_geterrno());

#if 0
	/* protect kvm UVM area if separate (---) */
	err = thunk_munmap((void *) kmem_kvm_start,
	    kmem_kvm_end - kmem_kvm_start);
	if (err)
		panic("pmap_bootstrap: kvm uvm space protection "
		    "failed (%d)\n", thunk_geterrno());
#endif

	thunk_printf_debug("Creating memory mapped backend\n");

	/* create memory file since mmap/maccess can only be done on files */
	strlcpy(mem_name, "/tmp/netbsd.XXXXXX", sizeof(mem_name));
	mem_fh = thunk_mkstemp(mem_name);
	if (mem_fh < 0)
		panic("pmap_bootstrap: can't create memory file\n");
	/* unlink the file so space is freed when we quit */
	if (thunk_unlink(mem_name) == -1)
		panic("pmap_bootstrap: can't unlink %s", mem_name);

	/* file_len is the backing store length, nothing to do with placement */
	file_len = DRAM_cfg;

#ifdef SPARSE_MEMFILE
	{
		char dummy;

		wlen = thunk_pwrite(mem_fh, &dummy, 1, file_len - 1);
		if (wlen != 1)
			panic("pmap_bootstrap: can't grow file\n");
	}
#else
	{
		char block[PAGE_SIZE];

		printf("Creating memory file\r");
		for (pg = 0; pg < file_len; pg += PAGE_SIZE) {
			wlen = thunk_pwrite(mem_fh, block, PAGE_SIZE, pg);
			if (wlen != PAGE_SIZE)
				panic("pmap_bootstrap: write fails, "
				    "disc full?");
		}
	}
#endif

	/* protect the current kernel section */
	err = thunk_mprotect((void *) kmem_k_start, kmem_k_length,
	    THUNK_PROT_READ | THUNK_PROT_EXEC);
	assert(err == 0);
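
	/*
	 * The mkstemp()'ed file above is this port's "physical memory": a
	 * physical address is simply an offset into mem_fh, and a page is
	 * entered into an address space by mmap()'ing the corresponding
	 * file range at the wanted virtual address.  A minimal sketch of
	 * the idea (values hypothetical):
	 *
	 *	paddr_t pa = 42 * PAGE_SIZE;	// "physical" page 42
	 *	vaddr_t va = ...;		// where it should appear
	 *	thunk_mmap((void *) va, PAGE_SIZE, THUNK_PROT_READ,
	 *	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
	 *	    mem_fh, pa);
	 *
	 * Since all mappings are shared views of the same file, every pmap
	 * automatically sees the same page contents.
	 */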
	/* madvise the host kernel about our intentions with the memory */
	/* no measured effect, but might make a difference on high load */
	err = thunk_madvise((void *) kmem_user_start,
	    kmem_k_start - kmem_user_start,
	    THUNK_MADV_WILLNEED | THUNK_MADV_RANDOM);
	assert(err == 0);

	/* map the kernel at the start of the 'memory' file */
	kernel_fpos = 0;
	written = thunk_pwrite(mem_fh, (void *) kmem_k_start, kmem_k_length,
	    kernel_fpos);
	assert(written == kmem_k_length);
	fpos = kernel_fpos + kmem_k_length;

	/* initialize counters */
	free_start = fpos;		/* in physical space ! */
	free_end   = file_len;		/* in physical space ! */
	kmem_kvm_cur_start = kmem_kvm_start;

	/* calculate pv table size */
	phys_npages = file_len / PAGE_SIZE;
	pv_table_size = round_page(phys_npages * sizeof(struct pv_entry));
	thunk_printf_debug("claiming %"PRIu64" KB of pv_table for "
	    "%"PRIdPTR" pages of physical memory\n",
	    (uint64_t) pv_table_size/1024, (uintptr_t) phys_npages);

	/* calculate number of pmap entries needed for a complete map */
	pm_nentries = (kmem_k_end - VM_MIN_ADDRESS) / PAGE_SIZE;
	pm_entries_size = round_page(pm_nentries * sizeof(struct pv_entry *));
	thunk_printf_debug("tlb va->pa lookup table is %"PRIu64" KB for "
	    "%d logical pages\n", pm_entries_size/1024, pm_nentries);

	/* calculate how big the l1 tables are going to be */
	pm_nl1 = pm_nentries / PMAP_L2_NENTRY;
	pm_l1_size = round_page(pm_nl1 * sizeof(struct pmap_l2 *));

	/* claim pv table */
	pv_fpos = fpos;
	pv_table = (struct pv_entry *) kmem_kvm_cur_start;
	addr = thunk_mmap(pv_table, pv_table_size,
	    THUNK_PROT_READ | THUNK_PROT_WRITE,
	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
	    mem_fh, pv_fpos);
	if (addr != (void *) pv_table)
		panic("pmap_bootstrap: can't map in pv table\n");

	memset(pv_table, 0, pv_table_size);	/* test and clear */
	thunk_printf_debug("pv_table initialised correctly, mmap works\n");

	/* advance */
	kmem_kvm_cur_start += pv_table_size;
	fpos += pv_table_size;

	/* set up tlb space */
	tlb = (struct pv_entry **) kmem_kvm_cur_start;
	tlb_fpos = fpos;
	addr = thunk_mmap(tlb, pm_entries_size,
	    THUNK_PROT_READ | THUNK_PROT_WRITE,
	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
	    mem_fh, tlb_fpos);
	if (addr != (void *) tlb)
		panic("pmap_bootstrap: can't map in tlb entries\n");

	memset(tlb, 0, pm_entries_size);	/* test and clear */
	thunk_printf_debug("kernel tlb entries initialized correctly\n");

	/* advance */
	kmem_kvm_cur_start += pm_entries_size;
	fpos += pm_entries_size;

	/* set up kernel pmap and add a l1 map */
	pmap = pmap_kernel();
	memset(pmap, 0, sizeof(*pmap));
	pmap->pm_count = 1;		/* reference */
	pmap->pm_flags = PM_ACTIVE;	/* kernel pmap is always active */

	pmap->pm_l1 = (struct pmap_l2 **) kmem_kvm_cur_start;
	pm_l1_fpos = fpos;
	addr = thunk_mmap(pmap->pm_l1, pm_l1_size,
	    THUNK_PROT_READ | THUNK_PROT_WRITE,
	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
	    mem_fh, pm_l1_fpos);
	if (addr != (void *) pmap->pm_l1)
		panic("pmap_bootstrap: can't map in pmap l1 entries\n");

	memset(pmap->pm_l1, 0, pm_l1_size);	/* test and clear */
	thunk_printf_debug("kernel pmap l1 table initialised correctly\n");

	/* advance for l1 tables */
	kmem_kvm_cur_start += round_page(pm_l1_size);
	fpos += round_page(pm_l1_size);

	/* followed by the pm entries */
	pm_fpos = fpos;
	kernel_pm_entries = (struct pv_entry **) kmem_kvm_cur_start;
	addr = thunk_mmap(kernel_pm_entries, pm_entries_size,
	    THUNK_PROT_READ | THUNK_PROT_WRITE,
	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
	    mem_fh, pm_fpos);
	if (addr != (void *) kernel_pm_entries)
		panic("pmap_bootstrap: can't map in kernel pmap entries\n");

	memset(kernel_pm_entries, 0, pm_entries_size);	/* test and clear */
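
	/*
	 * At this point the backing file is laid out as follows (file
	 * offsets double as physical addresses):
	 *
	 *	kernel_fpos (0)   kernel text image
	 *	pv_fpos           pv_table
	 *	tlb_fpos          tlb shadow array
	 *	pm_l1_fpos        kernel pmap l1 table
	 *	pm_fpos           kernel pm entries
	 *	fpos .. file_len  free pages, handed to uvm below
	 */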
	/* advance for the statically allocated pm_entries */
	kmem_kvm_cur_start += pm_entries_size;
	fpos += pm_entries_size;

	/* put pointers in the l1 to point to the pv_entry space */
	for (l1 = 0; l1 < pm_nl1; l1++) {
		pmap = pmap_kernel();
		pmap->pm_l1[l1] = (struct pmap_l2 *)
		    ((vaddr_t) kernel_pm_entries + l1 * PMAP_L2_SIZE);
	}

	/* kmem used [kmem_kvm_start - kmem_kvm_cur_start] */
	kmem_kvm_cur_end = kmem_kvm_cur_start;

	/* manually enter the mappings into the kernel map */
	for (pg = 0; pg < pv_table_size; pg += PAGE_SIZE) {
		pa = pv_fpos + pg;
		va = (vaddr_t) pv_table + pg;
		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
	}
	thunk_printf_debug("pv_table mem added to the kernel pmap\n");
	for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) {
		pa = tlb_fpos + pg;
		va = (vaddr_t) tlb + pg;
		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
	}
	thunk_printf_debug("kernel tlb entries mem added to the kernel pmap\n");
	for (pg = 0; pg < pm_l1_size; pg += PAGE_SIZE) {
		pa = pm_l1_fpos + pg;
		va = (vaddr_t) pmap->pm_l1 + pg;
		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
	}
	thunk_printf_debug("kernel pmap l1 mem added to the kernel pmap\n");
	for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) {
		pa = pm_fpos + pg;
		va = (vaddr_t) kernel_pm_entries + pg;
		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
	}
	thunk_printf_debug("kernel pmap entries mem added to the kernel pmap\n");

#if 0
	/* not yet, or not needed */
	for (pg = 0; pg < kmem_k_length; pg += PAGE_SIZE) {
		pa = kernel_fpos + pg;
		va = (vaddr_t) kmem_k_start + pg;
		pmap_kenter_pa(va, pa,
		    VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE, 0);
	}
	thunk_printf_debug("kernel mem added to the kernel pmap\n");
#endif

	/* add file space to uvm's FREELIST */
	uvm_page_physload(atop(0),
	    atop(free_end),
	    atop(free_start + fpos),	/* mark used till fpos */
	    atop(free_end),
	    VM_FREELIST_DEFAULT);

	/* setup syscall emulation */
	if (thunk_syscallemu_init((void *)VM_MIN_ADDRESS,
	    (void *)VM_MAXUSER_ADDRESS) != 0)
		panic("couldn't enable syscall emulation");

	aprint_verbose("leaving pmap_bootstrap:\n");
	aprint_verbose("\t%"PRIu64" MB of physical pages left\n",
	    (uint64_t) (free_end - (free_start + fpos))/1024/1024);
	aprint_verbose("\t%"PRIu64" MB of kmem left\n",
	    (uint64_t) (kmem_kvm_end - kmem_kvm_cur_end)/1024/1024);

	setup_signal_handlers();
}

void
pmap_init(void)
{
}

/* return kernel space start and end (including growth) */
void
pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
{
	if (vstartp)
		*vstartp = kmem_kvm_cur_start;		/* min to map in */
	if (vendp)
		*vendp = kmem_kvm_end - PAGE_SIZE;	/* max available */
}
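
/*
 * Pool setup is deferred to the first pmap_create() call rather than being
 * done in pmap_bootstrap(), which runs too early in startup for pool_init()
 * to be usable.
 */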
static void
pmap_deferred_init(void)
{
	/* XXX we COULD realloc our pv_table etc. with malloc() but for what? */

	/* create pmap pool */
	pool_init(&pmap_pool, sizeof(struct pmap), 0, 0, 0,
	    "pmappool", NULL, IPL_NONE);
	pool_init(&pmap_pventry_pool, sizeof(struct pv_entry), 0, 0, 0,
	    "pventry", NULL, IPL_HIGH);
}

pmap_t
pmap_create(void)
{
	static int pmap_initialised = 0;
	struct pmap *pmap;

	if (!pmap_initialised) {
		pmap_deferred_init();
		pmap_initialised = 1;
	}

	thunk_printf_debug("pmap_create\n");
	num_pmaps++;
#if 0
	printf("%s: pre alloc: num_pmaps %"PRIu64" (%"PRIu64" kb), "
	    "num_pv_entries %"PRIu64" (%"PRIu64" kb)\n",
	    __func__,
	    (uint64_t) num_pmaps,
	    (uint64_t) num_pmaps * (sizeof(*pmap) + pm_l1_size) / 1024,
	    (uint64_t) num_pv_entries,
	    (uint64_t) num_pv_entries * (sizeof(struct pv_entry)) / 1024);
#endif

	pmap = pool_get(&pmap_pool, PR_WAITOK);
	memset(pmap, 0, sizeof(*pmap));
	pmap->pm_count = 1;
	pmap->pm_flags = 0;

	/* claim l1 table */
	pmap->pm_l1 = kmem_zalloc(pm_l1_size, KM_SLEEP);
	assert(pmap->pm_l1);

	thunk_printf_debug("\tpmap %p\n", pmap);

	return pmap;
}

void
pmap_destroy(pmap_t pmap)
{
	struct pmap_l2 *l2tbl;
	int l1;

	/* if multiple references exist just remove a reference */
	thunk_printf_debug("pmap_destroy %p\n", pmap);
	if (--pmap->pm_count > 0)
		return;

	num_pmaps--;

	/* safeguard against silly errors */
	KASSERT((pmap->pm_flags & PM_ACTIVE) == 0);
	KASSERT(pmap->pm_stats.resident_count == 0);
	KASSERT(pmap->pm_stats.wired_count == 0);

#ifdef DIAGNOSTIC
	for (l1 = 0; l1 < pm_nl1; l1++) {
		int l2;

		l2tbl = pmap->pm_l1[l1];
		if (!l2tbl)
			continue;
		for (l2 = 0; l2 < PMAP_L2_NENTRY; l2++) {
			if (l2tbl->pm_l2[l2])
				panic("pmap_destroy: pmap isn't empty");
		}
	}
#endif

	for (l1 = 0; l1 < pm_nl1; l1++) {
		l2tbl = pmap->pm_l1[l1];
		if (!l2tbl)
			continue;
		kmem_free(l2tbl, PMAP_L2_SIZE);
	}
	kmem_free(pmap->pm_l1, pm_l1_size);

	pool_put(&pmap_pool, pmap);
}

void
pmap_reference(pmap_t pmap)
{
	thunk_printf_debug("pmap_reference %p\n", (void *) pmap);
	pmap->pm_count++;
}

long
pmap_resident_count(pmap_t pmap)
{
	return pmap->pm_stats.resident_count;
}

long
pmap_wired_count(pmap_t pmap)
{
	return pmap->pm_stats.wired_count;
}

static struct pv_entry *
pv_alloc(void)
{
	struct pv_entry *pv;

	num_pv_entries++;
	pv = pool_get(&pmap_pventry_pool, PR_WAITOK);
	memset(pv, 0, sizeof(struct pv_entry));

	return pv;
}

static void
pv_free(struct pv_entry *pv)
{
	num_pv_entries--;
	pool_put(&pmap_pventry_pool, pv);
}

static struct pv_entry *
pv_get(pmap_t pmap, uintptr_t ppn, uintptr_t lpn)
{
	struct pv_entry *pv;

	/* If the head entry is free, use that. */
	pv = &pv_table[ppn];
	if (pv->pv_pmap == NULL) {
		pmap->pm_stats.resident_count++;
		return pv;
	}

	/* If this mapping exists already, use that. */
	for (; pv != NULL; pv = pv->pv_next) {
		if ((pv->pv_pmap == pmap) && (pv->pv_lpn == lpn)) {
			return pv;
		}
	}

	/* Otherwise, allocate a new entry and link it in after the head. */
	thunk_printf_debug("pv_get: multiple mapped page ppn %"PRIdPTR", "
	    "lpn %"PRIdPTR"\n", ppn, lpn);

	/* extra sanity */
	assert(ppn < phys_npages);

	pv = pv_alloc();
	if (pv == NULL)
		return NULL;

	pv->pv_next = pv_table[ppn].pv_next;
	pv_table[ppn].pv_next = pv;

	pmap->pm_stats.resident_count++;

	return pv;
}
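
/*
 * A logical page number is split into an l1 and an l2 index:
 *
 *	pmap->pm_l1[lpn / PMAP_L2_NENTRY]->pm_l2[lpn % PMAP_L2_NENTRY]
 *
 * As a worked example (figures illustrative; the real values depend on
 * PAGE_SIZE and the host pointer size): on a 64 bit host with 4 KB pages,
 * PMAP_L2_NENTRY is 4096 / 8 = 512, so lpn 1234 lands in l1 slot 2,
 * l2 slot 210 (1234 = 2 * 512 + 210).
 */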
*/ thunk_printf_debug("pv_get: multiple mapped page ppn %"PRIdPTR", " "lpn %"PRIdPTR"\n", ppn, lpn); /* extra sanity */ assert(ppn < phys_npages); pv = pv_alloc(); if (pv == NULL) return NULL; pv->pv_next = pv_table[ppn].pv_next; pv_table[ppn].pv_next = pv; pmap->pm_stats.resident_count++; return pv; } static void pmap_set_pv(pmap_t pmap, uintptr_t lpn, struct pv_entry *pv) { struct pmap_l2 *l2tbl; int l1 = lpn / PMAP_L2_NENTRY; int l2 = lpn % PMAP_L2_NENTRY; #ifdef DIAGNOSTIC if (lpn >= pm_nentries) panic("peeing outside box : addr in page around %"PRIx64"\n", (uint64_t) lpn*PAGE_SIZE); #endif l2tbl = pmap->pm_l1[l1]; if (!l2tbl) { l2tbl = pmap->pm_l1[l1] = kmem_zalloc(PMAP_L2_SIZE, KM_SLEEP); /* should be zero filled */ } l2tbl->pm_l2[l2] = pv; } static struct pv_entry * pmap_lookup_pv(pmap_t pmap, uintptr_t lpn) { struct pmap_l2 *l2tbl; int l1 = lpn / PMAP_L2_NENTRY; int l2 = lpn % PMAP_L2_NENTRY; if (lpn >= pm_nentries) return NULL; l2tbl = pmap->pm_l1[l1]; if (l2tbl) return l2tbl->pm_l2[l2]; return NULL; } /* * Check if the given page fault was our reference / modified emulation fault; * if so return true otherwise return false and let uvm handle it */ bool pmap_fault(pmap_t pmap, vaddr_t va, vm_prot_t *atype) { struct pv_entry *pv, *ppv; uintptr_t lpn, ppn; int prot, cur_prot, diff; thunk_printf_debug("pmap_fault pmap %p, va %p\n", pmap, (void *) va); /* get logical page from vaddr */ lpn = atop(va - VM_MIN_ADDRESS); /* V->L */ pv = pmap_lookup_pv(pmap, lpn); /* not known! then it must be UVM's work */ if (pv == NULL) { //thunk_printf("%s: no mapping yet for %p\n", // __func__, (void *) va); *atype = VM_PROT_READ; /* assume it was a read */ return false; } /* determine physical address and lookup 'root' pv_entry */ ppn = pv->pv_ppn; ppv = &pv_table[ppn]; /* if unmanaged we just make sure it is there! 
	/* if unmanaged we just make sure it is there! */
	if (ppv->pv_vflags & PV_UNMANAGED) {
		printf("%s: oops warning unmanaged page %"PRIiPTR" faulted\n",
		    __func__, ppn);
		/* atype not set */
		pmap_page_activate(pv);
		return true;
	}

	/* check the TLB, if NULL we have a TLB fault */
	if (tlb[pv->pv_lpn] == NULL) {
		if (pv->pv_mmap_ppl != THUNK_PROT_NONE) {
			thunk_printf_debug("%s: tlb fault page lpn %"PRIiPTR"\n",
			    __func__, pv->pv_lpn);
			pmap_page_activate(pv);
			return true;
		}
	}

	/* determine pmap access type (mmap doesn't need to be 1:1 on VM_PROT_) */
	prot = pv->pv_prot;
	cur_prot = VM_PROT_NONE;
	if (pv->pv_mmap_ppl & THUNK_PROT_READ)
		cur_prot |= VM_PROT_READ;
	if (pv->pv_mmap_ppl & THUNK_PROT_WRITE)
		cur_prot |= VM_PROT_WRITE;
	if (pv->pv_mmap_ppl & THUNK_PROT_EXEC)
		cur_prot |= VM_PROT_EXECUTE;

	diff = prot & (prot ^ cur_prot);

	thunk_printf_debug("%s: prot = %d, cur_prot = %d, diff = %d\n",
	    __func__, prot, cur_prot, diff);

	*atype = VM_PROT_READ;	/* assume it's a read error */
	if (diff & VM_PROT_READ) {
		if ((ppv->pv_pflags & PV_REFERENCED) == 0) {
			ppv->pv_pflags |= PV_REFERENCED;
			pmap_update_page(ppn);
			return true;
		}
		panic("pmap: page not readable but marked referenced?");
		return false;
	}

#if 0
	/* this might be questionable */
	if (diff & VM_PROT_EXECUTE) {
		*atype = VM_PROT_EXECUTE;	/* assume it was executing */
		if (prot & VM_PROT_EXECUTE) {
			if ((ppv->pv_pflags & PV_REFERENCED) == 0) {
				ppv->pv_pflags |= PV_REFERENCED;
				pmap_update_page(ppn);
				return true;
			}
		}
		return false;
	}
#endif

	*atype = VM_PROT_WRITE;	/* assume it's a write error */
	if (diff & VM_PROT_WRITE) {
		if (prot & VM_PROT_WRITE) {
			/* should be allowed to write */
			if ((ppv->pv_pflags & PV_MODIFIED) == 0) {
				/* was marked unmodified */
				ppv->pv_pflags |= PV_MODIFIED;
				pmap_update_page(ppn);
				return true;
			}
		}
		panic("pmap: page not writable but marked modified?");
		return false;
	}

	/* not due to our r/m handling, let uvm handle it! */
	return false;
}
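
/*
 * Referenced/modified emulation in a nutshell (cf. pv_update() below): a
 * freshly entered managed page is mmap()'ed PROT_NONE, so the first access
 * faults; pmap_fault() then sets PV_REFERENCED and remaps the page read
 * (and exec) only.  A later write faults again, sets PV_MODIFIED and
 * finally remaps the page read/write.  Roughly:
 *
 *	flags                      host protection
 *	(none)                     PROT_NONE
 *	REFERENCED                 PROT_READ [| PROT_EXEC]
 *	REFERENCED | MODIFIED      PROT_READ | PROT_WRITE
 */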
static void
pmap_page_activate(struct pv_entry *pv)
{
	paddr_t pa = pv->pv_ppn * PAGE_SIZE;
	vaddr_t va = pv->pv_lpn * PAGE_SIZE + VM_MIN_ADDRESS;	/* L->V */
	uint32_t map_flags;
	void *addr;

	map_flags = THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED;
	addr = thunk_mmap((void *) va, PAGE_SIZE, pv->pv_mmap_ppl,
	    map_flags, mem_fh, pa);
	thunk_printf_debug("page_activate: (va %p, pa %p, prot %d, ppl %d) "
	    "-> %p\n", (void *) va, (void *) pa, pv->pv_prot,
	    pv->pv_mmap_ppl, (void *) addr);
	if (addr != (void *) va)
		panic("pmap_page_activate: mmap failed (expected %p got %p): %d",
		    (void *)va, addr, thunk_geterrno());

	tlb[pv->pv_lpn] = NULL;
	if (pv->pv_mmap_ppl != THUNK_PROT_NONE)
		tlb[pv->pv_lpn] = pv;
}

static void
pmap_page_deactivate(struct pv_entry *pv)
{
	paddr_t pa = pv->pv_ppn * PAGE_SIZE;
	vaddr_t va = pv->pv_lpn * PAGE_SIZE + VM_MIN_ADDRESS;	/* L->V */
	uint32_t map_flags;
	void *addr;

	/* don't try to unmap pv entries that are already unmapped */
	if (!tlb[pv->pv_lpn])
		return;
	if (tlb[pv->pv_lpn]->pv_mmap_ppl == THUNK_PROT_NONE)
		goto deactivate;

	map_flags = THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED;
	addr = thunk_mmap((void *) va, PAGE_SIZE, THUNK_PROT_NONE,
	    map_flags, mem_fh, pa);
	thunk_printf_debug("page_deactivate: (va %p, pa %p, ppl %d) -> %p\n",
	    (void *) va, (void *) pa, pv->pv_mmap_ppl, (void *) addr);
	if (addr != (void *) va)
		panic("pmap_page_deactivate: mmap failed");

deactivate:
	tlb[pv->pv_lpn] = NULL;
}

static void
pv_update(struct pv_entry *pv)
{
	int pflags, vflags;
	int mmap_ppl;

	/* get our per-physical-page flags */
	pflags = pv_table[pv->pv_ppn].pv_pflags;
	vflags = pv_table[pv->pv_ppn].pv_vflags;

	KASSERT(THUNK_PROT_READ == VM_PROT_READ);
	KASSERT(THUNK_PROT_WRITE == VM_PROT_WRITE);
	KASSERT(THUNK_PROT_EXEC == VM_PROT_EXECUTE);

	/* create referenced/modified emulation */
	if ((pv->pv_prot & VM_PROT_WRITE) &&
	    (pflags & PV_REFERENCED) && (pflags & PV_MODIFIED)) {
		mmap_ppl = THUNK_PROT_READ | THUNK_PROT_WRITE;
	} else if ((pv->pv_prot & (VM_PROT_READ | VM_PROT_EXECUTE)) &&
	    (pflags & PV_REFERENCED)) {
		mmap_ppl = THUNK_PROT_READ;
		if (pv->pv_prot & VM_PROT_EXECUTE)
			mmap_ppl |= THUNK_PROT_EXEC;
	} else {
		mmap_ppl = THUNK_PROT_NONE;
	}

	/* unmanaged or wired pages are special; they don't track r/m */
	if (vflags & (PV_UNMANAGED | PV_WIRED))
		mmap_ppl = THUNK_PROT_READ | THUNK_PROT_WRITE;

	pv->pv_mmap_ppl = mmap_ppl;
}

/* update mapping of a physical page */
static void
pmap_update_page(uintptr_t ppn)
{
	struct pv_entry *pv;

	for (pv = &pv_table[ppn]; pv != NULL; pv = pv->pv_next) {
		thunk_printf_debug("pmap_update_page: ppn %"PRIdPTR", "
		    "pv->pv_pmap = %p\n", ppn, pv->pv_pmap);
		if (pv->pv_pmap != NULL) {
			pv_update(pv);
			if (pv->pv_pmap->pm_flags & PM_ACTIVE)
				pmap_page_activate(pv);
			else
				pmap_page_deactivate(pv);
		}
	}
}
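
/*
 * Workhorse for pmap_enter() and pmap_kenter_pa(): any previous mapping at
 * this va is removed first, then a pv_entry is claimed and filled in, the
 * host protection is (re)computed via pmap_update_page(), and the page is
 * activated immediately when the target pmap is the active one.
 */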
static int
pmap_do_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot,
    uint flags, int unmanaged)
{
	struct pv_entry *pv, *ppv;
	uintptr_t ppn, lpn;
	int s;

	/* to page numbers */
	ppn = atop(pa);
	lpn = atop(va - VM_MIN_ADDRESS);	/* V->L */

#ifdef DIAGNOSTIC
	if ((va < VM_MIN_ADDRESS) || (va > VM_MAX_KERNEL_ADDRESS))
		panic("pmap_do_enter: invalid va issued\n");
#endif

	/* raise interrupt level */
	s = splvm();

	/* remove existing mapping at this lpn */
	pv = pmap_lookup_pv(pmap, lpn);
	if (pv && pv->pv_ppn != ppn)
		pmap_remove(pmap, va, va + PAGE_SIZE);

	/* get our entry */
	ppv = &pv_table[ppn];
	pv = pv_get(pmap, ppn, lpn);	/* get our (copy of the) pv entry */

	/* and adjust stats */
	if (pv == NULL)
		panic("pmap_do_enter: didn't find pv entry!");
	if (pv->pv_vflags & PV_WIRED)
		pmap->pm_stats.wired_count--;

	/* enter our details */
	pv->pv_pmap   = pmap;
	pv->pv_ppn    = ppn;
	pv->pv_lpn    = lpn;
	pv->pv_prot   = prot;
	pv->pv_vflags = 0;
	/* pv->pv_next = NULL; */	/* might confuse linked list? */

	if (flags & PMAP_WIRED)
		pv->pv_vflags |= PV_WIRED;

	if (unmanaged) {
		/* don't track r/m */
		pv->pv_vflags |= PV_UNMANAGED;
	} else {
		if (flags & VM_PROT_WRITE)
			ppv->pv_pflags |= PV_REFERENCED | PV_MODIFIED;
		else if (flags & (VM_PROT_ALL))
			ppv->pv_pflags |= PV_REFERENCED;
	}

	/* map it in */
	pmap_update_page(ppn);
	pmap_set_pv(pmap, lpn, pv);

	/* adjust stats */
	if (pv->pv_vflags & PV_WIRED)
		pmap->pm_stats.wired_count++;

	splx(s);

	/* activate page directly when on active pmap */
	if (pmap->pm_flags & PM_ACTIVE)
		pmap_page_activate(pv);

	return 0;
}

int
pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	thunk_printf_debug("pmap_enter %p : v %p, p %p, prot %d, flags %d\n",
	    (void *) pmap, (void *) va, (void *) pa, (int) prot, (int) flags);
	return pmap_do_enter(pmap, va, pa, prot, flags, 0);
}

/* release the pv_entry for a mapping; code derived from the hp300 pmap */
static void
pv_release(pmap_t pmap, uintptr_t ppn, uintptr_t lpn)
{
	struct pv_entry *pv, *npv;

	thunk_printf_debug("pv_release ppn %"PRIdPTR", lpn %"PRIdPTR"\n",
	    ppn, lpn);
	pv = &pv_table[ppn];

	/*
	 * If it is the first entry on the list, it is actually
	 * in the header and we must copy the following entry up
	 * to the header.  Otherwise we must search the list for
	 * the entry.  In either case we free the now unused entry.
	 */
	if ((pmap == pv->pv_pmap) && (lpn == pv->pv_lpn)) {
		npv = pv->pv_next;
		if (npv) {
			/* pull up first entry from chain */
			memcpy(pv, npv, offsetof(struct pv_entry, pv_pflags));
			pmap_set_pv(pv->pv_pmap, pv->pv_lpn, pv);
			pv_free(npv);
		} else {
			memset(pv, 0, offsetof(struct pv_entry, pv_pflags));
		}
	} else {
		for (npv = pv->pv_next; npv; npv = npv->pv_next) {
			if ((pmap == npv->pv_pmap) && (lpn == npv->pv_lpn))
				break;
			pv = npv;
		}
		KASSERT(npv != NULL);
		pv->pv_next = npv->pv_next;
		pv_free(npv);
	}
	pmap_set_pv(pmap, lpn, NULL);
	pmap->pm_stats.resident_count--;
}

void
pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
{
	uintptr_t slpn, elpn, lpn;
	struct pv_entry *pv;
	int s;

	slpn = atop(sva - VM_MIN_ADDRESS);	/* V->L */
	elpn = atop(eva - VM_MIN_ADDRESS);	/* V->L */

	thunk_printf_debug("pmap_remove() called from "
	    "lpn %"PRIdPTR" to lpn %"PRIdPTR"\n", slpn, elpn);

	s = splvm();
	for (lpn = slpn; lpn < elpn; lpn++) {
		pv = pmap_lookup_pv(pmap, lpn);
		if (pv != NULL) {
			if (pmap->pm_flags & PM_ACTIVE) {
				pmap_page_deactivate(pv);
				// MEMC_WRITE(pv->pv_deactivate);
				// cpu_cache_flush();
			}
			pmap_set_pv(pmap, lpn, NULL);
			if (pv->pv_vflags & PV_WIRED)
				pmap->pm_stats.wired_count--;
			pv_release(pmap, pv->pv_ppn, lpn);
		}
	}
	splx(s);
}

bool
pmap_remove_all(pmap_t pmap)
{
	/* just a hint that all the entries are to be removed */
	thunk_printf_debug("pmap_remove_all() dummy called\n");

	/* we don't do anything with the kernel pmap */
	if (pmap == pmap_kernel())
		return false;

#if 0
	/* remove all mappings in one go; not needed */
	pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	thunk_munmap((void *) VM_MIN_ADDRESS,
	    VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
#endif
#if 0
	/* remove all cached info from the pages */
	thunk_msync(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS,
	    THUNK_MS_SYNC | THUNK_MS_INVALIDATE);
#endif

	return false;
}
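
/*
 * Note that pmap_protect() only ever narrows permissions: granting access
 * is left to the fault path, so a request that includes VM_PROT_WRITE can
 * be ignored here.
 */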
void
pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
	struct pv_entry *pv;
	intptr_t slpn, elpn, lpn;
	int s;

	if (prot == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}
	if (prot & VM_PROT_WRITE)
		return;		/* apparently we're meant to */
	if (pmap == pmap_kernel())
		return;		/* can't restrict kernel w/o unmapping */

	slpn = atop(sva - VM_MIN_ADDRESS);	/* V->L */
	elpn = atop(eva - VM_MIN_ADDRESS);	/* V->L */

	thunk_printf_debug("pmap_protect() called from "
	    "lpn %"PRIdPTR" to lpn %"PRIdPTR"\n", slpn, elpn);

	s = splvm();
	for (lpn = slpn; lpn < elpn; lpn++) {
		pv = pmap_lookup_pv(pmap, lpn);
		if (pv != NULL) {
			pv->pv_prot &= prot;
			pv_update(pv);
			if (pv->pv_pmap->pm_flags & PM_ACTIVE)
				pmap_page_activate(pv);
		}
	}
	splx(s);
}

void
pmap_unwire(pmap_t pmap, vaddr_t va)
{
	struct pv_entry *pv;
	intptr_t lpn;

	thunk_printf_debug("pmap_unwire called va = %p\n", (void *) va);

	if (pmap == NULL)
		return;

	lpn = atop(va - VM_MIN_ADDRESS);	/* V->L */
	pv = pmap_lookup_pv(pmap, lpn);
	if (pv == NULL)
		return;

	/* but is it wired? */
	if ((pv->pv_vflags & PV_WIRED) == 0)
		return;

	pmap->pm_stats.wired_count--;
	pv->pv_vflags &= ~PV_WIRED;

	/* XXX needed? */
	pmap_update_page(pv->pv_ppn);
}

bool
pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *ppa)
{
	struct pv_entry *pv;
	intptr_t lpn;

	thunk_printf_debug("pmap_extract: extracting va %p\n", (void *) va);
#ifdef DIAGNOSTIC
	if ((va < VM_MIN_ADDRESS) || (va > VM_MAX_KERNEL_ADDRESS)) {
		thunk_printf_debug("pmap_extract: invalid va issued\n");
		thunk_printf("%p not in [%p, %p]\n", (void *) va,
		    (void *) VM_MIN_ADDRESS, (void *) VM_MAX_KERNEL_ADDRESS);
		return false;
	}
#endif

	lpn = atop(va - VM_MIN_ADDRESS);	/* V->L */
	pv = pmap_lookup_pv(pmap, lpn);
	if (pv == NULL)
		return false;
	if (ppa)
		*ppa = ptoa(pv->pv_ppn);
	return true;
}

/*
 * Enter an unmanaged, `wired' kernel mapping.
 * Only to be removed by pmap_kremove().
 */
void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	thunk_printf_debug("pmap_kenter_pa : v %p, p %p, prot %d, flags %d\n",
	    (void *) va, (void *) pa, (int) prot, (int) flags);
	pmap_do_enter(pmap_kernel(), va, pa, prot, prot | PMAP_WIRED, 1);
}

void
pmap_kremove(vaddr_t va, vsize_t size)
{
	pmap_remove(pmap_kernel(), va, va + size);
}

void
pmap_copy(pmap_t dst_map, pmap_t src_map, vaddr_t dst_addr, vsize_t len,
    vaddr_t src_addr)
{
	thunk_printf_debug("pmap_copy (dummy)\n");
}

void
pmap_update(pmap_t pmap)
{
	thunk_printf_debug("pmap_update (dummy)\n");
}

void
pmap_activate(struct lwp *l)
{
	struct proc *p = l->l_proc;
	pmap_t pmap;

	pmap = p->p_vmspace->vm_map.pmap;
	thunk_printf_debug("pmap_activate for lwp %p, pmap = %p\n", l, pmap);

	if (pmap == pmap_kernel())
		return;		/* kernel pmap is always active */

	KASSERT(active_pmap == NULL);
	KASSERT((pmap->pm_flags & PM_ACTIVE) == 0);

	active_pmap = pmap;
	pmap->pm_flags |= PM_ACTIVE;
}

void
pmap_deactivate(struct lwp *l)
{
	struct proc *p = l->l_proc;
	struct pv_entry *pv;
	struct pmap_l2 *l2tbl;
	pmap_t pmap;
	int l1, l2;

	pmap = p->p_vmspace->vm_map.pmap;
	thunk_printf_debug("pmap_DEactivate for lwp %p, pmap = %p\n", l, pmap);

	if (pmap == pmap_kernel())
		return;		/* kernel pmap is always active */

	KASSERT(pmap == active_pmap);
	KASSERT(pmap->pm_flags & PM_ACTIVE);

	active_pmap = NULL;
	pmap->pm_flags &= ~PM_ACTIVE;

	for (l1 = 0; l1 < pm_nl1; l1++) {
		l2tbl = pmap->pm_l1[l1];
		if (!l2tbl)
			continue;
		for (l2 = 0; l2 < PMAP_L2_NENTRY; l2++) {
			pv = l2tbl->pm_l2[l2];
			if (pv) {
				pmap_page_deactivate(pv);
				// MEMC_WRITE(pmap->pm_entries[i]->pv_deactivate);
			}
		}
	}
	/* dummy */
	// cpu_cache_flush();
}
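
/*
 * pmap_zero_page() and pmap_copy_page() operate on pages that need not be
 * mapped anywhere, so they temporarily mmap() the target "physical" pages
 * (i.e. memory file offsets) at the scratch addresses pm_tmp_p0/pm_tmp_p1
 * reserved in pmap_bootstrap().  A minimal sketch of the pattern:
 *
 *	blob = thunk_mmap(pm_tmp_p0, PAGE_SIZE,
 *	    THUNK_PROT_READ | THUNK_PROT_WRITE,
 *	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
 *	    mem_fh, pa);
 *	... operate on blob ...
 *	thunk_munmap(blob, PAGE_SIZE);
 */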
void
pmap_zero_page(paddr_t pa)
{
	char *blob;

	thunk_printf_debug("pmap_zero_page: pa %p\n", (void *) pa);

	if (pa & (PAGE_SIZE-1))
		panic("%s: unaligned address passed : %p\n", __func__,
		    (void *) pa);

	blob = thunk_mmap(pm_tmp_p0, PAGE_SIZE,
	    THUNK_PROT_READ | THUNK_PROT_WRITE,
	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
	    mem_fh, pa);
	if (blob != pm_tmp_p0)
		panic("%s: couldn't get mapping", __func__);

	memset(blob, 0, PAGE_SIZE);

	thunk_munmap(blob, PAGE_SIZE);
}

void
pmap_copy_page(paddr_t src_pa, paddr_t dst_pa)
{
	char *sblob, *dblob;

	if (src_pa & (PAGE_SIZE-1))
		panic("%s: unaligned address passed : %p\n", __func__,
		    (void *) src_pa);
	if (dst_pa & (PAGE_SIZE-1))
		panic("%s: unaligned address passed : %p\n", __func__,
		    (void *) dst_pa);

	thunk_printf_debug("pmap_copy_page: pa src %p, pa dst %p\n",
	    (void *) src_pa, (void *) dst_pa);

	/* source */
	sblob = thunk_mmap(pm_tmp_p0, PAGE_SIZE,
	    THUNK_PROT_READ,
	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
	    mem_fh, src_pa);
	if (sblob != pm_tmp_p0)
		panic("%s: couldn't get src mapping", __func__);

	/* destination */
	dblob = thunk_mmap(pm_tmp_p1, PAGE_SIZE,
	    THUNK_PROT_READ | THUNK_PROT_WRITE,
	    THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
	    mem_fh, dst_pa);
	if (dblob != pm_tmp_p1)
		panic("%s: couldn't get dst mapping", __func__);

	memcpy(dblob, sblob, PAGE_SIZE);

	thunk_munmap(sblob, PAGE_SIZE);
	thunk_munmap(dblob, PAGE_SIZE);
}

/* change access permissions on a given physical page */
void
pmap_page_protect(struct vm_page *page, vm_prot_t prot)
{
	intptr_t ppn;
	struct pv_entry *pv, *npv;

	ppn = atop(VM_PAGE_TO_PHYS(page));
	thunk_printf_debug("pmap_page_protect page %"PRIiPTR" to prot %d\n",
	    ppn, prot);

	if (prot == VM_PROT_NONE) {
		/* visit all mappings */
		npv = pv = &pv_table[ppn];
		while ((pv != NULL) && (pv->pv_pmap != NULL)) {
			/* skip unmanaged entries */
			if (pv->pv_vflags & PV_UNMANAGED) {
				pv = pv->pv_next;
				continue;
			}

			/* if in an active pmap deactivate */
			if (pv->pv_pmap->pm_flags & PM_ACTIVE)
				pmap_page_deactivate(pv);

			/* if not on the head, remember our next */
			if (pv != &pv_table[ppn])
				npv = pv->pv_next;

			/* remove from pmap */
			pmap_set_pv(pv->pv_pmap, pv->pv_lpn, NULL);
			if (pv->pv_vflags & PV_WIRED)
				pv->pv_pmap->pm_stats.wired_count--;
			pv_release(pv->pv_pmap, ppn, pv->pv_lpn);

			pv = npv;
		}
	} else if (prot != VM_PROT_ALL) {
		/* visit all mappings */
		for (pv = &pv_table[ppn]; pv != NULL; pv = pv->pv_next) {
			/* if managed and in a pmap restrict access */
			if ((pv->pv_pmap != NULL) &&
			    ((pv->pv_vflags & PV_UNMANAGED) == 0)) {
				pv->pv_prot &= prot;
				pv_update(pv);
				/* if in active pmap (re)activate page */
				if (pv->pv_pmap->pm_flags & PM_ACTIVE)
					pmap_page_activate(pv);
			}
		}
	}
}
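
/*
 * Clearing PV_MODIFIED or PV_REFERENCED below makes pmap_update_page()
 * recompute the host protection of every mapping of the page, dropping it
 * back to read-only or PROT_NONE; the next access then faults into
 * pmap_fault(), which sets the bit again.  This keeps the r/m queries in
 * pmap_is_modified()/pmap_is_referenced() down to a simple bit test.
 */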
rv?"yes":"no"); return rv; } bool pmap_is_referenced(struct vm_page *page) { intptr_t ppn; ppn = atop(VM_PAGE_TO_PHYS(page)); thunk_printf_debug("pmap_is_referenced page %"PRIiPTR"\n", ppn); return (pv_table[ppn].pv_pflags & PV_REFERENCED) != 0; } paddr_t pmap_phys_address(paddr_t cookie) { return ptoa(cookie); } vaddr_t pmap_growkernel(vaddr_t maxkvaddr) { thunk_printf_debug("pmap_growkernel: till %p (adding %"PRIu64" KB)\n", (void *) maxkvaddr, (uint64_t) (maxkvaddr - kmem_kvm_cur_end)/1024); if (maxkvaddr > kmem_kvm_end) return kmem_kvm_end; kmem_kvm_cur_end = maxkvaddr; return kmem_kvm_cur_end; }