*/
/*
- * Copyright (c) 2009, 2010, 2011, ETH Zurich.
+ * Copyright (c) 2009-2013 ETH Zurich.
+ * Copyright (c) 2014 HP Labs.
* All rights reserved.
*
* This file is distributed under the terms in the attached LICENSE file.
* If you do not find this file, copies can be found by writing to:
- * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
+ * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
*/
#include <barrelfish/barrelfish.h>
return pmap_flags;
}
+// returns whether va1 and va2 share a page directory entry
+// not using X86_64_PDIR_BASE() macro as this would give false positives (same
+// entry in different directories)
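+// va2 is treated as an exclusive end address: e.g. for the 2 MiB range
+// [0x200000, 0x400000) the (va2-1) adjustment stops the end address from
+// being counted as belonging to the next page directory entry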
static inline bool is_same_pdir(genvaddr_t va1, genvaddr_t va2)
{
- return (va1>>X86_64_LARGE_PAGE_BITS) == (va2>>X86_64_LARGE_PAGE_BITS);
+ return (va1>>X86_64_LARGE_PAGE_BITS) == ((va2-1)>>X86_64_LARGE_PAGE_BITS);
}
-static inline genvaddr_t get_addr_prefix(genvaddr_t va)
+// returns whether va1 and va2 share a page directory pointer table entry
+static inline bool is_same_pdpt(genvaddr_t va1, genvaddr_t va2)
{
- return va >> X86_64_LARGE_PAGE_BITS;
+ return (va1>>X86_64_HUGE_PAGE_BITS) == ((va2-1)>>X86_64_HUGE_PAGE_BITS);
}
-static bool has_vnode(struct vnode *root, uint32_t entry, size_t len)
+// returns whether va1 and va2 share a page map level 4 entry
+static inline bool is_same_pml4(genvaddr_t va1, genvaddr_t va2)
{
- assert(root != NULL);
- assert(root->is_vnode);
- struct vnode *n;
-
- uint32_t end_entry = entry + len;
-
- for (n = root->u.vnode.children; n; n = n->next) {
- if (n->is_vnode && n->entry == entry) {
- return true;
- }
- // n is frame
- uint32_t end = n->entry + n->u.frame.pte_count;
- if (n->entry < entry && end > end_entry) {
- return true;
- }
- if (n->entry >= entry && n->entry < end_entry) {
- return true;
- }
- }
-
- return false;
+ // the base macros work here as we only have one pml4.
+ return X86_64_PML4_BASE(va1) == X86_64_PML4_BASE(va2-1);
}
-
-/**
- * \brief Starting at a given root, return the vnode with starting entry equal to #entry
- */
-static struct vnode *find_vnode(struct vnode *root, uint32_t entry)
+// size indicates how many bits to shift
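+// callers pass the number of VA bits covered by one leaf table, i.e.
+// page bits + X86_64_PTABLE_BITS: 12+9=21 for 4 KiB pages, 21+9=30 for
+// 2 MiB pages, 30+9=39 for 1 GiB pages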
+static inline genvaddr_t get_addr_prefix(genvaddr_t va, uint8_t size)
{
- assert(root != NULL);
- assert(root->is_vnode);
- struct vnode *n;
-
- for(n = root->u.vnode.children; n != NULL; n = n->next) {
- if(n->entry == entry) {
- return n;
- }
- }
- return NULL;
+ return va >> size;
}
-static bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
+/**
+ * \brief Returns the vnode for the pdpt mapping a given vspace address
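+ *
+ * Allocates (and maps) the pdpt if it does not exist yet.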
+ */
+static inline errval_t get_pdpt(struct pmap_x86 *pmap, genvaddr_t base,
+ struct vnode **pdpt)
{
+ errval_t err;
+ struct vnode *root = &pmap->root;
assert(root != NULL);
- assert(root->is_vnode);
-
- struct vnode *n;
- for (n = root->u.vnode.children; n; n = n->next) {
- if (!n->is_vnode) {
- uint16_t end = n->entry + n->u.frame.pte_count;
- if (n->entry <= entry && entry + npages <= end) {
- return true;
- }
+ // PML4 mapping
+ if((*pdpt = find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
+ err = alloc_vnode(pmap, root, ObjType_VNode_x86_64_pdpt,
+ X86_64_PML4_BASE(base), pdpt);
+ if (err_is_fail(err)) {
+ return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
}
}
- return false;
-}
-
-static void remove_vnode(struct vnode *root, struct vnode *item)
-{
- assert(root->is_vnode);
- struct vnode *walk = root->u.vnode.children;
- struct vnode *prev = NULL;
- while (walk) {
- if (walk == item) {
- if (prev) {
- prev->next = walk->next;
- return;
- } else {
- root->u.vnode.children = walk->next;
- return;
- }
- }
- prev = walk;
- walk = walk->next;
- }
- assert(!"Should not get here");
+ return SYS_ERR_OK;
}
/**
- * \brief Allocates a new VNode, adding it to the page table and our metadata
+ * \brief Returns the vnode for the page directory mapping a given vspace
+ * address
*/
-static errval_t alloc_vnode(struct pmap_x86 *pmap, struct vnode *root,
- enum objtype type, uint32_t entry,
- struct vnode **retvnode)
+static inline errval_t get_pdir(struct pmap_x86 *pmap, genvaddr_t base,
+ struct vnode **pdir)
{
errval_t err;
-
- struct vnode *newvnode = slab_alloc(&pmap->slab);
- if (newvnode == NULL) {
- return LIB_ERR_SLAB_ALLOC_FAIL;
- }
-
- // The VNode capability
- err = pmap->p.slot_alloc->alloc(pmap->p.slot_alloc, &newvnode->u.vnode.cap);
+ struct vnode *pdpt;
+ err = get_pdpt(pmap, base, &pdpt);
if (err_is_fail(err)) {
- return err_push(err, LIB_ERR_SLOT_ALLOC);
+ return err;
}
+ assert(pdpt != NULL);
- err = vnode_create(newvnode->u.vnode.cap, type);
- if (err_is_fail(err)) {
- return err_push(err, LIB_ERR_VNODE_CREATE);
- }
-
- // Map it
- //printf("\talloc_vnode calling vnode_map()\n");
- err = vnode_map(root->u.vnode.cap, newvnode->u.vnode.cap, entry,
- PTABLE_ACCESS_DEFAULT, 0, 1);
- if (err_is_fail(err)) {
- return err_push(err, LIB_ERR_VNODE_MAP);
+ // PDPT mapping
+ if((*pdir = find_vnode(pdpt, X86_64_PDPT_BASE(base))) == NULL) {
+ err = alloc_vnode(pmap, pdpt, ObjType_VNode_x86_64_pdir,
+ X86_64_PDPT_BASE(base), pdir);
+ if (err_is_fail(err)) {
+ printf("failure mapping pdpt\n");
+ return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
+ }
}
- // The VNode meta data
- newvnode->is_vnode = true;
- newvnode->entry = entry;
- newvnode->next = root->u.vnode.children;
- root->u.vnode.children = newvnode;
- newvnode->u.vnode.children = NULL;
-
- *retvnode = newvnode;
return SYS_ERR_OK;
}
/**
* \brief Returns the vnode for the pagetable mapping a given vspace address
*/
-static errval_t get_ptable(struct pmap_x86 *pmap, genvaddr_t base,
- struct vnode **ptable)
+static inline errval_t get_ptable(struct pmap_x86 *pmap, genvaddr_t base,
+ struct vnode **ptable)
{
errval_t err;
- struct vnode *root = &pmap->root;
- struct vnode *pdpt, *pdir;
- assert(root != NULL);
-
- // PML4 mapping
- if((pdpt = find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
- err = alloc_vnode(pmap, root, ObjType_VNode_x86_64_pdpt,
- X86_64_PML4_BASE(base), &pdpt);
- if (err_is_fail(err)) {
- return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
- }
- }
-
- // PDPT mapping
- if((pdir = find_vnode(pdpt, X86_64_PDPT_BASE(base))) == NULL) {
- err = alloc_vnode(pmap, pdpt, ObjType_VNode_x86_64_pdir,
- X86_64_PDPT_BASE(base), &pdir);
- if (err_is_fail(err)) {
- return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
- }
+ struct vnode *pdir;
+ err = get_pdir(pmap, base, &pdir);
+ if (err_is_fail(err)) {
+ return err;
}
+ assert(pdir != NULL);
// PDIR mapping
if((*ptable = find_vnode(pdir, X86_64_PDIR_BASE(base))) == NULL) {
return SYS_ERR_OK;
}
-
/**
- * \brief Returns the vnode for the pagetable mapping a given vspace address,
- * without performing allocations as get_ptable() does
+ * \brief Returns the vnode for the page directory pointer table mapping a
+ * given vspace address
*/
-static struct vnode *find_ptable(struct pmap_x86 *pmap, genvaddr_t base)
+static inline struct vnode *find_pdpt(struct pmap_x86 *pmap, genvaddr_t base)
{
struct vnode *root = &pmap->root;
- struct vnode *pdpt, *pdir;
assert(root != NULL);
- // PML4 mapping
- if((pdpt = find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
+ // PDPT mapping
+ return find_vnode(root, X86_64_PML4_BASE(base));
+}
+
+/**
+ * \brief Returns the vnode for the page directory mapping a given vspace
+ * address, without performing allocations as get_pdir() does
+ */
+static inline struct vnode *find_pdir(struct pmap_x86 *pmap, genvaddr_t base)
+{
+ struct vnode *pdpt = find_pdpt(pmap, base);
+
+ if (pdpt) {
+ // PDPT mapping
+ return find_vnode(pdpt, X86_64_PDPT_BASE(base));
+ } else {
return NULL;
}
+}
- // PDPT mapping
- if((pdir = find_vnode(pdpt, X86_64_PDPT_BASE(base))) == NULL) {
+/**
+ * \brief Returns the vnode for the pagetable mapping a given vspace address,
+ * without performing allocations as get_ptable() does
+ */
+static inline struct vnode *find_ptable(struct pmap_x86 *pmap, genvaddr_t base)
+{
+ struct vnode *pdir = find_pdir(pmap, base);
+
+ if (pdir) {
+ // PDIR mapping
+ return find_vnode(pdir, X86_64_PDIR_BASE(base));
+ } else {
return NULL;
}
-
- // PDIR mapping
- return find_vnode(pdir, X86_64_PDIR_BASE(base));
}
-static errval_t do_single_map(struct pmap_x86 *pmap, genvaddr_t vaddr, genvaddr_t vend,
- struct capref frame, size_t offset, size_t pte_count,
+static errval_t do_single_map(struct pmap_x86 *pmap, genvaddr_t vaddr,
+ genvaddr_t vend, struct capref frame,
+ size_t offset, size_t pte_count,
vregion_flags_t flags)
{
+ if (pte_count == 0) {
+ debug_printf("do_single_map: pte_count == 0, called from %p\n",
+ __builtin_return_address(0));
+ return SYS_ERR_OK;
+ }
+ assert(pte_count > 0);
// translate flags
paging_x86_64_flags_t pmap_flags = vregion_to_pmap_flag(flags);
- // Get the page table
+ // Get the paging structure and set paging relevant parameters
struct vnode *ptable;
- errval_t err = get_ptable(pmap, vaddr, &ptable);
+ errval_t err;
+ size_t table_base;
+
+ // get the right paging table and address part
+ if(flags & VREGION_FLAGS_LARGE) {
+ //large 2M pages, mapped into pdir
+ err = get_pdir(pmap, vaddr, &ptable);
+ table_base = X86_64_PDIR_BASE(vaddr);
+ } else if (flags & VREGION_FLAGS_HUGE) {
+ //huge 1GB pages, mapped into pdpt
+ err = get_pdpt(pmap, vaddr, &ptable);
+ table_base = X86_64_PDPT_BASE(vaddr);
+ } else {
+ //normal 4K pages, mapped into ptable
+ err = get_ptable(pmap, vaddr, &ptable);
+ table_base = X86_64_PTABLE_BASE(vaddr);
+ }
if (err_is_fail(err)) {
return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
}
+ assert(ptable->is_vnode);
// check if there is an overlapping mapping
- if (has_vnode(ptable, X86_64_PTABLE_BASE(vaddr), pte_count)) {
- printf("page already exists in 0x%"PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
- return LIB_ERR_PMAP_EXISTING_MAPPING;
+ if (has_vnode(ptable, table_base, pte_count, false)) {
+ if (has_vnode(ptable, table_base, pte_count, true)) {
+ printf("page already exists in 0x%"
+ PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
+ return LIB_ERR_PMAP_EXISTING_MAPPING;
+ } else {
+ // clean out empty page tables. We do this here because we benefit
+ // from having the page tables in place when doing lots of small
+ // mappings
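+            // (the enclosing check found vnodes in this range, but none of
+            // them are page mappings, so they must be empty page tables)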
+ remove_empty_vnodes(pmap, ptable, table_base, pte_count);
+ }
}
// setup userspace mapping
struct vnode *page = slab_alloc(&pmap->slab);
assert(page);
page->is_vnode = false;
- page->entry = X86_64_PTABLE_BASE(vaddr);
+ page->entry = table_base;
page->next = ptable->u.vnode.children;
ptable->u.vnode.children = page;
page->u.frame.cap = frame;
page->u.frame.pte_count = pte_count;
// do map
- err = vnode_map(ptable->u.vnode.cap, frame, X86_64_PTABLE_BASE(vaddr),
+ err = vnode_map(ptable->u.vnode.cap, frame, table_base,
pmap_flags, offset, pte_count);
if (err_is_fail(err)) {
return err_push(err, LIB_ERR_VNODE_MAP);
{
errval_t err;
- size = ROUND_UP(size, X86_64_BASE_PAGE_SIZE);
- size_t pte_count = DIVIDE_ROUND_UP(size, X86_64_BASE_PAGE_SIZE);
+ // determine page size and relevant address part
+ size_t page_size = X86_64_BASE_PAGE_SIZE;
+ size_t table_base = X86_64_PTABLE_BASE(vaddr);
+ uint8_t map_bits = X86_64_BASE_PAGE_BITS + X86_64_PTABLE_BITS;
+ bool debug_out = false;
+
+ // get base address and size of frame
+ struct frame_identity fi;
+ err = invoke_frame_identify(frame, &fi);
+ if (err_is_fail(err)) {
+ return err_push(err, LIB_ERR_PMAP_DO_MAP);
+ }
+
+ if ((flags & VREGION_FLAGS_HUGE) &&
+ (vaddr & X86_64_HUGE_PAGE_MASK) == 0 &&
+ fi.bits >= X86_64_HUGE_PAGE_BITS &&
+ ((fi.base & X86_64_HUGE_PAGE_MASK) == 0))
+ {
+ // huge page branch (1GB)
+ page_size = X86_64_HUGE_PAGE_SIZE;
+ table_base = X86_64_PDPT_BASE(vaddr);
+ map_bits = X86_64_HUGE_PAGE_BITS + X86_64_PTABLE_BITS;
+ debug_out = false;
+ // remove large flag, if we're doing huge mapping
+ flags &= ~VREGION_FLAGS_LARGE;
+ } else if ((flags & VREGION_FLAGS_LARGE) &&
+ (vaddr & X86_64_LARGE_PAGE_MASK) == 0 &&
+ fi.bits >= X86_64_LARGE_PAGE_BITS &&
+ ((fi.base & X86_64_LARGE_PAGE_MASK) == 0))
+ {
+ // large page branch (2MB)
+ page_size = X86_64_LARGE_PAGE_SIZE;
+ table_base = X86_64_PDIR_BASE(vaddr);
+ map_bits = X86_64_LARGE_PAGE_BITS + X86_64_PTABLE_BITS;
+ debug_out = false;
+ } else {
+ // remove large/huge flags
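+        // (either vaddr or the frame base is not sufficiently aligned, or
+        // the frame is too small for the requested page size)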
+ flags &= ~(VREGION_FLAGS_LARGE|VREGION_FLAGS_HUGE);
+ }
+
+ // round to the next full page and calculate end address and #ptes
+ size = ROUND_UP(size, page_size);
+ size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
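+    // e.g. a 4 MiB request mapped with 2 MiB pages yields pte_count == 2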
genvaddr_t vend = vaddr + size;
+ if (offset+size > (1ULL<<fi.bits)) {
+ debug_printf("do_map: offset=%zu; size=%zu; frame size=%zu\n",
+ offset, size, ((size_t)1<<fi.bits));
+ return LIB_ERR_PMAP_FRAME_SIZE;
+ }
+
#if 0
- struct frame_identity fi;
- invoke_frame_identify(frame, &fi);
- genpaddr_t paddr = fi.base + offset;
+ if (true || debug_out) {
+ genpaddr_t paddr = fi.base + offset;
- debug_printf("do_map: 0x%"
- PRIxGENVADDR"--0x%"PRIxGENVADDR" -> 0x%"PRIxGENPADDR
- "; pte_count = %zd; frame bits = %zd\n", vaddr, vend, paddr,
- pte_count, (size_t)fi.bits);
+ debug_printf("do_map: 0x%"
+ PRIxGENVADDR"--0x%"PRIxGENVADDR" -> 0x%"PRIxGENPADDR
+ "; pte_count = %zd; frame bits = %zd; page size = 0x%zx\n",
+ vaddr, vend, paddr, pte_count, (size_t)fi.bits, page_size);
+ }
#endif
-
- if (is_same_pdir(vaddr, vend)) {
+ // all mapping on one leaf table?
+ if (is_same_pdir(vaddr, vend) ||
+ (flags & VREGION_FLAGS_LARGE && is_same_pdpt(vaddr, vend)) ||
+ (flags & VREGION_FLAGS_HUGE && is_same_pml4(vaddr, vend))) {
// fast path
- //debug_printf("do_map: fast path: %zd\n", pte_count);
+ if (debug_out) {
+ debug_printf(" do_map: fast path: %zd\n", pte_count);
+ }
err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
if (err_is_fail(err)) {
return err_push(err, LIB_ERR_PMAP_DO_MAP);
}
- } else { // multiple leaf page tables
+ }
+ else { // multiple leaf page tables
// first leaf
- uint32_t c = X86_64_PTABLE_SIZE - X86_64_PTABLE_BASE(vaddr);
- //debug_printf("do_map: slow path: first leaf %"PRIu32"\n", c);
- genvaddr_t temp_end = vaddr + c * X86_64_BASE_PAGE_SIZE;
+ uint32_t c = X86_64_PTABLE_SIZE - table_base;
+ if (debug_out) {
+ debug_printf(" do_map: slow path: first leaf %"PRIu32"\n", c);
+ }
+ genvaddr_t temp_end = vaddr + c * page_size;
err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
if (err_is_fail(err)) {
return err_push(err, LIB_ERR_PMAP_DO_MAP);
}
// map full leaves
- while (get_addr_prefix(temp_end) < get_addr_prefix(vend)) {
+ while (get_addr_prefix(temp_end, map_bits) <
+ get_addr_prefix(vend, map_bits))
+ {
// update vars
vaddr = temp_end;
- temp_end = vaddr + X86_64_PTABLE_SIZE * X86_64_BASE_PAGE_SIZE;
- offset += c * X86_64_BASE_PAGE_SIZE;
+ temp_end = vaddr + X86_64_PTABLE_SIZE * page_size;
+ offset += c * page_size;
c = X86_64_PTABLE_SIZE;
// copy cap
struct capref next;
frame = next;
// do mapping
- //debug_printf("do_map: slow path: full leaf %d\n", X86_64_PTABLE_SIZE);
- err = do_single_map(pmap, vaddr, temp_end, frame, offset, X86_64_PTABLE_SIZE, flags);
+ if (debug_out) {
+ debug_printf(" do_map: slow path: full leaf\n");
+ }
+ err = do_single_map(pmap, vaddr, temp_end, frame, offset,
+ X86_64_PTABLE_SIZE, flags);
if (err_is_fail(err)) {
return err_push(err, LIB_ERR_PMAP_DO_MAP);
}
}
// map remaining part
- offset += c * X86_64_BASE_PAGE_SIZE;
- c = X86_64_PTABLE_BASE(vend) - X86_64_PTABLE_BASE(temp_end);
+ offset += c * page_size;
+
+ // calculate remaining pages (subtract ptable bits from map_bits to
+ // get #ptes of last-level instead of 2nd-to-last).
+ c = get_addr_prefix(vend, map_bits-X86_64_PTABLE_BITS) -
+ get_addr_prefix(temp_end, map_bits-X86_64_PTABLE_BITS);
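+        // (for 4 KiB pages map_bits - X86_64_PTABLE_BITS == 12, so this is
+        // simply the number of remaining base pages up to vend)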
+
if (c) {
// copy cap
struct capref next;
}
// do mapping
- //debug_printf("do_map: slow path: last leaf %"PRIu32"\n", c);
+ if (debug_out) {
+ debug_printf("do_map: slow path: last leaf %"PRIu32"\n", c);
+ }
err = do_single_map(pmap, temp_end, vend, next, offset, c, flags);
if (err_is_fail(err)) {
return err_push(err, LIB_ERR_PMAP_DO_MAP);
return max_pages + max_ptable + max_pdir + max_pdpt;
}
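+
+// e.g. mapping 1 GiB with 2 MiB pages needs at most 512 page slabs plus one
+// pdir slab and one pdpt slab; the caller adds a small fixed reserve on top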
+static size_t max_slabs_for_mapping_large(size_t bytes)
+{
+ size_t max_pages = DIVIDE_ROUND_UP(bytes, X86_64_LARGE_PAGE_SIZE);
+ size_t max_pdir = DIVIDE_ROUND_UP(max_pages, X86_64_PTABLE_SIZE);
+ size_t max_pdpt = DIVIDE_ROUND_UP(max_pdir, X86_64_PTABLE_SIZE);
+ return max_pages + max_pdir + max_pdpt;
+}
+
+static size_t max_slabs_for_mapping_huge(size_t bytes)
+{
+ size_t max_pages = DIVIDE_ROUND_UP(bytes, X86_64_HUGE_PAGE_SIZE);
+ size_t max_pdpt = DIVIDE_ROUND_UP(max_pages, X86_64_PTABLE_SIZE);
+ return max_pages + max_pdpt;
+}
+
/**
* \brief Refill slabs used for metadata
*
/* Grow the slab */
lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
- slab_grow(&pmap->slab, (void*)buf, bytes);
+ slab_grow(&pmap->slab, (void*)buf, bytes);
}
return SYS_ERR_OK;
errval_t err;
struct pmap_x86 *x86 = (struct pmap_x86*)pmap;
+ struct frame_identity fi;
+ err = invoke_frame_identify(frame, &fi);
+ if (err_is_fail(err)) {
+ return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
+ }
+
+ size_t max_slabs;
// Adjust the parameters to page boundaries
- size += BASE_PAGE_OFFSET(offset);
- size = ROUND_UP(size, BASE_PAGE_SIZE);
- offset -= BASE_PAGE_OFFSET(offset);
+ // TODO: overestimating needed slabs shouldn't hurt much in the long run,
+ // and would keep the code easier to read and possibly faster due to less
+ // branching
+ if ((flags & VREGION_FLAGS_LARGE) &&
+ (vaddr & X86_64_LARGE_PAGE_MASK) == 0 &&
+ (fi.base & X86_64_LARGE_PAGE_MASK) == 0 &&
+ (1UL<<fi.bits) >= offset+size) {
+ //case large pages (2MB)
+ size += LARGE_PAGE_OFFSET(offset);
+ size = ROUND_UP(size, LARGE_PAGE_SIZE);
+ offset -= LARGE_PAGE_OFFSET(offset);
+ max_slabs = max_slabs_for_mapping_large(size);
+ } else if ((flags & VREGION_FLAGS_HUGE) &&
+ (vaddr & X86_64_HUGE_PAGE_MASK) == 0 &&
+ (fi.base & X86_64_HUGE_PAGE_MASK) == 0 &&
+ (1UL<<fi.bits) >= offset+size) {
+ // case huge pages (1GB)
+ size += HUGE_PAGE_OFFSET(offset);
+ size = ROUND_UP(size, HUGE_PAGE_SIZE);
+ offset -= HUGE_PAGE_OFFSET(offset);
+ max_slabs = max_slabs_for_mapping_huge(size);
+ } else {
+ //case normal pages (4KB)
+ size += BASE_PAGE_OFFSET(offset);
+ size = ROUND_UP(size, BASE_PAGE_SIZE);
+ offset -= BASE_PAGE_OFFSET(offset);
+ max_slabs = max_slabs_for_mapping(size);
+ }
// Refill slab allocator if necessary
size_t slabs_free = slab_freecount(&x86->slab);
- size_t max_slabs = max_slabs_for_mapping(size);
+
max_slabs += 5; // minimum amount required to map a page
- if (slabs_free < max_slabs) {
+ if (slabs_free < max_slabs) {
struct pmap *mypmap = get_current_pmap();
if (pmap == mypmap) {
err = refill_slabs(x86, max_slabs);
return err;
}
-static errval_t do_single_unmap(struct pmap_x86 *pmap, genvaddr_t vaddr, size_t pte_count, bool delete_cap)
+/**
+ * \brief Find mapping for `vaddr` in `pmap`.
+ * \arg pmap the pmap to search in
+ * \arg vaddr the virtual address to search for
+ * \arg outpt the last-level page table meta-data we found if any
+ * \arg outpage the page meta-data we found if any
+ * \returns `true` iff we found a mapping for vaddr
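+ *
+ * For a 1 GiB mapping *outpt is the pdpt, for a 2 MiB mapping the pdir, and
+ * for a 4 KiB mapping the leaf page table.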
+ */
+static bool find_mapping(struct pmap_x86 *pmap, genvaddr_t vaddr,
+ struct vnode **outpt, struct vnode **outpage)
{
- errval_t err;
- struct vnode *pt = find_ptable(pmap, vaddr);
- if (pt) {
- struct vnode *page = find_vnode(pt, X86_64_PTABLE_BASE(vaddr));
- if (page && page->u.frame.pte_count == pte_count) {
- err = vnode_unmap(pt->u.vnode.cap, page->u.frame.cap, page->entry, page->u.frame.pte_count);
- if (err_is_fail(err)) {
- printf("vnode_unmap returned error: %s (%d)\n", err_getstring(err), err_no(err));
- return err_push(err, LIB_ERR_VNODE_UNMAP);
+ struct vnode *pdpt = NULL, *pdir = NULL, *pt = NULL, *page = NULL;
+
+ // find page and last-level page table (can be pdir or pdpt)
+ if ((pdpt = find_pdpt(pmap, vaddr)) != NULL) {
+ page = find_vnode(pdpt, X86_64_PDPT_BASE(vaddr));
+ if (page && page->is_vnode) { // not 1G pages
+ pdir = page;
+ page = find_vnode(pdir, X86_64_PDIR_BASE(vaddr));
+ if (page && page->is_vnode) { // not 2M pages
+ pt = page;
+ page = find_vnode(pt, X86_64_PTABLE_BASE(vaddr));
+ } else if (page) {
+ pt = pdir;
}
+ } else if (page) {
+ pt = pdpt;
+ }
+ }
+ if (outpt) {
+ *outpt = pt;
+ }
+ if (outpage) {
+ *outpage = page;
+ }
+ if (pt && page) {
+ return true;
+ } else {
+ return false;
+ }
+}
- // Free up the resources
- if (delete_cap) {
- err = cap_destroy(page->u.frame.cap);
- if (err_is_fail(err)) {
- return err_push(err, LIB_ERR_PMAP_DO_SINGLE_UNMAP);
- }
- }
- remove_vnode(pt, page);
- slab_free(&pmap->slab, page);
+static errval_t do_single_unmap(struct pmap_x86 *pmap, genvaddr_t vaddr,
+ size_t pte_count, bool delete_cap)
+{
+ errval_t err;
+ struct vnode *pt = NULL, *page = NULL;
+
+ if (!find_mapping(pmap, vaddr, &pt, &page)) {
+ return LIB_ERR_PMAP_FIND_VNODE;
+ }
+ assert(pt && pt->is_vnode && page && !page->is_vnode);
+
+ if (page->u.frame.pte_count == pte_count) {
+ err = vnode_unmap(pt->u.vnode.cap, page->u.frame.cap, page->entry,
+ page->u.frame.pte_count);
+ if (err_is_fail(err)) {
+ printf("vnode_unmap returned error: %s (%d)\n",
+ err_getstring(err), err_no(err));
+ return err_push(err, LIB_ERR_VNODE_UNMAP);
}
- else {
- return LIB_ERR_PMAP_FIND_VNODE;
+
+ // Free up the resources
+ if (delete_cap) {
+ err = cap_destroy(page->u.frame.cap);
+ if (err_is_fail(err)) {
+ printf("delete_cap\n");
+ return err_push(err, LIB_ERR_PMAP_DO_SINGLE_UNMAP);
+ }
}
+ remove_vnode(pt, page);
+ slab_free(&pmap->slab, page);
}
return SYS_ERR_OK;
}
+static inline bool is_large_page(struct vnode *p)
+{
+ return !p->is_vnode && p->u.frame.flags & VREGION_FLAGS_LARGE;
+}
+static inline bool is_huge_page(struct vnode *p)
+{
+ return !p->is_vnode && p->u.frame.flags & VREGION_FLAGS_HUGE;
+}
+
/**
* \brief Remove page mappings
*
//printf("[unmap] 0x%"PRIxGENVADDR", %zu\n", vaddr, size);
errval_t err, ret = SYS_ERR_OK;
struct pmap_x86 *x86 = (struct pmap_x86*)pmap;
- size = ROUND_UP(size, X86_64_BASE_PAGE_SIZE);
+
+    // determine whether we are unmapping a large (2M) or huge (1G) page
+ struct vnode* page = NULL;
+
+ if (!find_mapping(x86, vaddr, NULL, &page)) {
+ //TODO: better error --> LIB_ERR_PMAP_NOT_MAPPED
+ return LIB_ERR_PMAP_UNMAP;
+ }
+
+ assert(!page->is_vnode);
+
+ size_t page_size = X86_64_BASE_PAGE_SIZE;
+ size_t table_base = X86_64_PTABLE_BASE(vaddr);
+    uint8_t map_bits = X86_64_BASE_PAGE_BITS + X86_64_PTABLE_BITS;
+ if (is_large_page(page)) {
+ //large 2M page
+ page_size = X86_64_LARGE_PAGE_SIZE;
+ table_base = X86_64_PDIR_BASE(vaddr);
+ map_bits = X86_64_LARGE_PAGE_BITS + X86_64_PTABLE_BITS;
+ } else if (is_huge_page(page)) {
+ //huge 1GB page
+ page_size = X86_64_HUGE_PAGE_SIZE;
+ table_base = X86_64_PDPT_BASE(vaddr);
+ map_bits = X86_64_HUGE_PAGE_BITS + X86_64_PTABLE_BITS;
+ }
+
+ // TODO: match new policy of map when implemented
+ size = ROUND_UP(size, page_size);
genvaddr_t vend = vaddr + size;
- if (is_same_pdir(vaddr, vend)) {
+ if (is_same_pdir(vaddr, vend) ||
+ (is_same_pdpt(vaddr, vend) && is_large_page(page)) ||
+ (is_same_pml4(vaddr, vend) && is_huge_page(page)))
+ {
// fast path
- err = do_single_unmap(x86, vaddr, size / X86_64_BASE_PAGE_SIZE, false);
- if (err_is_fail(err)) {
+ err = do_single_unmap(x86, vaddr, size / page_size, false);
+ if (err_is_fail(err) && err_no(err) != LIB_ERR_PMAP_FIND_VNODE) {
+ printf("error fast path\n");
return err_push(err, LIB_ERR_PMAP_UNMAP);
}
}
else { // slow path
// unmap first leaf
- uint32_t c = X86_64_PTABLE_SIZE - X86_64_PTABLE_BASE(vaddr);
+ uint32_t c = X86_64_PTABLE_SIZE - table_base;
+
err = do_single_unmap(x86, vaddr, c, false);
- if (err_is_fail(err)) {
+ if (err_is_fail(err) && err_no(err) != LIB_ERR_PMAP_FIND_VNODE) {
+ printf("error first leaf\n");
return err_push(err, LIB_ERR_PMAP_UNMAP);
}
// unmap full leaves
- vaddr += c * X86_64_BASE_PAGE_SIZE;
- while (get_addr_prefix(vaddr) < get_addr_prefix(vend)) {
+ vaddr += c * page_size;
+ while (get_addr_prefix(vaddr, map_bits) < get_addr_prefix(vend, map_bits)) {
c = X86_64_PTABLE_SIZE;
err = do_single_unmap(x86, vaddr, X86_64_PTABLE_SIZE, true);
- if (err_is_fail(err)) {
+ if (err_is_fail(err) && err_no(err) != LIB_ERR_PMAP_FIND_VNODE) {
+ printf("error while loop\n");
return err_push(err, LIB_ERR_PMAP_UNMAP);
}
- vaddr += c * X86_64_BASE_PAGE_SIZE;
+ vaddr += c * page_size;
}
// unmap remaining part
- c = X86_64_PTABLE_BASE(vend) - X86_64_PTABLE_BASE(vaddr);
+ // subtracting ptable bits from map_bits to get #ptes in last-level table
+ // instead of 2nd-to-last.
+ c = get_addr_prefix(vend, map_bits-X86_64_PTABLE_BITS) -
+ get_addr_prefix(vaddr, map_bits-X86_64_PTABLE_BITS);
+ assert(c < X86_64_PTABLE_SIZE);
if (c) {
err = do_single_unmap(x86, vaddr, c, true);
- if (err_is_fail(err)) {
+ if (err_is_fail(err) && err_no(err) != LIB_ERR_PMAP_FIND_VNODE) {
+ printf("error remaining part\n");
return err_push(err, LIB_ERR_PMAP_UNMAP);
}
}
// we're modifying part of a valid mapped region
// arguments to invocation: invoke frame cap, first affected
// page (as offset from first page in mapping), #affected
- // pages, new flags. Invocation should check compatibility of
- // new set of flags with cap permissions.
+    // pages, new flags. The invocation masks the flags against the
+    // capability's access permissions.
size_t off = ptentry - page->entry;
paging_x86_64_flags_t pmap_flags = vregion_to_pmap_flag(flags);
err = invoke_frame_modify_flags(page->u.frame.cap, off, pages, pmap_flags);
// vaddr and vend specify begin and end of the region (inside a mapping)
// that should receive the new set of flags
+ //
+ // TODO: figure out page_size etc of original mapping
+ uint8_t map_bits = X86_64_BASE_PAGE_BITS + X86_64_PTABLE_BITS;
if (is_same_pdir(vaddr, vend)) {
// fast path
// modify full leaves
vaddr += c * X86_64_BASE_PAGE_SIZE;
- while (get_addr_prefix(vaddr) < get_addr_prefix(vend)) {
+ while (get_addr_prefix(vaddr, map_bits) < get_addr_prefix(vend, map_bits)) {
c = X86_64_PTABLE_SIZE;
err = do_single_modify_flags(x86, vaddr, X86_64_PTABLE_SIZE, flags);
if (err_is_fail(err)) {
{
struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
+ uint32_t base = X86_64_PTABLE_BASE(vaddr);
// Find the page table
struct vnode *ptable = find_ptable(x86, vaddr);
if (ptable == NULL) {
- return LIB_ERR_PMAP_FIND_VNODE;
+ //mapped in pdir?
+ ptable = find_pdir(x86, vaddr);
+ if (ptable == NULL) {
+ return LIB_ERR_PMAP_FIND_VNODE;
+ }
+ base = X86_64_PDIR_BASE(vaddr);
}
// Find the page
- struct vnode *vn = find_vnode(ptable, X86_64_PTABLE_BASE(vaddr));
+ struct vnode *vn = find_vnode(ptable, base);
if (vn == NULL) {
return LIB_ERR_PMAP_FIND_VNODE;
}
return SYS_ERR_OK;
}
+
+
static errval_t dump(struct pmap *pmap, struct pmap_dump_info *buf, size_t buflen, size_t *items_written)
{
struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
return SYS_ERR_OK;
}
+static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
+ size_t alignment, genvaddr_t *retvaddr)
+{
+ struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
+
+ struct vnode *walk_pml4 = x86->root.u.vnode.children;
+ assert(walk_pml4 != NULL); // assume there's always at least one existing entry
+
+ if (alignment == 0) {
+ alignment = BASE_PAGE_SIZE;
+ } else {
+ alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
+ }
+ size = ROUND_UP(size, alignment);
+    assert(size < 512ul * 1024 * 1024 * 1024); // size of one pml4 entry (512 GiB)
+
+ // try to find free pml4 entry
+ bool f[512];
+ for (int i = 0; i < 512; i++) {
+ f[i] = true;
+ }
+ //debug_printf("entry: %d\n", walk_pml4->entry);
+ f[walk_pml4->entry] = false;
+ while (walk_pml4) {
+ //debug_printf("looping over pml4 entries\n");
+ assert(walk_pml4->is_vnode);
+ f[walk_pml4->entry] = false;
+ walk_pml4 = walk_pml4->next;
+ }
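+    // each pml4 entry covers 2^39 bytes (512 GiB) of virtual address space;
+    // pick the first free slot at index 16 or above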
+ genvaddr_t first_free = 16;
+ for (; first_free < 512; first_free++) {
+ //debug_printf("f[%"PRIuGENVADDR"] = %d\n", first_free, f[first_free]);
+ if (f[first_free]) {
+ break;
+ }
+ }
+ //debug_printf("first_free: %"PRIuGENVADDR"\n", first_free);
+ if (first_free < 512) {
+ //debug_printf("first_free: %"PRIuGENVADDR"\n", first_free);
+ *retvaddr = first_free << 39;
+ return SYS_ERR_OK;
+ } else {
+ return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
+ }
+}
+
static struct pmap_funcs pmap_funcs = {
.determine_addr = pmap_x86_determine_addr,
+ .determine_addr_raw = determine_addr_raw,
.map = map,
.unmap = unmap,
.lookup = lookup,
// pointer derefs with suitably large offsets
x86->min_mappable_va = 64 * 1024;
- // maximum mappable VA is drived from X86_64_MEMORY_OFFSET in kernel
+ // maximum mappable VA is derived from X86_64_MEMORY_OFFSET in kernel
x86->max_mappable_va = (genvaddr_t)0xffffff8000000000;
return SYS_ERR_OK;