/**
 * \brief pmap management
 *
 * x86_64 specific management of page tables
 *
 * Warning: This code is coupled with the code in slot_alloc/. and pinned.c
 *
 * The maximum number of slots required to map a BASE_PAGE_SIZE
 * sized page is the number of page table levels + 1.
 * The sum for x86_64 is 4.
 *
 * Warning: Additional slots will be required to map a BASE_PAGE_SIZE
 * sized page if we also track the actual frames that are mapped.
 * Currently this is not the case.
 */

/*
 * Copyright (c) 2009-2013 ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */
#include <barrelfish/barrelfish.h>
#include <barrelfish/dispatch.h>
#include "target/x86/pmap_x86.h"

// Size of virtual region mapped by a single PML4 entry
#define PML4_MAPPING_SIZE ((genvaddr_t)512*512*512*BASE_PAGE_SIZE)

// Location and size of virtual address space reserved for mapping
// frames backing refill_slabs
#define META_DATA_RESERVED_BASE (PML4_MAPPING_SIZE * (disp_get_core_id() + 1))
#define META_DATA_RESERVED_SIZE (X86_64_BASE_PAGE_SIZE * 20000)

// Flags that select large (2 MB) and huge (1 GB) mappings
#define FLAGS_LARGE 0x0100
#define FLAGS_HUGE  0x0200
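// These bits piggy-back on the vregion flags passed to map(): FLAGS_LARGE
// requests 2 MB mappings and FLAGS_HUGE requests 1 GB mappings. They are
// masked out again before the flags are translated to hardware PTE flags
// (see do_single_map() below).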
/**
 * \brief Translate generic vregion flags to architecture-specific pmap flags
 */
static paging_x86_64_flags_t vregion_to_pmap_flag(vregion_flags_t vregion_flags)
{
    paging_x86_64_flags_t pmap_flags =
        PTABLE_USER_SUPERVISOR | PTABLE_EXECUTE_DISABLE;

    if (!(vregion_flags & VREGION_FLAGS_GUARD)) {
        if (vregion_flags & VREGION_FLAGS_WRITE) {
            pmap_flags |= PTABLE_READ_WRITE;
        }
        if (vregion_flags & VREGION_FLAGS_EXECUTE) {
            pmap_flags &= ~PTABLE_EXECUTE_DISABLE;
        }
        if (vregion_flags & VREGION_FLAGS_NOCACHE) {
            pmap_flags |= PTABLE_CACHE_DISABLED;
        }
    }

    return pmap_flags;
}
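/*
 * Address comparison helpers: two addresses are served by the same leaf page
 * table iff they agree in all bits above the 2 MB boundary
 * (X86_64_LARGE_PAGE_BITS), and by the same page directory iff they agree in
 * all bits above the 1 GB boundary (X86_64_HUGE_PAGE_BITS).
 */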
static inline bool is_same_pdir(genvaddr_t va1, genvaddr_t va2)
{
    return (va1 >> X86_64_LARGE_PAGE_BITS) == (va2 >> X86_64_LARGE_PAGE_BITS);
}
static inline bool is_same_pdpt(genvaddr_t va1, genvaddr_t va2)
{
    return (va1 >> X86_64_HUGE_PAGE_BITS) == (va2 >> X86_64_HUGE_PAGE_BITS);
}
static inline genvaddr_t get_addr_prefix(genvaddr_t va)
{
    return va >> X86_64_LARGE_PAGE_BITS;
}
static inline genvaddr_t get_addr_prefix_large(genvaddr_t va)
{
    return va >> X86_64_HUGE_PAGE_BITS;
}
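/**
 * \brief Returns true if root has a child overlapping the given range --
 *        a page table with the same entry, or a frame mapping overlapping
 *        [entry, entry + len).
 */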
static bool has_vnode(struct vnode *root, uint32_t entry, size_t len)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    uint32_t end_entry = entry + len;

    for (n = root->u.vnode.children; n; n = n->next) {
        if (n->is_vnode && n->entry == entry) {
            return true;
        }
        // n is a frame mapping
        uint32_t end = n->entry + n->u.frame.pte_count;
        if (n->entry < entry && end > end_entry) {
            return true;
        }
        if (n->entry >= entry && n->entry < end_entry) {
            return true;
        }
    }

    return false;
}
/**
 * \brief Starting at a given root, return the vnode with starting entry equal to #entry
 */
static struct vnode *find_vnode(struct vnode *root, uint32_t entry)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    for (n = root->u.vnode.children; n != NULL; n = n->next) {
        if (n->entry == entry) {
            return n;
        }
    }
    return NULL;
}
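/**
 * \brief Returns true if the entry range [entry, entry + npages) lies
 *        entirely within a single frame mapping that is a child of root.
 */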
static bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    for (n = root->u.vnode.children; n; n = n->next) {
        if (!n->is_vnode) {
            uint16_t end = n->entry + n->u.frame.pte_count;
            if (n->entry <= entry && entry + npages <= end) {
                return true;
            }
        }
    }
    return false;
}
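/**
 * \brief Unlinks the given child from root's list of children.
 */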
static void remove_vnode(struct vnode *root, struct vnode *item)
{
    assert(root->is_vnode);
    struct vnode *walk = root->u.vnode.children;
    struct vnode *prev = NULL;
    while (walk) {
        if (walk == item) {
            if (prev) {
                prev->next = walk->next;
                return;
            } else {
                root->u.vnode.children = walk->next;
                return;
            }
        }
        prev = walk;
        walk = walk->next;
    }
    assert(!"Should not get here");
}
/**
 * \brief Allocates a new VNode, adding it to the page table and our metadata
 */
static errval_t alloc_vnode(struct pmap_x86 *pmap, struct vnode *root,
                            enum objtype type, uint32_t entry,
                            struct vnode **retvnode)
{
    errval_t err;

    struct vnode *newvnode = slab_alloc(&pmap->slab);
    if (newvnode == NULL) {
        return LIB_ERR_SLAB_ALLOC_FAIL;
    }

    // The VNode capability
    err = pmap->p.slot_alloc->alloc(pmap->p.slot_alloc, &newvnode->u.vnode.cap);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    err = vnode_create(newvnode->u.vnode.cap, type);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_CREATE);
    }

    // Map it into the parent table
    err = vnode_map(root->u.vnode.cap, newvnode->u.vnode.cap, entry,
                    PTABLE_ACCESS_DEFAULT, 0, 1);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_MAP);
    }

    // The VNode meta data
    newvnode->is_vnode = true;
    newvnode->entry    = entry;
    newvnode->next     = root->u.vnode.children;
    root->u.vnode.children = newvnode;
    newvnode->u.vnode.children = NULL;

    *retvnode = newvnode;
    return SYS_ERR_OK;
}
/**
 * \brief Returns the vnode for the page table mapping a given vspace address
 */
static errval_t get_ptable(struct pmap_x86 *pmap, genvaddr_t base,
                           struct vnode **ptable)
{
    errval_t err;
    struct vnode *root = &pmap->root;
    struct vnode *pdpt, *pdir;
    assert(root != NULL);

    // find or create the pdpt
    if ((pdpt = find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
        err = alloc_vnode(pmap, root, ObjType_VNode_x86_64_pdpt,
                          X86_64_PML4_BASE(base), &pdpt);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    // find or create the pdir
    if ((pdir = find_vnode(pdpt, X86_64_PDPT_BASE(base))) == NULL) {
        err = alloc_vnode(pmap, pdpt, ObjType_VNode_x86_64_pdir,
                          X86_64_PDPT_BASE(base), &pdir);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    // find or create the page table
    if ((*ptable = find_vnode(pdir, X86_64_PDIR_BASE(base))) == NULL) {
        err = alloc_vnode(pmap, pdir, ObjType_VNode_x86_64_ptable,
                          X86_64_PDIR_BASE(base), ptable);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    return SYS_ERR_OK;
}
/**
 * \brief Returns the vnode for the page directory (for large pages) mapping a
 *        given vspace address
 */
static errval_t get_pdir(struct pmap_x86 *pmap, genvaddr_t base,
                         struct vnode **pdir)
{
    errval_t err;
    struct vnode *root = &pmap->root;
    struct vnode *pdpt;
    assert(root != NULL);

    // find or create the pdpt
    if ((pdpt = find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
        err = alloc_vnode(pmap, root, ObjType_VNode_x86_64_pdpt,
                          X86_64_PML4_BASE(base), &pdpt);
        if (err_is_fail(err)) {
            printf("failure allocating pdpt\n");
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    // find or create the pdir
    if ((*pdir = find_vnode(pdpt, X86_64_PDPT_BASE(base))) == NULL) {
        err = alloc_vnode(pmap, pdpt, ObjType_VNode_x86_64_pdir,
                          X86_64_PDPT_BASE(base), pdir);
        if (err_is_fail(err)) {
            printf("failure allocating pdir\n");
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    return SYS_ERR_OK;
}
/**
 * \brief Returns the vnode for the pdpt (for huge pages) mapping a given
 *        vspace address
 */
static errval_t get_pdpt(struct pmap_x86 *pmap, genvaddr_t base,
                         struct vnode **pdpt)
{
    errval_t err;
    struct vnode *root = &pmap->root;
    assert(root != NULL);

    // find or create the pdpt
    if ((*pdpt = find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
        err = alloc_vnode(pmap, root, ObjType_VNode_x86_64_pdpt,
                          X86_64_PML4_BASE(base), pdpt);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    return SYS_ERR_OK;
}
/**
 * \brief Returns the vnode for the page table mapping a given vspace address,
 *        without performing allocations as get_ptable() does
 */
static struct vnode *find_ptable(struct pmap_x86 *pmap, genvaddr_t base)
{
    struct vnode *root = &pmap->root;
    struct vnode *pdpt, *pdir;
    assert(root != NULL);

    if ((pdpt = find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
        return NULL;
    }
    if ((pdir = find_vnode(pdpt, X86_64_PDPT_BASE(base))) == NULL) {
        return NULL;
    }
    return find_vnode(pdir, X86_64_PDIR_BASE(base));
}
/**
 * \brief Returns the vnode for the page directory mapping a given vspace
 *        address, without performing allocations as get_pdir() does
 */
static struct vnode *find_pdir(struct pmap_x86 *pmap, genvaddr_t base)
{
    struct vnode *root = &pmap->root;
    struct vnode *pdpt;
    assert(root != NULL);

    if ((pdpt = find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
        return NULL;
    }
    return find_vnode(pdpt, X86_64_PDPT_BASE(base));
}
/**
 * \brief Returns the vnode for the page directory pointer table mapping a
 *        given vspace address
 */
static struct vnode *find_pdpt(struct pmap_x86 *pmap, genvaddr_t base)
{
    struct vnode *root = &pmap->root;
    assert(root != NULL);

    return find_vnode(root, X86_64_PML4_BASE(base));
}
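/**
 * \brief Map a contiguous range of PTEs that fits within one table:
 *        a ptable for 4 KB pages, a pdir for 2 MB pages, or a pdpt for
 *        1 GB pages, allocating intermediate page tables as needed.
 */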
static errval_t do_single_map(struct pmap_x86 *pmap, genvaddr_t vaddr,
                              genvaddr_t vend, struct capref frame,
                              size_t offset, size_t pte_count,
                              vregion_flags_t flags)
{
    errval_t err;
    // translate the flags, masking out the page-size selection bits
    paging_x86_64_flags_t pmap_flags =
        vregion_to_pmap_flag(flags & ~(FLAGS_LARGE | FLAGS_HUGE));

    // Get the paging structure and set paging relevant parameters
    struct vnode *ptable;
    size_t table_base;

    // get the right paging table and address part
    if (flags & FLAGS_LARGE) {
        // large 2MB pages, mapped into pdir
        err = get_pdir(pmap, vaddr, &ptable);
        table_base = X86_64_PDIR_BASE(vaddr);
    } else if (flags & FLAGS_HUGE) {
        // huge 1GB pages, mapped into pdpt
        err = get_pdpt(pmap, vaddr, &ptable);
        table_base = X86_64_PDPT_BASE(vaddr);
    } else {
        // normal 4KB pages, mapped into ptable
        err = get_ptable(pmap, vaddr, &ptable);
        table_base = X86_64_PTABLE_BASE(vaddr);
    }
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
    }

    // check if there is an overlapping mapping
    if (has_vnode(ptable, table_base, pte_count)) {
        printf("page already exists in 0x%"PRIxGENVADDR"--0x%"PRIxGENVADDR"\n",
               vaddr, vend);
        return LIB_ERR_PMAP_EXISTING_MAPPING;
    }

    // setup userspace mapping
    struct vnode *page = slab_alloc(&pmap->slab);
    assert(page);
    page->is_vnode = false;
    page->entry = table_base;
    page->next  = ptable->u.vnode.children;
    ptable->u.vnode.children = page;
    page->u.frame.cap = frame;
    page->u.frame.offset = offset;
    page->u.frame.flags = flags;
    page->u.frame.pte_count = pte_count;

    // do map
    err = vnode_map(ptable->u.vnode.cap, frame, table_base,
                    pmap_flags, offset, pte_count);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_MAP);
    }

    return SYS_ERR_OK;
}
/**
 * \brief Called when enough slabs exist for the given mapping
 */
static errval_t do_map(struct pmap_x86 *pmap, genvaddr_t vaddr,
                       struct capref frame, size_t offset, size_t size,
                       vregion_flags_t flags, size_t *retoff, size_t *retsize)
{
    errval_t err;

    // determine page size and relevant address part
    size_t page_size;
    size_t table_base;
    if (flags & FLAGS_LARGE) {
        // large page branch (2MB)
        page_size  = X86_64_LARGE_PAGE_SIZE;
        table_base = X86_64_PDIR_BASE(vaddr);
    } else if (flags & FLAGS_HUGE) {
        // huge page branch (1GB)
        page_size  = X86_64_HUGE_PAGE_SIZE;
        table_base = X86_64_PDPT_BASE(vaddr);
    } else {
        // normal branch (4KB)
        page_size  = X86_64_BASE_PAGE_SIZE;
        table_base = X86_64_PTABLE_BASE(vaddr);
    }

    // round to the next full page
    size = ROUND_UP(size, page_size);
    size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
    genvaddr_t vend = vaddr + size;

    struct frame_identity fi;
    invoke_frame_identify(frame, &fi);
    genpaddr_t paddr = fi.base + offset;

    debug_printf("do_map: 0x%"
            PRIxGENVADDR"--0x%"PRIxGENVADDR" -> 0x%"PRIxGENPADDR
            "; pte_count = %zd; frame bits = %zd\n", vaddr, vend, paddr,
            pte_count, (size_t)fi.bits);

    // all mapping on one leaf table?
    // trivially true for huge pages
    if (is_same_pdir(vaddr, vend) ||
        (is_same_pdpt(vaddr, vend) && (flags & FLAGS_LARGE)) ||
        (flags & FLAGS_HUGE)) {
        // fast path
        //debug_printf("  do_map: fast path: %zd\n", pte_count);
        err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }
    }
    else { // multiple leaf page tables
        // map the part that reaches the end of the first leaf table
        uint32_t c = X86_64_PTABLE_SIZE - table_base;
        //debug_printf("  do_map: slow path: first leaf %"PRIu32"\n", c);
        genvaddr_t temp_end = vaddr + c * page_size;
        err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        // XXX: huge pages not yet supported on the long path,
        // but determine_addr_raw gives us empty pdpt entries,
        // so we should do it one address at a time anyway

        // this if/else could be pushed up to the other page-specific switch
        // for performance reasons; kept here so far for readability
        bool addr_prefix;
        if (flags & FLAGS_LARGE) {
            addr_prefix = (get_addr_prefix_large(temp_end) < get_addr_prefix_large(vend));
        } else {
            addr_prefix = (get_addr_prefix(temp_end) < get_addr_prefix(vend));
        }

        // map full leaf tables
        while (addr_prefix) {
            // update vars
            vaddr = temp_end;
            temp_end = vaddr + X86_64_PTABLE_SIZE * page_size;
            offset += c * page_size;
            c = X86_64_PTABLE_SIZE;

            // copy cap so each leaf table maps its own copy of the frame cap
            struct capref next;
            err = slot_alloc(&next);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
            err = cap_copy(next, frame);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
            frame = next;

            // do mapping
            //debug_printf("  do_map: slow path: full leaf %d\n", X86_64_PTABLE_SIZE);
            err = do_single_map(pmap, vaddr, temp_end, frame, offset,
                                X86_64_PTABLE_SIZE, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }

            // update loop condition
            if (flags & FLAGS_LARGE) {
                addr_prefix = (get_addr_prefix_large(temp_end) < get_addr_prefix_large(vend));
            } else {
                addr_prefix = (get_addr_prefix(temp_end) < get_addr_prefix(vend));
            }
        }

        // map remaining part
        offset += c * page_size;

        if (flags & FLAGS_LARGE) {
            // large pages: count remaining pdir entries
            c = X86_64_PDIR_BASE(vend) - X86_64_PDIR_BASE(temp_end);
        } else {
            // normal pages: count remaining ptable entries
            c = X86_64_PTABLE_BASE(vend) - X86_64_PTABLE_BASE(temp_end);
        }
        if (c) {
            // copy cap
            struct capref next;
            err = slot_alloc(&next);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
            err = cap_copy(next, frame);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }

            // do mapping
            //debug_printf("do_map: slow path: last leaf %"PRIu32"\n", c);
            err = do_single_map(pmap, temp_end, vend, next, offset, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }
    }

    if (retoff) {
        *retoff = offset;
    }
    if (retsize) {
        *retsize = size;
    }
    return SYS_ERR_OK;
}
/// Compute an upper limit on the number of slabs required to perform a mapping
static size_t max_slabs_for_mapping(size_t bytes)
{
    size_t max_pages  = DIVIDE_ROUND_UP(bytes, X86_64_BASE_PAGE_SIZE);
    size_t max_ptable = DIVIDE_ROUND_UP(max_pages, X86_64_PTABLE_SIZE);
    size_t max_pdir   = DIVIDE_ROUND_UP(max_ptable, X86_64_PTABLE_SIZE);
    size_t max_pdpt   = DIVIDE_ROUND_UP(max_pdir, X86_64_PTABLE_SIZE);
    return max_pages + max_ptable + max_pdir + max_pdpt;
}
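/*
 * Worked example: mapping 2 MB with 4 KB pages gives max_pages = 512 and
 * max_ptable = max_pdir = max_pdpt = 1, so at most 515 slabs of metadata
 * are required.
 */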
/// Same as above, for 2 MB (large) pages
static size_t max_slabs_for_mapping_large(size_t bytes)
{
    size_t max_pages = DIVIDE_ROUND_UP(bytes, X86_64_LARGE_PAGE_SIZE);
    size_t max_pdir  = DIVIDE_ROUND_UP(max_pages, X86_64_PTABLE_SIZE);
    size_t max_pdpt  = DIVIDE_ROUND_UP(max_pdir, X86_64_PTABLE_SIZE);
    return max_pages + max_pdir + max_pdpt;
}

/// Same as above, for 1 GB (huge) pages
static size_t max_slabs_for_mapping_huge(size_t bytes)
{
    size_t max_pages = DIVIDE_ROUND_UP(bytes, X86_64_HUGE_PAGE_SIZE);
    size_t max_pdpt  = DIVIDE_ROUND_UP(max_pages, X86_64_PTABLE_SIZE);
    return max_pages + max_pdpt;
}
/**
 * \brief Refill slabs used for metadata
 *
 * \param pmap     The pmap to refill in
 * \param request  The number of slabs the allocator must have
 *                 when the function returns
 *
 * When the current pmap is initialized,
 * it reserves some virtual address space for metadata.
 * This reserved address space is used here.
 *
 * Can only be called for the current pmap.
 * Will recursively call into itself until it has enough slabs.
 */
static errval_t refill_slabs(struct pmap_x86 *pmap, size_t request)
{
    errval_t err;

    /* Keep looping until we have #request slabs */
    while (slab_freecount(&pmap->slab) < request) {
        // Amount of bytes required for #request
        size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
                                        sizeof(struct vnode));

        /* Get a frame of that size */
        struct capref cap;
        err = frame_alloc(&cap, bytes, &bytes);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_FRAME_ALLOC);
        }

        /* If we do not have enough slabs to map the frame in, recurse */
        size_t required_slabs_for_frame = max_slabs_for_mapping(bytes);
        if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
            // If we recurse, we require more slabs than to map a single page
            assert(required_slabs_for_frame > 4);

            err = refill_slabs(pmap, required_slabs_for_frame);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }

        /* Perform mapping */
        genvaddr_t genvaddr = pmap->vregion_offset;
        pmap->vregion_offset += (genvaddr_t)bytes;
        assert(pmap->vregion_offset < vregion_get_base_addr(&pmap->vregion) +
               vregion_get_size(&pmap->vregion));

        err = do_map(pmap, genvaddr, cap, 0, bytes,
                     VREGION_FLAGS_READ_WRITE, NULL, NULL);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        /* Grow the slab allocator with the newly mapped memory */
        lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
        slab_grow(&pmap->slab, (void*)buf, bytes);
    }

    return SYS_ERR_OK;
}
/// Minimally refill the slab allocator
static errval_t min_refill_slabs(struct pmap_x86 *pmap)
{
    return refill_slabs(pmap, 5);
}
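/*
 * The request of 5 slabs here (and the "+ 5" margin in map() below) is
 * presumably chosen to cover the worst case for a single BASE_PAGE_SIZE
 * mapping noted at the top of this file -- one slab per page-table level
 * that may need to be created plus one for the frame metadata -- with a
 * little slack.
 */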
/**
 * \brief Create page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The virtual address to create the mapping for
 * \param frame    The frame cap to map in
 * \param offset   Offset into the frame cap
 * \param size     Size of the mapping
 * \param flags    Flags for the mapping
 * \param retoff   If non-NULL, filled in with adjusted offset of mapped region
 * \param retsize  If non-NULL, filled in with adjusted size of mapped region
 */
static errval_t map(struct pmap *pmap, genvaddr_t vaddr, struct capref frame,
                    size_t offset, size_t size, vregion_flags_t flags,
                    size_t *retoff, size_t *retsize)
{
    errval_t err;
    struct pmap_x86 *x86 = (struct pmap_x86*)pmap;

    size_t max_slabs;
    // Adjust the parameters to page boundaries
    if (flags & FLAGS_LARGE) {
        // case large pages (2MB)
        size   += LARGE_PAGE_OFFSET(offset);
        size    = ROUND_UP(size, LARGE_PAGE_SIZE);
        offset -= LARGE_PAGE_OFFSET(offset);
        max_slabs = max_slabs_for_mapping_large(size);
    } else if (flags & FLAGS_HUGE) {
        // case huge pages (1GB)
        size   += HUGE_PAGE_OFFSET(offset);
        size    = ROUND_UP(size, HUGE_PAGE_SIZE);
        offset -= HUGE_PAGE_OFFSET(offset);
        max_slabs = max_slabs_for_mapping_huge(size);
    } else {
        // case normal pages (4KB)
        size   += BASE_PAGE_OFFSET(offset);
        size    = ROUND_UP(size, BASE_PAGE_SIZE);
        offset -= BASE_PAGE_OFFSET(offset);
        max_slabs = max_slabs_for_mapping(size);
    }

    // Refill slab allocator if necessary
    size_t slabs_free = slab_freecount(&x86->slab);

    max_slabs += 5; // minimum amount required to map a page
    if (slabs_free < max_slabs) {
        struct pmap *mypmap = get_current_pmap();
        if (pmap == mypmap) {
            err = refill_slabs(x86, max_slabs);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        } else {
            size_t bytes = SLAB_STATIC_SIZE(max_slabs - slabs_free,
                                            sizeof(struct vnode));
            void *buf = malloc(bytes);
            if (!buf) {
                return LIB_ERR_MALLOC_FAIL;
            }
            slab_grow(&x86->slab, buf, bytes);
        }
    }

    err = do_map(x86, vaddr, frame, offset, size, flags, retoff, retsize);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_DO_MAP);
    }
    return SYS_ERR_OK;
}
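/*
 * Usage sketch (not part of this file): a hypothetical caller that wants a
 * 2 MB-backed mapping passes the size-selection bit together with the usual
 * vregion flags, e.g.
 *
 *   err = pmap->f.map(pmap, vaddr, frame, 0, X86_64_LARGE_PAGE_SIZE,
 *                     VREGION_FLAGS_READ_WRITE | FLAGS_LARGE,
 *                     &retoff, &retsize);
 *
 * map() then rounds size and offset to 2 MB boundaries, refills the slab
 * allocator if necessary, and forwards FLAGS_LARGE down to do_map().
 */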
/**
 * \brief Unmap a contiguous range of PTEs that lies within a single table
 */
static errval_t do_single_unmap(struct pmap_x86 *pmap, genvaddr_t vaddr,
                                size_t pte_count, bool delete_cap)
{
    errval_t err;
    bool large_flag = false;
    bool huge_flag = false;
    struct vnode *lpage = NULL;
    // determine if we unmap a large or huge page: find the page's metadata.
    // frame vnodes are stored at their index within their parent table, so a
    // huge mapping sits in the pdpt at X86_64_PDPT_BASE(vaddr) and a large
    // mapping sits in the pdir at X86_64_PDIR_BASE(vaddr).
    if ((lpage = find_pdpt(pmap, vaddr)) != NULL) {
        if ((lpage = find_vnode(lpage, X86_64_PDPT_BASE(vaddr))) != NULL) {
            huge_flag = !lpage->is_vnode && (lpage->u.frame.flags & FLAGS_HUGE);
        }
    }
    if ((lpage = find_pdir(pmap, vaddr)) != NULL && !huge_flag) {
        if ((lpage = find_vnode(lpage, X86_64_PDIR_BASE(vaddr))) != NULL) {
            large_flag = !lpage->is_vnode && (lpage->u.frame.flags & FLAGS_LARGE);
        }
    }

    // find the table that holds the mapping
    struct vnode *pt = find_ptable(pmap, vaddr);
    if (large_flag) {
        pt = find_pdir(pmap, vaddr);
    } else if (huge_flag) {
        pt = find_pdpt(pmap, vaddr);
    }

    if (pt) {
        struct vnode *page;
        if (large_flag) {
            page = find_vnode(pt, X86_64_PDIR_BASE(vaddr));
        } else if (huge_flag) {
            page = find_vnode(pt, X86_64_PDPT_BASE(vaddr));
        } else {
            page = find_vnode(pt, X86_64_PTABLE_BASE(vaddr));
        }

        if (page && page->u.frame.pte_count == pte_count) {
            err = vnode_unmap(pt->u.vnode.cap, page->u.frame.cap, page->entry,
                              page->u.frame.pte_count);
            if (err_is_fail(err)) {
                printf("vnode_unmap returned error: %s (%d)\n",
                       err_getstring(err), err_no(err));
                return err_push(err, LIB_ERR_VNODE_UNMAP);
            }

            // Free up the resources
            if (delete_cap) {
                err = cap_destroy(page->u.frame.cap);
                if (err_is_fail(err)) {
                    printf("do_single_unmap: cap_destroy failed\n");
                    return err_push(err, LIB_ERR_PMAP_DO_SINGLE_UNMAP);
                }
            }
            remove_vnode(pt, page);
            slab_free(&pmap->slab, page);
        }
        else {
            printf("do_single_unmap: no matching mapping found\n");
            return LIB_ERR_PMAP_FIND_VNODE;
        }
    }

    return SYS_ERR_OK;
}
/**
 * \brief Remove page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The start of the virtual address range to remove
 * \param size     The size of the virtual address range to remove
 * \param retsize  If non-NULL, filled in with the actual size removed
 */
static errval_t unmap(struct pmap *pmap, genvaddr_t vaddr, size_t size,
                      size_t *retsize)
{
    //printf("[unmap] 0x%"PRIxGENVADDR", %zu\n", vaddr, size);
    errval_t err, ret = SYS_ERR_OK;
    struct pmap_x86 *x86 = (struct pmap_x86*)pmap;

    // determine if we unmap a larger page:
    // find the table, then the entry (frame vnodes are stored at their index
    // within their parent table)
    struct vnode *page = NULL;
    bool large_flag = false;
    bool huge_flag = false;
    if ((page = find_pdpt(x86, vaddr)) != NULL) {
        if ((page = find_vnode(page, X86_64_PDPT_BASE(vaddr))) != NULL) {
            huge_flag = !page->is_vnode && (page->u.frame.flags & FLAGS_HUGE);
        }
    }
    if ((page = find_pdir(x86, vaddr)) != NULL && !huge_flag) {
        if ((page = find_vnode(page, X86_64_PDIR_BASE(vaddr))) != NULL) {
            large_flag = !page->is_vnode && (page->u.frame.flags & FLAGS_LARGE);
        }
    }

    size_t page_size  = X86_64_BASE_PAGE_SIZE;
    size_t table_base = X86_64_PTABLE_BASE(vaddr);
    if (large_flag) {
        page_size  = X86_64_LARGE_PAGE_SIZE;
        table_base = X86_64_PDIR_BASE(vaddr);
    } else if (huge_flag) {
        page_size  = X86_64_HUGE_PAGE_SIZE;
        table_base = X86_64_PDPT_BASE(vaddr);
    }

    size = ROUND_UP(size, page_size);
    genvaddr_t vend = vaddr + size;

    if (is_same_pdir(vaddr, vend) || (is_same_pdpt(vaddr, vend) && large_flag)
        || huge_flag) {
        // fast path: the whole range lies within one leaf table
        err = do_single_unmap(x86, vaddr, size / page_size, false);
        if (err_is_fail(err)) {
            printf("error fast path\n");
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    }
    else { // slow path
        // unmap the remainder of the first leaf table
        uint32_t c = X86_64_PTABLE_SIZE - table_base;

        err = do_single_unmap(x86, vaddr, c, false);
        if (err_is_fail(err)) {
            printf("error first leaf\n");
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }

        // unmap full leaf tables
        vaddr += c * page_size;

        // this if/else could be combined with the one above for performance
        // reasons; kept here for readability
        bool addr_prefix;
        if (large_flag) {
            addr_prefix = (get_addr_prefix_large(vaddr) < get_addr_prefix_large(vend));
        } else {
            addr_prefix = (get_addr_prefix(vaddr) < get_addr_prefix(vend));
        }
        while (addr_prefix) {
            c = X86_64_PTABLE_SIZE;
            err = do_single_unmap(x86, vaddr, X86_64_PTABLE_SIZE, true);
            if (err_is_fail(err)) {
                printf("error while loop\n");
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
            vaddr += c * page_size;

            // update loop condition
            if (large_flag) {
                addr_prefix = (get_addr_prefix_large(vaddr) < get_addr_prefix_large(vend));
            } else {
                addr_prefix = (get_addr_prefix(vaddr) < get_addr_prefix(vend));
            }
        }

        // unmap remaining part
        c = X86_64_PTABLE_BASE(vend) - X86_64_PTABLE_BASE(vaddr);
        if (large_flag) {
            c = X86_64_PDIR_BASE(vend) - X86_64_PDIR_BASE(vaddr);
        } else if (huge_flag) {
            c = X86_64_PDPT_BASE(vend) - X86_64_PDPT_BASE(vaddr);
        }

        err = do_single_unmap(x86, vaddr, c, true);
        if (err_is_fail(err)) {
            printf("error remaining part\n");
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    }

    if (retsize) {
        *retsize = size;
    }

    //printf("[unmap] exiting\n");
    return ret;
}
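/**
 * \brief Apply a new set of flags to a range of pages that lies within a
 *        single leaf page table (4 KB mappings only).
 */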
static errval_t do_single_modify_flags(struct pmap_x86 *pmap, genvaddr_t vaddr,
                                       size_t pages, vregion_flags_t flags)
{
    errval_t err = SYS_ERR_OK;
    struct vnode *ptable = find_ptable(pmap, vaddr);
    uint16_t ptentry = X86_64_PTABLE_BASE(vaddr);
    if (ptable) {
        struct vnode *page = find_vnode(ptable, ptentry);
        if (page) {
            if (inside_region(ptable, ptentry, pages)) {
                // we're modifying part of a valid mapped region
                // arguments to invocation: invoke frame cap, first affected
                // page (as offset from first page in mapping), #affected
                // pages, new flags. Invocation should check compatibility of
                // new set of flags with cap permissions.
                size_t off = ptentry - page->entry;
                paging_x86_64_flags_t pmap_flags = vregion_to_pmap_flag(flags);
                err = invoke_frame_modify_flags(page->u.frame.cap, off, pages, pmap_flags);
                if (err_is_fail(err)) {
                    printf("invoke_frame_modify_flags returned error: %s (%"PRIuERRV")\n",
                           err_getstring(err), err);
                }
                return err;
            } else {
                // overlaps some region border
                return LIB_ERR_PMAP_EXISTING_MAPPING;
            }
        }
    }

    return SYS_ERR_OK;
}
/**
 * \brief Modify page mapping
 *
 * \param pmap     The pmap object
 * \param vaddr    The virtual address of the first page to modify
 * \param size     The size of the region to modify
 * \param flags    New flags for the mapping
 * \param retsize  If non-NULL, filled in with the actual size modified
 */
static errval_t modify_flags(struct pmap *pmap, genvaddr_t vaddr, size_t size,
                             vregion_flags_t flags, size_t *retsize)
{
    errval_t err;
    struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
    size = ROUND_UP(size, X86_64_BASE_PAGE_SIZE);
    size_t pages = size / X86_64_BASE_PAGE_SIZE;
    genvaddr_t vend = vaddr + size;

    // vaddr and vend specify begin and end of the region (inside a mapping)
    // that should receive the new set of flags

    if (is_same_pdir(vaddr, vend)) {
        // fast path: the region lies within a single leaf page table
        err = do_single_modify_flags(x86, vaddr, pages, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
        }
    }
    else { // slow path
        // modify the remainder of the first leaf page table
        uint32_t c = X86_64_PTABLE_SIZE - X86_64_PTABLE_BASE(vaddr);
        err = do_single_modify_flags(x86, vaddr, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
        }

        // modify full leaves
        vaddr += c * X86_64_BASE_PAGE_SIZE;
        while (get_addr_prefix(vaddr) < get_addr_prefix(vend)) {
            c = X86_64_PTABLE_SIZE;
            err = do_single_modify_flags(x86, vaddr, X86_64_PTABLE_SIZE, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
            }
            vaddr += c * X86_64_BASE_PAGE_SIZE;
        }

        // modify remaining part
        c = X86_64_PTABLE_BASE(vend) - X86_64_PTABLE_BASE(vaddr);
        if (c) {
            err = do_single_modify_flags(x86, vaddr, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    //printf("[modify_flags] exiting\n");
    return SYS_ERR_OK;
}
/**
 * \brief Query existing page mapping
 *
 * \param pmap      The pmap object
 * \param vaddr     The virtual address to query
 * \param retvaddr  Returns the base virtual address of the mapping
 * \param retsize   Returns the actual size of the mapping
 * \param retcap    Returns the cap mapped at this address
 * \param retoffset Returns the offset within the cap that is mapped
 * \param retflags  Returns the flags for this mapping
 *
 * All of the ret parameters are optional.
 */
static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
                       genvaddr_t *retvaddr, size_t *retsize,
                       struct capref *retcap, genvaddr_t *retoffset,
                       vregion_flags_t *retflags)
{
    struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;

    uint32_t base = X86_64_PTABLE_BASE(vaddr);
    // Find the page table
    struct vnode *ptable = find_ptable(x86, vaddr);
    if (ptable == NULL) {
        // the mapping may instead live in a page directory (large page)
        ptable = find_pdir(x86, vaddr);
        if (ptable == NULL) {
            return LIB_ERR_PMAP_FIND_VNODE;
        }
        base = X86_64_PDIR_BASE(vaddr);
    }

    // Find the page
    struct vnode *vn = find_vnode(ptable, base);
    if (vn == NULL) {
        return LIB_ERR_PMAP_FIND_VNODE;
    }

    if (retvaddr) {
        *retvaddr = vaddr & ~(genvaddr_t)BASE_PAGE_MASK;
    }
    if (retsize) {
        // NB: always reports BASE_PAGE_SIZE, even when the mapping was found
        // in a page directory (large page)
        *retsize = BASE_PAGE_SIZE;
    }
    if (retcap) {
        *retcap = vn->u.frame.cap;
    }
    if (retoffset) {
        *retoffset = vn->u.frame.offset;
    }
    if (retflags) {
        *retflags = vn->u.frame.flags;
    }

    return SYS_ERR_OK;
}
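/**
 * \brief Dump the pmap's mapping metadata into buf (at most buflen entries),
 *        walking the vnode tree pml4 -> pdpt -> pdir -> ptable -> frames.
 */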
static errval_t dump(struct pmap *pmap, struct pmap_dump_info *buf,
                     size_t buflen, size_t *items_written)
{
    struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
    struct pmap_dump_info *buf_ = buf;

    struct vnode *pml4 = &x86->root;
    struct vnode *pdpt, *pdir, *pt, *frame;
    assert(pml4 != NULL);

    *items_written = 0;

    // iterate over PML4 entries
    size_t pml4_index, pdpt_index, pdir_index;
    for (pdpt = pml4->u.vnode.children; pdpt != NULL; pdpt = pdpt->next) {
        pml4_index = pdpt->entry;
        // iterate over pdpt entries
        for (pdir = pdpt->u.vnode.children; pdir != NULL; pdir = pdir->next) {
            pdpt_index = pdir->entry;
            // iterate over pdir entries
            for (pt = pdir->u.vnode.children; pt != NULL; pt = pt->next) {
                pdir_index = pt->entry;
                // iterate over pt entries
                for (frame = pt->u.vnode.children; frame != NULL; frame = frame->next) {
                    if (*items_written < buflen) {
                        buf_->pml4_index = pml4_index;
                        buf_->pdpt_index = pdpt_index;
                        buf_->pdir_index = pdir_index;
                        buf_->pt_index   = frame->entry;
                        buf_->cap        = frame->u.frame.cap;
                        buf_->offset     = frame->u.frame.offset;
                        buf_->flags      = frame->u.frame.flags;
                        buf_++;
                        (*items_written)++;
                    }
                }
            }
        }
    }
    return SYS_ERR_OK;
}
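/**
 * \brief Find an unused range of virtual address space by picking a free
 *        PML4 entry (each entry covers 512 GB of virtual address space);
 *        entries below 16 are never handed out.
 */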
static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
                                   size_t alignment, genvaddr_t *retvaddr)
{
    struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;

    struct vnode *walk_pml4 = x86->root.u.vnode.children;
    assert(walk_pml4 != NULL); // assume there's always at least one existing entry

    if (alignment == 0) {
        alignment = BASE_PAGE_SIZE;
    } else {
        alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
    }
    size = ROUND_UP(size, alignment);
    assert(size < 1ul * 1024 * 1024 * 1024); // keep well within one PML4 entry (512 GB)

    // try to find a free pml4 entry
    bool f[512];
    for (int i = 0; i < 512; i++) {
        f[i] = true;
    }

    //debug_printf("entry: %d\n", walk_pml4->entry);
    f[walk_pml4->entry] = false;
    while (walk_pml4) {
        //debug_printf("looping over pml4 entries\n");
        assert(walk_pml4->is_vnode);
        f[walk_pml4->entry] = false;
        walk_pml4 = walk_pml4->next;
    }
    genvaddr_t first_free = 16;
    for (; first_free < 512; first_free++) {
        //debug_printf("f[%"PRIuGENVADDR"] = %d\n", first_free, f[first_free]);
        if (f[first_free]) {
            break;
        }
    }
    //debug_printf("first_free: %"PRIuGENVADDR"\n", first_free);
    if (first_free < 512) {
        // each PML4 entry covers 2^39 bytes of virtual address space
        *retvaddr = first_free << 39;
        return SYS_ERR_OK;
    } else {
        return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
    }
}
static struct pmap_funcs pmap_funcs = {
    .determine_addr = pmap_x86_determine_addr,
    .determine_addr_raw = determine_addr_raw,
    .map = map,
    .unmap = unmap,
    .lookup = lookup,
    .modify_flags = modify_flags,
    .serialise = pmap_x86_serialise,
    .deserialise = pmap_x86_deserialise,
    .dump = dump,
};
/**
 * \brief Initialize an x86 pmap object
 *
 * \param pmap Pmap object of type x86
 */
errval_t pmap_x86_64_init(struct pmap *pmap, struct vspace *vspace,
                          struct capref vnode,
                          struct slot_allocator *opt_slot_alloc)
{
    struct pmap_x86 *x86 = (struct pmap_x86*)pmap;

    /* Generic portion */
    pmap->f = pmap_funcs;
    pmap->vspace = vspace;

    if (opt_slot_alloc != NULL) {
        pmap->slot_alloc = opt_slot_alloc;
    } else { /* use default allocator for this dispatcher */
        pmap->slot_alloc = get_default_slot_allocator();
    }

    /* x86 specific portion */
    slab_init(&x86->slab, sizeof(struct vnode), NULL);
    slab_grow(&x86->slab, x86->slab_buffer,
              sizeof(x86->slab_buffer));
    x86->refill_slabs = min_refill_slabs;

    x86->root.is_vnode         = true;
    x86->root.u.vnode.cap      = vnode;
    x86->root.u.vnode.children = NULL;
    x86->root.next             = NULL;

    // choose a minimum mappable VA for most domains; enough to catch NULL
    // pointer derefs with suitably large offsets
    x86->min_mappable_va = 64 * 1024;

    // maximum mappable VA is derived from X86_64_MEMORY_OFFSET in the kernel
    x86->max_mappable_va = (genvaddr_t)0xffffff8000000000;

    return SYS_ERR_OK;
}
/**
 * \brief Initialize the current pmap. Reserve space for metadata
 *
 * This code is coupled with #vspace_current_init()
 */
errval_t pmap_x86_64_current_init(bool init_domain)
{
    struct pmap_x86 *x86 = (struct pmap_x86*)get_current_pmap();

    // To reserve a block of virtual address space,
    // a vregion representing the address space is required.
    // We construct a superficial one here and add it to the vregion list.
    struct vregion *vregion = &x86->vregion;
    vregion->vspace = NULL;
    vregion->memobj = NULL;
    vregion->base   = META_DATA_RESERVED_BASE;
    vregion->offset = 0;
    vregion->size   = META_DATA_RESERVED_SIZE;
    vregion->next   = NULL;

    struct vspace *vspace = x86->p.vspace;
    assert(!vspace->head);
    vspace->head = vregion;

    x86->vregion_offset = x86->vregion.base;

    // We don't know the vnode layout for the first part of our address space
    // (which was set up by the kernel), so we avoid mapping there until we
    // are told about it.
    x86->min_mappable_va = META_DATA_RESERVED_BASE;