3 * \brief pmap management
5 * x86_64 specific management of page tables
7 * Warning: This code is coupled with the code in slot_alloc/ and pinned.c.
9 * The maximum number of slots required to map a BASE_PAGE_SIZE
10 * sized page is the number of page table levels + 1.
11 * The sum for x86_64 is 4.
13 * Warning: Additional slots will be required to map a BASE_PAGE_SIZE sized page,
14 * if we also track the actual frames that are mapped.
15 * Currently this is not the case.
19 * Copyright (c) 2009-2013 ETH Zurich.
20 * Copyright (c) 2014 HP Labs.
21 * All rights reserved.
23 * This file is distributed under the terms in the attached LICENSE file.
24 * If you do not find this file, copies can be found by writing to:
25 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
28 #include <barrelfish/barrelfish.h>
29 #include <barrelfish/dispatch.h>
30 #include "target/x86/pmap_x86.h"
32 #include <barrelfish/cap_predicates.h>
33 #include <pmap_priv.h>
34 #include <pmap_ds.h> // pull in selected pmap datastructure implementation
37 #include <trace/trace.h>
38 #include <trace_definitions/trace_defs.h>
41 * \brief Translate generic vregion flags to architecture specific pmap flags
43 static paging_x86_64_flags_t vregion_to_pmap_flag(vregion_flags_t vregion_flags)
45 paging_x86_64_flags_t pmap_flags =
46 PTABLE_USER_SUPERVISOR | PTABLE_EXECUTE_DISABLE;
48 if (!(vregion_flags & VREGION_FLAGS_GUARD)) {
49 if (vregion_flags & VREGION_FLAGS_WRITE) {
50 pmap_flags |= PTABLE_READ_WRITE;
52 if (vregion_flags & VREGION_FLAGS_EXECUTE) {
53 pmap_flags &= ~PTABLE_EXECUTE_DISABLE;
55 if (vregion_flags & VREGION_FLAGS_NOCACHE) {
56 pmap_flags |= PTABLE_CACHE_DISABLED;
58 else if (vregion_flags & VREGION_FLAGS_WRITE_COMBINING) {
59 // PAT entry 4 (PA4) is configured as write-combining
60 pmap_flags |= PTABLE_ATTR_INDEX;
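/*
 * Illustrative example (sketch): a plain read-write, non-executable mapping
 * translates as
 *
 *   paging_x86_64_flags_t f = vregion_to_pmap_flag(VREGION_FLAGS_READ_WRITE);
 *   // f == PTABLE_USER_SUPERVISOR | PTABLE_EXECUTE_DISABLE | PTABLE_READ_WRITE
 */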
67 // returns whether va1 and va2 share a page directory entry
68 // not using X86_64_PDIR_BASE() macro as this would give false positives (same
69 // entry in different directories)
70 static inline bool is_same_pdir(genvaddr_t va1, genvaddr_t va2)
72 return (va1>>X86_64_LARGE_PAGE_BITS) == ((va2-1)>>X86_64_LARGE_PAGE_BITS);
74 // returns whether va1 and va2 share a page directory pointer table entry
75 static inline bool is_same_pdpt(genvaddr_t va1, genvaddr_t va2)
77 return (va1>>X86_64_HUGE_PAGE_BITS) == ((va2-1)>>X86_64_HUGE_PAGE_BITS);
79 // returns whether va1 and va2 share a page map level 4 entry
80 static inline bool is_same_pml4(genvaddr_t va1, genvaddr_t va2)
82 // the base macros work here as we only have one pml4.
83 return X86_64_PML4_BASE(va1) == X86_64_PML4_BASE(va2-1);
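/*
 * Worked example (sketch): for a mapping of [0x200000, 0x400000), i.e. exactly
 * one leaf page table worth of 4 KiB pages, vend is the exclusive end
 * 0x400000. Using (va2 - 1) = 0x3fffff keeps the comparison within the same
 * 2 MiB-aligned region as va1, so is_same_pdir() correctly reports the fast
 * path; without the -1 the exclusive end would appear to fall into the next
 * page directory entry.
 */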
85 // size indicates how many bits to shift
86 static inline genvaddr_t get_addr_prefix(genvaddr_t va, uint8_t size)
91 static inline bool is_large_page(struct vnode *p)
93 return !p->v.is_vnode && p->v.u.frame.flags & VREGION_FLAGS_LARGE;
96 static inline bool is_huge_page(struct vnode *p)
98 return !p->v.is_vnode && p->v.u.frame.flags & VREGION_FLAGS_HUGE;
102 * \brief Returns the vnode for the pdpt mapping a given vspace address
104 errval_t get_pdpt(struct pmap_x86 *pmap, genvaddr_t base,
105 struct vnode **pdpt);
106 errval_t get_pdpt(struct pmap_x86 *pmap, genvaddr_t base,
110 struct vnode *root = &pmap->root;
111 assert(root != NULL);
114 if((*pdpt = pmap_find_vnode(root, X86_64_PML4_BASE(base))) == NULL) {
115 enum objtype type = type_is_ept(pmap->root.v.type) ?
116 ObjType_VNode_x86_64_ept_pdpt :
117 ObjType_VNode_x86_64_pdpt;
118 err = alloc_vnode(pmap, root, type, X86_64_PML4_BASE(base),
120 errval_t expected_concurrent = err_push(SYS_ERR_VNODE_SLOT_INUSE, LIB_ERR_VNODE_MAP);
121 if (err == expected_concurrent) {
122 if ((*pdpt = pmap_find_vnode(root, X86_64_PML4_BASE(base))) != NULL) {
126 if (err_is_fail(err)) {
127 DEBUG_ERR(err, "alloc_vnode for pdpt");
128 return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
136 * \brief Returns the vnode for the page directory mapping a given vspace
139 errval_t get_pdir(struct pmap_x86 *pmap, genvaddr_t base,
140 struct vnode **pdir);
141 errval_t get_pdir(struct pmap_x86 *pmap, genvaddr_t base,
146 err = get_pdpt(pmap, base, &pdpt);
147 if (err_is_fail(err)) {
150 assert(pdpt != NULL);
153 if((*pdir = pmap_find_vnode(pdpt, X86_64_PDPT_BASE(base))) == NULL) {
154 enum objtype type = type_is_ept(pmap->root.v.type) ?
155 ObjType_VNode_x86_64_ept_pdir :
156 ObjType_VNode_x86_64_pdir;
157 err = alloc_vnode(pmap, pdpt, type,
158 X86_64_PDPT_BASE(base), pdir, base);
159 errval_t expected_concurrent = err_push(SYS_ERR_VNODE_SLOT_INUSE, LIB_ERR_VNODE_MAP);
160 if (err == expected_concurrent) {
161 if ((*pdir = pmap_find_vnode(pdpt, X86_64_PDPT_BASE(base))) != NULL) {
165 if (err_is_fail(err)) {
166 DEBUG_ERR(err, "alloc_vnode for pdir");
167 return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
175 * \brief Returns the vnode for the pagetable mapping a given vspace address
177 errval_t get_ptable(struct pmap_x86 *pmap, genvaddr_t base,
178 struct vnode **ptable);
179 errval_t get_ptable(struct pmap_x86 *pmap, genvaddr_t base,
180 struct vnode **ptable)
184 err = get_pdir(pmap, base, &pdir);
185 if (err_is_fail(err)) {
188 assert(pdir != NULL);
191 if((*ptable = pmap_find_vnode(pdir, X86_64_PDIR_BASE(base))) == NULL) {
192 enum objtype type = type_is_ept(pmap->root.v.type) ?
193 ObjType_VNode_x86_64_ept_ptable :
194 ObjType_VNode_x86_64_ptable;
195 err = alloc_vnode(pmap, pdir, type,
196 X86_64_PDIR_BASE(base), ptable, base);
197 errval_t expected_concurrent = err_push(SYS_ERR_VNODE_SLOT_INUSE, LIB_ERR_VNODE_MAP);
198 if (err == expected_concurrent) {
199 if ((*ptable = pmap_find_vnode(pdir, X86_64_PDIR_BASE(base))) != NULL) {
203 if (err_is_fail(err)) {
204 DEBUG_ERR(err, "alloc_vnode for ptable");
205 return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
213 * \brief Returns the vnode for the page directory pointer table mapping for a
214 * given vspace address
216 static inline struct vnode *find_pdpt(struct pmap_x86 *pmap, genvaddr_t base)
218 struct vnode *root = &pmap->root;
219 assert(root != NULL);
222 return pmap_find_vnode(root, X86_64_PML4_BASE(base));
226 * \brief Returns the vnode for the page directory mapping a given vspace
227 * address, without performing allocations as get_pdir() does
229 static inline struct vnode *find_pdir(struct pmap_x86 *pmap, genvaddr_t base)
231 struct vnode *pdpt = find_pdpt(pmap, base);
235 return pmap_find_vnode(pdpt, X86_64_PDPT_BASE(base));
242 * \brief Returns the vnode for the pagetable mapping a given vspace address,
243 * without performing allocations as get_ptable() does
245 static inline struct vnode *find_ptable(struct pmap_x86 *pmap, genvaddr_t base)
247 struct vnode *pdir = find_pdir(pmap, base);
251 return pmap_find_vnode(pdir, X86_64_PDIR_BASE(base));
257 // TODO: documentation for this feature! -SG,2018-10-18
258 size_t ALL_THE_VNODES_MAX_ENTRIES = (15*4096);
259 struct vnode **ALL_THE_VNODES = NULL;
260 size_t all_the_vnodes_cnt = 0;
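/*
 * Undocumented feature (best-effort description): if code outside this file
 * allocates and installs ALL_THE_VNODES, do_single_map() below records every
 * 4 KiB leaf page table it touches in this array, up to
 * ALL_THE_VNODES_MAX_ENTRIES, presumably so that callers can later iterate
 * over all leaf page tables of this pmap.
 */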
262 static errval_t do_single_map(struct pmap_x86 *pmap, genvaddr_t vaddr,
263 genvaddr_t vend, struct capref frame,
264 size_t offset, size_t pte_count,
265 vregion_flags_t flags)
267 if (pte_count == 0) {
268 debug_printf("do_single_map: pte_count == 0, called from %p\n",
269 __builtin_return_address(0));
272 assert(pte_count > 0);
274 paging_x86_64_flags_t pmap_flags = vregion_to_pmap_flag(flags);
276 // Get the paging structure and set paging relevant parameters
277 struct vnode *ptable = NULL;
281 // get the right paging table and address part
282 if (flags & VREGION_FLAGS_LARGE) {
283 //large 2M pages, mapped into pdir
284 err = get_pdir(pmap, vaddr, &ptable);
285 table_base = X86_64_PDIR_BASE(vaddr);
286 } else if (flags & VREGION_FLAGS_HUGE) {
287 //huge 1GB pages, mapped into pdpt
288 err = get_pdpt(pmap, vaddr, &ptable);
289 table_base = X86_64_PDPT_BASE(vaddr);
291 //normal 4K pages, mapped into ptable
292 err = get_ptable(pmap, vaddr, &ptable);
293 table_base = X86_64_PTABLE_BASE(vaddr);
294 if (ALL_THE_VNODES && (all_the_vnodes_cnt+1) < ALL_THE_VNODES_MAX_ENTRIES) {
295 ALL_THE_VNODES[all_the_vnodes_cnt++] = ptable;
298 if (err_is_fail(err)) {
299 return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
301 assert(ptable->v.is_vnode);
303 // check if there is an overlapping mapping
304 if (has_vnode(ptable, table_base, pte_count, false)) {
305 if (has_vnode(ptable, table_base, pte_count, true)) {
306 printf("page already exists in 0x%"
307 PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
308 return LIB_ERR_PMAP_EXISTING_MAPPING;
310 // clean out empty page tables. We do this here because we benefit
311 // from having the page tables in place when doing lots of small
312 // mappings
313 remove_empty_vnodes(pmap, ptable, table_base, pte_count);
317 // setup userspace mapping
318 struct vnode *page = slab_alloc(&pmap->p.m.slab);
320 page->v.is_vnode = false;
321 page->is_cloned = false;
322 page->v.entry = table_base;
324 page->v.u.frame.offset = offset;
325 page->v.u.frame.flags = flags;
326 page->v.u.frame.pte_count = pte_count;
327 page->u.frame.vaddr = vaddr;
328 page->u.frame.cloned_count = 0;
330 // only insert after vnode fully initialized
331 pmap_vnode_insert_child(ptable, page);
333 set_mapping_cap(&pmap->p, page, ptable, table_base);
334 pmap->used_cap_slots ++;
337 assert(!capref_is_null(ptable->v.u.vnode.invokable));
338 assert(!capref_is_null(page->v.mapping));
339 err = vnode_map(ptable->v.u.vnode.invokable, frame, table_base,
340 pmap_flags, offset, pte_count, page->v.mapping);
341 if (err_is_fail(err)) {
342 return err_push(err, LIB_ERR_VNODE_MAP);
349 * \brief Called when enough slabs exist for the given mapping
351 errval_t do_map(struct pmap *pmap_gen, genvaddr_t vaddr,
352 struct capref frame, size_t offset, size_t size,
353 vregion_flags_t flags, size_t *retoff, size_t *retsize)
355 struct pmap_x86 *pmap = (struct pmap_x86 *)pmap_gen;
356 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_DO_MAP, 0);
359 // determine page size and relevant address part
360 size_t page_size = X86_64_BASE_PAGE_SIZE;
361 size_t table_base = X86_64_PTABLE_BASE(vaddr);
362 uint8_t map_bits = X86_64_BASE_PAGE_BITS + X86_64_PTABLE_BITS;
363 bool debug_out = false;
365 // get base address and size of frame
366 struct frame_identity fi;
367 err = cap_identify_mappable(frame, &fi);
368 if (err_is_fail(err)) {
369 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_DO_MAP, 1);
370 return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
373 if ((flags & VREGION_FLAGS_HUGE) &&
374 (vaddr & X86_64_HUGE_PAGE_MASK) == 0 &&
375 fi.bytes >= X86_64_HUGE_PAGE_SIZE &&
376 ((fi.base & X86_64_HUGE_PAGE_MASK) == 0))
378 // huge page branch (1GB)
379 page_size = X86_64_HUGE_PAGE_SIZE;
380 table_base = X86_64_PDPT_BASE(vaddr);
381 map_bits = X86_64_HUGE_PAGE_BITS + X86_64_PTABLE_BITS;
383 // remove large flag, if we're doing huge mapping
384 flags &= ~VREGION_FLAGS_LARGE;
385 } else if ((flags & VREGION_FLAGS_LARGE) &&
386 (vaddr & X86_64_LARGE_PAGE_MASK) == 0 &&
387 fi.bytes >= X86_64_LARGE_PAGE_SIZE &&
388 ((fi.base & X86_64_LARGE_PAGE_MASK) == 0))
390 // large page branch (2MB)
391 page_size = X86_64_LARGE_PAGE_SIZE;
392 table_base = X86_64_PDIR_BASE(vaddr);
393 map_bits = X86_64_LARGE_PAGE_BITS + X86_64_PTABLE_BITS;
396 // remove large/huge flags
397 flags &= ~(VREGION_FLAGS_LARGE|VREGION_FLAGS_HUGE);
400 // round to the next full page and calculate end address and #ptes
401 size = ROUND_UP(size, page_size);
402 size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
403 genvaddr_t vend = vaddr + size;
405 if (offset+size > fi.bytes) {
406 debug_printf("do_map: offset=%zu; size=%zu; frame size=%zu\n",
407 offset, size, fi.bytes);
408 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_DO_MAP, 1);
409 return LIB_ERR_PMAP_FRAME_SIZE;
413 if (debug_out) {
414 genpaddr_t paddr = fi.base + offset;
416 debug_printf("do_map: 0x%"
417 PRIxGENVADDR"--0x%"PRIxGENVADDR" -> 0x%"PRIxGENPADDR
418 "; pte_count = %zd; frame bytes = 0x%zx; page size = 0x%zx\n",
419 vaddr, vend, paddr, pte_count, fi.bytes, page_size);
423 // all mapping on one leaf table?
424 if (is_same_pdir(vaddr, vend) ||
425 (flags & VREGION_FLAGS_LARGE && is_same_pdpt(vaddr, vend)) ||
426 (flags & VREGION_FLAGS_HUGE && is_same_pml4(vaddr, vend))) {
429 debug_printf(" do_map: fast path: %zd\n", pte_count);
431 err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
432 if (err_is_fail(err)) {
433 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_DO_MAP, 1);
434 return err_push(err, LIB_ERR_PMAP_DO_MAP);
437 else { // multiple leaf page tables
439 uint32_t c = X86_64_PTABLE_SIZE - table_base;
441 debug_printf(" do_map: slow path: first leaf %"PRIu32"\n", c);
443 genvaddr_t temp_end = vaddr + c * page_size;
444 err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
445 if (err_is_fail(err)) {
446 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_DO_MAP, 1);
447 return err_push(err, LIB_ERR_PMAP_DO_MAP);
451 while (get_addr_prefix(temp_end, map_bits) <
452 get_addr_prefix(vend, map_bits))
456 temp_end = vaddr + X86_64_PTABLE_SIZE * page_size;
457 offset += c * page_size;
458 c = X86_64_PTABLE_SIZE;
462 debug_printf(" do_map: slow path: full leaf\n");
464 err = do_single_map(pmap, vaddr, temp_end, frame, offset,
465 X86_64_PTABLE_SIZE, flags);
466 if (err_is_fail(err)) {
467 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_DO_MAP, 1);
468 return err_push(err, LIB_ERR_PMAP_DO_MAP);
472 // map remaining part
473 offset += c * page_size;
475 // calculate remaining pages (subtract ptable bits from map_bits to
476 // get #ptes of last-level instead of 2nd-to-last).
477 c = get_addr_prefix(vend, map_bits-X86_64_PTABLE_BITS) -
478 get_addr_prefix(temp_end, map_bits-X86_64_PTABLE_BITS);
483 debug_printf("do_map: slow path: last leaf %"PRIu32"\n", c);
485 err = do_single_map(pmap, temp_end, vend, frame, offset, c, flags);
486 if (err_is_fail(err)) {
487 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_DO_MAP, 1);
488 return err_push(err, LIB_ERR_PMAP_DO_MAP);
500 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_DO_MAP, 1);
504 /// Compute an upper limit on the number of slabs required to perform a mapping
505 static size_t max_slabs_for_mapping(size_t bytes)
507 size_t max_pages = DIVIDE_ROUND_UP(bytes, X86_64_BASE_PAGE_SIZE);
508 size_t max_ptable = DIVIDE_ROUND_UP(max_pages, X86_64_PTABLE_SIZE);
509 size_t max_pdir = DIVIDE_ROUND_UP(max_ptable, X86_64_PTABLE_SIZE);
510 size_t max_pdpt = DIVIDE_ROUND_UP(max_pdir, X86_64_PTABLE_SIZE);
511 // Worst case, our mapping spans over two pdpts
512 return 2 * (max_ptable + max_pdir + max_pdpt);
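/*
 * Worked example (sketch): for bytes = 2 MiB of 4 KiB mappings,
 * max_pages = 512, max_ptable = max_pdir = max_pdpt = 1, so this returns
 * 2 * (1 + 1 + 1) = 6 slabs; the factor 2 covers a mapping that straddles a
 * pdpt boundary.
 */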
515 static size_t max_slabs_for_mapping_large(size_t bytes)
517 size_t max_pages = DIVIDE_ROUND_UP(bytes, X86_64_LARGE_PAGE_SIZE);
518 size_t max_pdir = DIVIDE_ROUND_UP(max_pages, X86_64_PTABLE_SIZE);
519 size_t max_pdpt = DIVIDE_ROUND_UP(max_pdir, X86_64_PTABLE_SIZE);
520 // Worst case, our mapping spans over two pdpts
521 return 2 * (max_pdir + max_pdpt);
524 static size_t max_slabs_for_mapping_huge(size_t bytes)
526 size_t max_pages = DIVIDE_ROUND_UP(bytes, X86_64_HUGE_PAGE_SIZE);
527 size_t max_pdpt = DIVIDE_ROUND_UP(max_pages, X86_64_PTABLE_SIZE);
528 // Worst case, our mapping spans over two pdpts
532 size_t max_slabs_required(size_t bytes)
534 return max_slabs_for_mapping(bytes);
538 * \brief Create page mappings
540 * \param pmap The pmap object
541 * \param vaddr The virtual address to create the mapping for
542 * \param frame The frame cap to map in
543 * \param offset Offset into the frame cap
544 * \param size Size of the mapping
545 * \param flags Flags for the mapping
546 * \param retoff If non-NULL, filled in with adjusted offset of mapped region
547 * \param retsize If non-NULL, filled in with adjusted size of mapped region
549 static errval_t map(struct pmap *pmap, genvaddr_t vaddr, struct capref frame,
550 size_t offset, size_t size, vregion_flags_t flags,
551 size_t *retoff, size_t *retsize)
555 struct capability cap;
556 err = cap_direct_identify(frame, &cap);
557 if (err_is_fail(err)) {
558 return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
560 struct frame_identity fi;
561 fi.base = get_address(&cap);
562 fi.bytes = get_size(&cap);
565 // Adjust the parameters to page boundaries
566 // TODO: overestimating needed slabs shouldn't hurt much in the long run,
567 // and would keep the code easier to read and possibly faster due to less
568 // branching
569 if ((flags & VREGION_FLAGS_LARGE) &&
570 (vaddr & X86_64_LARGE_PAGE_MASK) == 0 &&
571 (fi.base & X86_64_LARGE_PAGE_MASK) == 0 &&
572 fi.bytes >= offset+size) {
573 //case large pages (2MB)
574 size += LARGE_PAGE_OFFSET(offset);
575 size = ROUND_UP(size, LARGE_PAGE_SIZE);
576 offset -= LARGE_PAGE_OFFSET(offset);
577 max_slabs = max_slabs_for_mapping_large(size);
578 } else if ((flags & VREGION_FLAGS_HUGE) &&
579 (vaddr & X86_64_HUGE_PAGE_MASK) == 0 &&
580 (fi.base & X86_64_HUGE_PAGE_MASK) == 0 &&
581 fi.bytes >= offset+size) {
582 // case huge pages (1GB)
583 size += HUGE_PAGE_OFFSET(offset);
584 size = ROUND_UP(size, HUGE_PAGE_SIZE);
585 offset -= HUGE_PAGE_OFFSET(offset);
586 max_slabs = max_slabs_for_mapping_huge(size);
588 //case normal pages (4KB)
589 size += BASE_PAGE_OFFSET(offset);
590 size = ROUND_UP(size, BASE_PAGE_SIZE);
591 offset -= BASE_PAGE_OFFSET(offset);
592 max_slabs = max_slabs_for_mapping(size);
595 max_slabs += 6; // minimum amount required to map a region spanning 2 ptables
597 err = pmap_refill_slabs(pmap, max_slabs);
598 if (err_is_fail(err)) {
602 err = do_map(pmap, vaddr, frame, offset, size, flags, retoff, retsize);
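/*
 * Illustrative use (sketch, not code from this file): callers normally reach
 * map() through the vspace/vregion layer, but a direct call via the pmap
 * function table would look roughly like this, assuming `frame` is a frame
 * capability of at least `size` bytes:
 *
 *   struct pmap *p = get_current_pmap();
 *   genvaddr_t va;
 *   size_t retoff, retsize;
 *   errval_t err = p->f.determine_addr_raw(p, size, BASE_PAGE_SIZE, &va);
 *   if (err_is_ok(err)) {
 *       err = p->f.map(p, va, frame, 0, size, VREGION_FLAGS_READ_WRITE,
 *                      &retoff, &retsize);
 *   }
 */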
606 struct find_mapping_info {
607 struct vnode *page_table;
615 * \brief Find mapping for `vaddr` in `pmap`.
616 * \arg pmap the pmap to search in
617 * \arg vaddr the virtual address to search for
618 * \arg info filled in with the last-level page table and page meta-data we
619 * found, if any, plus the page size, table base and map bits
620 * \returns `true` iff we found a mapping for vaddr
622 static bool find_mapping(struct pmap_x86 *pmap, genvaddr_t vaddr,
623 struct find_mapping_info *info)
625 struct vnode *pdpt = NULL, *pdir = NULL, *pt = NULL, *page = NULL;
627 size_t page_size = 0;
628 size_t table_base = 0;
629 uint8_t map_bits = 0;
631 // find page and last-level page table (can be pdir or pdpt)
632 if ((pdpt = find_pdpt(pmap, vaddr)) != NULL) {
633 page = pmap_find_vnode(pdpt, X86_64_PDPT_BASE(vaddr));
634 if (page && page->v.is_vnode) { // not 1G pages
636 page = pmap_find_vnode(pdir, X86_64_PDIR_BASE(vaddr));
637 if (page && page->v.is_vnode) { // not 2M pages
639 page = pmap_find_vnode(pt, X86_64_PTABLE_BASE(vaddr));
640 page_size = X86_64_BASE_PAGE_SIZE;
641 table_base = X86_64_PTABLE_BASE(vaddr);
642 map_bits = X86_64_BASE_PAGE_BITS + X86_64_PTABLE_BITS;
644 assert(is_large_page(page));
646 page_size = X86_64_LARGE_PAGE_SIZE;
647 table_base = X86_64_PDIR_BASE(vaddr);
648 map_bits = X86_64_LARGE_PAGE_BITS + X86_64_PTABLE_BITS;
651 assert(is_huge_page(page));
653 page_size = X86_64_HUGE_PAGE_SIZE;
654 table_base = X86_64_PDPT_BASE(vaddr);
655 map_bits = X86_64_HUGE_PAGE_BITS + X86_64_PTABLE_BITS;
659 info->page_table = pt;
661 info->page_size = page_size;
662 info->table_base = table_base;
663 info->map_bits = map_bits;
672 static errval_t do_single_unmap(struct pmap_x86 *pmap, genvaddr_t vaddr,
676 struct find_mapping_info info;
678 if (!find_mapping(pmap, vaddr, &info)) {
679 return LIB_ERR_PMAP_FIND_VNODE;
681 assert(info.page_table && info.page_table->v.is_vnode && info.page && !info.page->v.is_vnode);
683 if (info.page->v.u.frame.pte_count == pte_count) {
684 err = vnode_unmap(info.page_table->v.cap, info.page->v.mapping);
685 if (err_is_fail(err)) {
686 debug_printf("vnode_unmap returned error: %s (%d)\n",
687 err_getstring(err), err_no(err));
688 return err_push(err, LIB_ERR_VNODE_UNMAP);
691 // delete&free page->v.mapping after doing vnode_unmap()
692 err = cap_delete(info.page->v.mapping);
693 if (err_is_fail(err)) {
694 return err_push(err, LIB_ERR_CAP_DELETE);
697 err = pmap->p.slot_alloc->free(pmap->p.slot_alloc, info.page->v.mapping);
698 if (err_is_fail(err)) {
699 debug_printf("do_single_unmap: slot_free (mapping): %s\n",
703 assert(pmap->used_cap_slots > 0);
704 pmap->used_cap_slots --;
705 // Free up the resources
706 pmap_remove_vnode(info.page_table, info.page);
707 slab_free(&pmap->p.m.slab, info.page);
714 * \brief Remove page mappings
716 * \param pmap The pmap object
717 * \param vaddr The start of the virtual region to remove
718 * \param size The size of virtual region to remove
719 * \param retsize If non-NULL, filled in with the actual size removed
721 static errval_t unmap(struct pmap *pmap, genvaddr_t vaddr, size_t size,
724 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_UNMAP, 0);
725 //printf("[unmap] 0x%"PRIxGENVADDR", %zu\n", vaddr, size);
726 errval_t err, ret = SYS_ERR_OK;
727 struct pmap_x86 *x86 = (struct pmap_x86*)pmap;
729 // determine whether the existing mapping uses large or huge pages
730 struct find_mapping_info info;
732 if (!find_mapping(x86, vaddr, &info)) {
733 //TODO: better error --> LIB_ERR_PMAP_NOT_MAPPED
734 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_UNMAP, 1);
735 return LIB_ERR_PMAP_UNMAP;
738 assert(!info.page->v.is_vnode);
740 if (info.page->v.entry > info.table_base) {
741 debug_printf("trying to partially unmap region\n");
743 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_UNMAP, 1);
744 return LIB_ERR_PMAP_FIND_VNODE;
747 // TODO: match new policy of map when implemented
748 size = ROUND_UP(size, info.page_size);
749 genvaddr_t vend = vaddr + size;
751 if (is_same_pdir(vaddr, vend) ||
752 (is_same_pdpt(vaddr, vend) && is_large_page(info.page)) ||
753 (is_same_pml4(vaddr, vend) && is_huge_page(info.page)))
756 err = do_single_unmap(x86, vaddr, size / info.page_size);
757 if (err_is_fail(err) && err_no(err) != LIB_ERR_PMAP_FIND_VNODE) {
758 printf("error fast path\n");
759 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_UNMAP, 1);
760 return err_push(err, LIB_ERR_PMAP_UNMAP);
765 uint32_t c = X86_64_PTABLE_SIZE - info.table_base;
767 err = do_single_unmap(x86, vaddr, c);
768 if (err_is_fail(err) && err_no(err) != LIB_ERR_PMAP_FIND_VNODE) {
769 printf("error first leaf\n");
770 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_UNMAP, 1);
771 return err_push(err, LIB_ERR_PMAP_UNMAP);
775 vaddr += c * info.page_size;
776 while (get_addr_prefix(vaddr, info.map_bits) < get_addr_prefix(vend, info.map_bits)) {
777 c = X86_64_PTABLE_SIZE;
778 err = do_single_unmap(x86, vaddr, X86_64_PTABLE_SIZE);
779 if (err_is_fail(err) && err_no(err) != LIB_ERR_PMAP_FIND_VNODE) {
780 printf("error while loop\n");
781 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_UNMAP, 1);
782 return err_push(err, LIB_ERR_PMAP_UNMAP);
784 vaddr += c * info.page_size;
787 // unmap remaining part
788 // subtracting ptable bits from map_bits to get #ptes in last-level table
789 // instead of 2nd-to-last.
790 c = get_addr_prefix(vend, info.map_bits - X86_64_PTABLE_BITS) -
791 get_addr_prefix(vaddr, info.map_bits - X86_64_PTABLE_BITS);
792 assert(c < X86_64_PTABLE_SIZE);
794 err = do_single_unmap(x86, vaddr, c);
795 if (err_is_fail(err) && err_no(err) != LIB_ERR_PMAP_FIND_VNODE) {
796 printf("error remaining part\n");
797 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_UNMAP, 1);
798 return err_push(err, LIB_ERR_PMAP_UNMAP);
807 //printf("[unmap] exiting\n");
808 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_UNMAP, 1);
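/*
 * TLB flush policy knob, consumed by do_single_modify_flags() below:
 *   0 - default: use selective flushing only for single-page modifications,
 *       full flush otherwise
 *   1 - always use the computed selective flush
 *   2 - always use the assisted selective flush (pass a va hint)
 *   3 - always do a full flush
 */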
812 int pmap_selective_flush = 0;
813 static errval_t do_single_modify_flags(struct pmap_x86 *pmap, genvaddr_t vaddr,
814 size_t pages, vregion_flags_t flags)
816 errval_t err = SYS_ERR_OK;
818 struct find_mapping_info info;
820 if (!find_mapping(pmap, vaddr, &info)) {
821 return LIB_ERR_PMAP_FIND_VNODE;
824 assert(info.page_table && info.page_table->v.is_vnode && info.page && !info.page->v.is_vnode);
825 assert(pages <= PTABLE_SIZE);
827 if (pmap_inside_region(info.page_table, info.table_base, pages)) {
828 // we're modifying part of a valid mapped region
829 // arguments to invocation: invoke frame cap, first affected
830 // page (as offset from first page in mapping), #affected
831 // pages, new flags. Invocation mask flags based on capability
832 // access permissions.
833 size_t off = info.table_base - info.page->v.entry;
834 paging_x86_64_flags_t pmap_flags = vregion_to_pmap_flag(flags);
835 // calculate TLB flushing hint
836 genvaddr_t va_hint = 0;
837 if (pmap_selective_flush == 3) {
838 // always do full flush
840 } else if (pmap_selective_flush == 2) {
841 // always do assisted selective flush
842 va_hint = vaddr & ~(info.page_size - 1);
843 } else if (pmap_selective_flush == 1) {
844 // always do computed selective flush
848 * default strategy is to only use selective flushing for single page
851 // do assisted selective flush for single page
852 va_hint = vaddr & ~(info.page_size - 1);
855 err = invoke_mapping_modify_flags(info.page->v.mapping, off, pages,
856 pmap_flags, va_hint);
859 // overlaps some region border
860 // XXX: need better error
861 return LIB_ERR_PMAP_EXISTING_MAPPING;
869 * \brief Modify page mapping
871 * \param pmap The pmap object
872 * \param vaddr The first virtual address for which to change the flags
873 * \param size The length of the region to change in bytes
874 * \param flags New flags for the mapping
875 * \param retsize If non-NULL, filled in with the actual size modified
877 static errval_t modify_flags(struct pmap *pmap, genvaddr_t vaddr, size_t size,
878 vregion_flags_t flags, size_t *retsize)
880 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_MODIFY, 0);
882 struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
884 // determine whether the existing mapping uses large or huge pages
885 struct find_mapping_info info;
887 if (!find_mapping(x86, vaddr, &info)) {
888 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_MODIFY, 1);
889 return LIB_ERR_PMAP_NOT_MAPPED;
892 assert(info.page && !info.page->v.is_vnode);
893 // XXX: be more graceful about size == 0? -SG, 2017-11-28.
896 // TODO: match new policy of map when implemented
897 size = ROUND_UP(size, info.page_size);
898 genvaddr_t vend = vaddr + size;
900 size_t pages = size / info.page_size;
902 // vaddr and vend specify begin and end of the region (inside a mapping)
903 // that should receive the new set of flags
904 if (is_same_pdir(vaddr, vend) ||
905 (is_same_pdpt(vaddr, vend) && is_large_page(info.page)) ||
906 (is_same_pml4(vaddr, vend) && is_huge_page(info.page))) {
908 assert(pages <= PTABLE_SIZE);
909 err = do_single_modify_flags(x86, vaddr, pages, flags);
910 if (err_is_fail(err)) {
911 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_MODIFY, 1);
912 return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
917 uint32_t c = X86_64_PTABLE_SIZE - info.table_base;
918 assert(c <= PTABLE_SIZE);
919 err = do_single_modify_flags(x86, vaddr, c, flags);
920 if (err_is_fail(err)) {
921 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_MODIFY, 1);
922 return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
925 // modify full leaves
926 vaddr += c * info.page_size;
927 while (get_addr_prefix(vaddr, info.map_bits) < get_addr_prefix(vend, info.map_bits)) {
928 c = X86_64_PTABLE_SIZE;
929 err = do_single_modify_flags(x86, vaddr, X86_64_PTABLE_SIZE, flags);
930 if (err_is_fail(err)) {
931 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_MODIFY, 1);
932 return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
934 vaddr += c * info.page_size;
937 // modify remaining part
938 c = get_addr_prefix(vend, info.map_bits - X86_64_PTABLE_BITS) -
939 get_addr_prefix(vaddr, info.map_bits - X86_64_PTABLE_BITS);
941 assert(c <= PTABLE_SIZE);
942 err = do_single_modify_flags(x86, vaddr, c, flags);
943 if (err_is_fail(err)) {
944 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_MODIFY, 1);
945 return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
954 //printf("[modify_flags] exiting\n");
955 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_MODIFY, 1);
960 * \brief Query existing page mapping
962 * \param pmap The pmap object
963 * \param vaddr The virtual address to query
964 * \param info Filled in with the base virtual address and size of the
965 * mapping, the cap mapped at this address, the offset within the
966 * cap that is mapped, the flags, and the mapping capability
968 *
970 * The info struct is only filled in if a mapping is found.
972 static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
973 struct pmap_mapping_info *info)
975 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_LOOKUP, 0);
976 struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
978 struct find_mapping_info find_info;
979 bool found = find_mapping(x86, vaddr, &find_info);
982 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_LOOKUP, 1);
983 return LIB_ERR_PMAP_FIND_VNODE;
987 info->vaddr = vaddr & ~(genvaddr_t)(find_info.page_size - 1);
988 info->size = find_info.page_size;
989 info->cap = find_info.page->v.cap;
990 info->offset = find_info.page->v.u.frame.offset;
991 info->flags = find_info.page->v.u.frame.flags;
992 info->mapping = find_info.page->v.mapping;
994 trace_event(TRACE_SUBSYS_MEMORY, TRACE_EVENT_MEMORY_LOOKUP, 1);
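/*
 * Illustrative use (sketch): query what backs a virtual address, assuming
 * lookup() is installed in the pmap function table:
 *
 *   struct pmap_mapping_info mi;
 *   errval_t err = get_current_pmap()->f.lookup(get_current_pmap(), va, &mi);
 *   if (err_is_ok(err)) {
 *       // mi.vaddr/mi.size describe the page containing va,
 *       // mi.cap/mi.offset identify the backing capability
 *   }
 */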
1000 #if defined(PMAP_LL)
1001 static errval_t dump(struct pmap *pmap, struct pmap_dump_info *buf, size_t buflen, size_t *items_written)
1003 struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
1004 struct pmap_dump_info *buf_ = buf;
1006 struct vnode *pml4 = &x86->root;
1007 struct vnode *pdpt, *pdir, *pt, *frame;
1008 assert(pml4 != NULL);
1012 // iterate over PML4 entries
1013 size_t pml4_index, pdpt_index, pdir_index;
1014 for (pdpt = pml4->v.u.vnode.children; pdpt != NULL; pdpt = pdpt->v.meta.next) {
1015 pml4_index = pdpt->v.entry;
1016 // iterate over pdpt entries
1017 for (pdir = pdpt->v.u.vnode.children; pdir != NULL; pdir = pdir->v.meta.next) {
1018 pdpt_index = pdir->v.entry;
1019 // iterate over pdir entries
1020 for (pt = pdir->v.u.vnode.children; pt != NULL; pt = pt->v.meta.next) {
1021 pdir_index = pt->v.entry;
1022 // iterate over pt entries
1023 for (frame = pt->v.u.vnode.children; frame != NULL; frame = frame->v.meta.next) {
1024 if (*items_written < buflen) {
1025 buf_->pml4_index = pml4_index;
1026 buf_->pdpt_index = pdpt_index;
1027 buf_->pdir_index = pdir_index;
1028 buf_->pt_index = frame->v.entry;
1029 buf_->cap = frame->v.cap;
1030 buf_->offset = frame->v.u.frame.offset;
1031 buf_->flags = frame->v.u.frame.flags;
1041 #elif defined(PMAP_ARRAY)
1042 static errval_t dump(struct pmap *pmap, struct pmap_dump_info *buf, size_t buflen, size_t *items_written)
1044 struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
1045 struct pmap_dump_info *buf_ = buf;
1047 struct vnode *pml4 = &x86->root;
1048 struct vnode *pdpt, *pdir, *pt, *frame;
1049 assert(pml4 != NULL);
1053 // iterate over PML4 entries
1054 size_t pml4_index, pdpt_index, pdir_index, pt_index;
1055 for (pml4_index = 0; pml4_index < X86_64_PTABLE_SIZE; pml4_index++) {
1056 if (!(pdpt = pml4->v.u.vnode.children[pml4_index])) {
1057 // skip empty entries
1060 // iterate over pdpt entries
1061 for (pdpt_index = 0; pdpt_index < X86_64_PTABLE_SIZE; pdpt_index++) {
1062 if (!(pdir = pdpt->v.u.vnode.children[pdpt_index])) {
1063 // skip empty entries
1066 // iterate over pdir entries
1067 for (pdir_index = 0; pdir_index < X86_64_PTABLE_SIZE; pdir_index++) {
1068 if (!(pt = pdir->v.u.vnode.children[pdir_index])) {
1069 // skip empty entries
1072 // iterate over pt entries
1073 for (pt_index = 0; pt_index < X86_64_PTABLE_SIZE; pt_index++) {
1074 if (!(frame = pt->v.u.vnode.children[pt_index])) {
1075 // skip empty entries
1078 if (*items_written < buflen) {
1079 buf_->pml4_index = pml4_index;
1080 buf_->pdpt_index = pdpt_index;
1081 buf_->pdir_index = pdir_index;
1082 buf_->pt_index = pt_index;
1083 buf_->cap = frame->v.cap;
1084 buf_->offset = frame->v.u.frame.offset;
1085 buf_->flags = frame->v.u.frame.flags;
1096 #error Invalid pmap datastructure
1101 * creates pinned page table entries
1103 static errval_t create_pts_pinned(struct pmap *pmap, genvaddr_t vaddr, size_t bytes,
1104 vregion_flags_t flags)
1106 errval_t err = SYS_ERR_OK;
1107 struct pmap_x86 *x86 = (struct pmap_x86*)pmap;
1111 /* work out the number of vnodes we may need and grow the slabs*/
1113 if ((flags & VREGION_FLAGS_LARGE)) {
1114 assert(!(vaddr & (LARGE_PAGE_SIZE -1)));
1115 assert(!(bytes & (LARGE_PAGE_SIZE -1)));
1116 pagesize = HUGE_PAGE_SIZE;
1117 max_slabs = max_slabs_for_mapping_huge(bytes);
1118 } else if ((flags & VREGION_FLAGS_HUGE)) {
1119 // case huge pages (1GB)
1120 assert(!(vaddr & (HUGE_PAGE_SIZE -1)));
1121 assert(!(bytes & (HUGE_PAGE_SIZE -1)));
1122 pagesize = HUGE_PAGE_SIZE * 512UL;
1123 max_slabs = (bytes / HUGE_PAGE_SIZE) + 1;
1125 //case normal pages (4KB)
1126 assert(!(vaddr & (BASE_PAGE_SIZE -1)));
1127 assert(!(bytes & (BASE_PAGE_SIZE -1)));
1128 pagesize = LARGE_PAGE_SIZE;
1129 max_slabs = max_slabs_for_mapping_large(bytes);
1132 max_slabs += 6; // minimum amount required to map a region spanning 2 ptables
1134 // Refill slab allocator if necessary
1135 err = pmap_refill_slabs(pmap, max_slabs);
1136 if (err_is_fail(err)) {
1140 /* do the actual creation of the page tables */
1141 for (size_t va = vaddr; va < (vaddr + bytes); va += pagesize) {
1142 struct vnode *vnode;
1143 if ((flags & VREGION_FLAGS_LARGE)) {
1144 err = get_pdir(x86, va, &vnode);
1145 } else if ((flags & VREGION_FLAGS_HUGE)) {
1146 err = get_pdpt(x86, va, &vnode);
1148 err = get_ptable(x86, va, &vnode);
1150 if (err_is_fail(err)) {
1154 /* map the page-table read only for access to status bits */
1155 genvaddr_t genvaddr = pmap->m.vregion_offset;
1156 pmap->m.vregion_offset += (genvaddr_t)4096;
1158 assert(pmap->m.vregion_offset < vregion_get_base_addr(&pmap->m.vregion) +
1159 vregion_get_size(&pmap->m.vregion));
1161 /* copy the page-table capability */
1162 /* XXX: this should be somewhere in struct vnode */
1164 err = x86->p.slot_alloc->alloc(x86->p.slot_alloc, &slot);
1165 if (err_is_fail(err)) {
1166 return err_push(err, LIB_ERR_SLOT_ALLOC);
1169 err = cap_copy(slot, vnode->v.cap);
1170 if (err_is_fail(err)) {
1171 x86->p.slot_alloc->free(x86->p.slot_alloc, slot);
1175 /* get slot for mapping */
1176 /* XXX: this should be in struct vnode somewhere! */
1177 struct capref mapping;
1178 err = x86->p.slot_alloc->alloc(x86->p.slot_alloc, &mapping);
1179 if (err_is_fail(err)) {
1180 return err_push(err, LIB_ERR_SLOT_ALLOC);
1183 /* get the page table of the reserved range and map the PT */
1184 struct vnode *ptable;
1185 err = get_ptable(x86, genvaddr, &ptable);
if (err_is_fail(err)) { return err_push(err, LIB_ERR_PMAP_GET_PTABLE); }
1186 err = vnode_map(ptable->v.cap, slot, X86_64_PTABLE_BASE(genvaddr),
1187 vregion_to_pmap_flag(VREGION_FLAGS_READ), 0, 1, mapping);
1189 if (err_is_fail(err)) {
1190 return err_push(err, LIB_ERR_PMAP_DO_MAP);
1193 /* update the vnode structure */
1194 vnode->is_pinned = 1;
1195 vnode->u.vnode.virt_base = genvaddr;
1203 * returns the virtual address of the leaf pagetable for a mapping
1205 static errval_t get_leaf_pt(struct pmap *pmap, genvaddr_t vaddr, lvaddr_t *ret_va)
1209 /* walk down the pt hierarchy and stop at the leaf */
1211 struct vnode *parent = NULL, *current = NULL;
1212 // find page and last-level page table (can be pdir or pdpt)
1213 if ((current = find_pdpt((struct pmap_x86 *)pmap, vaddr)) == NULL) {
1218 if ((current = pmap_find_vnode(parent, X86_64_PDPT_BASE(vaddr))) == NULL) {
1224 if ((current = pmap_find_vnode(parent, X86_64_PDIR_BASE(vaddr))) == NULL) {
1230 assert(current && current->v.is_vnode);
1232 *ret_va = current->u.vnode.virt_base;
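/*
 * Illustrative use (sketch): once create_pts_pinned() has covered a region,
 * the read-only alias of the leaf page table can be used to inspect status
 * bits. For a 4 KiB mapping, something along these lines would work (the
 * indexing and bit position are assumptions, not code from this file):
 *
 *   lvaddr_t pt_va;
 *   err = pmap->f.get_leaf_pt(pmap, va, &pt_va);
 *   uint64_t pte = ((volatile uint64_t *)pt_va)[X86_64_PTABLE_BASE(va)];
 *   bool accessed = pte & (1UL << 5); // x86 PTE accessed bit
 */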
1236 static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
1237 size_t alignment, genvaddr_t *retvaddr)
1239 struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
1241 if (alignment == 0) {
1242 alignment = BASE_PAGE_SIZE;
1244 alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
1246 size = ROUND_UP(size, alignment);
1247 assert(size < 512ul * 1024 * 1024 * 1024); // must fit within one pml4 entry (512 GiB)
1249 #if defined(PMAP_LL)
1250 struct vnode *walk_pml4 = x86->root.v.u.vnode.children;
1251 assert(walk_pml4 != NULL); // assume there's always at least one existing entry
1253 // try to find free pml4 entry
1255 for (int i = 0; i < 512; i++) {
1258 //debug_printf("entry: %d\n", walk_pml4->entry);
1259 f[walk_pml4->v.entry] = false;
1261 //debug_printf("looping over pml4 entries\n");
1262 assert(walk_pml4->v.is_vnode);
1263 f[walk_pml4->v.entry] = false;
1264 walk_pml4 = walk_pml4->v.meta.next;
1266 genvaddr_t first_free = 16;
1267 for (; first_free < 512; first_free++) {
1268 //debug_printf("f[%"PRIuGENVADDR"] = %d\n", first_free, f[first_free]);
1269 if (f[first_free]) {
1273 #elif defined(PMAP_ARRAY)
1274 genvaddr_t first_free = 16;
1275 for (; first_free < X86_64_PTABLE_SIZE; first_free++) {
1276 if (!x86->root.v.u.vnode.children[first_free]) {
1281 #error Invalid pmap datastructure
1283 //debug_printf("first_free: %"PRIuGENVADDR"\n", first_free);
1284 if (first_free < X86_64_PTABLE_SIZE) {
1285 //debug_printf("first_free: %"PRIuGENVADDR"\n", first_free);
1286 *retvaddr = first_free << 39;
1289 return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
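/*
 * Worked example (sketch): each PML4 entry covers 2^39 bytes = 512 GiB, so a
 * free entry at index 16 yields *retvaddr = 16 << 39 = 0x80000000000 (8 TiB).
 * Entries below 16 are skipped on purpose by starting first_free at 16.
 */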
1293 static struct pmap_funcs pmap_funcs = {
1294 .determine_addr = pmap_x86_determine_addr,
1295 .determine_addr_raw = determine_addr_raw,
1299 .modify_flags = modify_flags,
1300 .serialise = pmap_serialise,
1301 .deserialise = pmap_deserialise,
1303 .create_pts_pinned = create_pts_pinned,
1304 .get_leaf_pt = get_leaf_pt,
1305 .measure_res = pmap_x86_measure_res,
1309 * \brief Initialize a x86 pmap object
1311 * \param pmap Pmap object of type x86
1313 errval_t pmap_x86_64_init(struct pmap *pmap, struct vspace *vspace,
1314 struct capref vnode,
1315 struct slot_allocator *opt_slot_alloc)
1317 struct pmap_x86 *x86 = (struct pmap_x86*)pmap;
1319 /* Generic portion */
1320 pmap->f = pmap_funcs;
1321 pmap->vspace = vspace;
1323 if (opt_slot_alloc != NULL) {
1324 pmap->slot_alloc = opt_slot_alloc;
1325 } else { /* use default allocator for this dispatcher */
1326 pmap->slot_alloc = get_default_slot_allocator();
1328 x86->used_cap_slots = 0;
1331 err = pmap_vnode_mgmt_init(pmap);
1332 if (err_is_fail(err)) {
1333 return err_push(err, LIB_ERR_PMAP_INIT);
1336 x86->root.v.type = ObjType_VNode_x86_64_pml4;
1337 x86->root.v.is_vnode = true;
1338 x86->root.v.cap = vnode;
1339 x86->root.v.u.vnode.invokable = vnode;
1340 if (get_croot_addr(vnode) != CPTR_ROOTCN) {
1341 err = slot_alloc(&x86->root.v.u.vnode.invokable);
1342 assert(err_is_ok(err));
1343 x86->used_cap_slots ++;
1344 err = cap_copy(x86->root.v.u.vnode.invokable, vnode);
1345 assert(err_is_ok(err));
1347 assert(!capref_is_null(x86->root.v.cap));
1348 assert(!capref_is_null(x86->root.v.u.vnode.invokable));
1349 pmap_vnode_init(pmap, &x86->root);
1350 x86->root.u.vnode.virt_base = 0;
1351 x86->root.u.vnode.page_table_frame = NULL_CAP;
1354 if (pmap == get_current_pmap()) {
1356 * for now, for our own pmap, we use the left over slot allocator cnode to
1357 * provide the mapping cnode for the first half of the root page table as
1358 * we cannot allocate CNodes before establishing a connection to the
1359 * memory server.
1361 x86->root.u.vnode.mcn[0].cnode = cnode_root;
1362 x86->root.u.vnode.mcn[0].slot = ROOTCN_SLOT_ROOT_MAPPING;
1363 x86->root.u.vnode.mcnode[0].croot = CPTR_ROOTCN;
1364 x86->root.u.vnode.mcnode[0].cnode = ROOTCN_SLOT_ADDR(ROOTCN_SLOT_ROOT_MAPPING);
1365 x86->root.u.vnode.mcnode[0].level = CNODE_TYPE_OTHER;
1367 err = cnode_create_l2(&x86->root.u.vnode.mcn[0], &x86->root.u.vnode.mcnode[0]);
1368 if (err_is_fail(err)) {
1369 return err_push(err, LIB_ERR_PMAP_ALLOC_CNODE);
1374 // choose a minimum mappable VA for most domains; enough to catch NULL
1375 // pointer derefs with suitably large offsets
1376 x86->min_mappable_va = 64 * 1024;
1378 // maximum mappable VA is derived from X86_64_MEMORY_OFFSET in kernel
1379 x86->max_mappable_va = (genvaddr_t)0xffffff8000000000;
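// 0xffffff8000000000 excludes the top PML4 entry, i.e. the highest 512 GiB,
// which is left to the kernel window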
1384 errval_t pmap_x86_64_init_ept(struct pmap *pmap, struct vspace *vspace,
1385 struct capref vnode,
1386 struct slot_allocator *opt_slot_alloc)
1389 err = pmap_x86_64_init(pmap, vspace, vnode, opt_slot_alloc);
1390 struct pmap_x86 *x86 = (struct pmap_x86*)pmap;
1392 x86->root.v.type = ObjType_VNode_x86_64_ept_pml4;
1398 * \brief Initialize the current pmap. Reserve space for metadata
1400 * This code is coupled with #vspace_current_init()
1402 errval_t pmap_x86_64_current_init(bool init_domain)
1404 struct pmap_x86 *x86 = (struct pmap_x86*)get_current_pmap();
1406 pmap_vnode_mgmt_current_init((struct pmap *)x86);
1408 // We don't know the vnode layout for the first part of our address space
1409 // (which was set up by the kernel), so we avoid mapping there until told about it.
1410 x86->min_mappable_va = x86->p.m.vregion.base;