3 * \brief pmap management
7 * Copyright (c) 2010-2015 ETH Zurich.
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
16 * There was some minor difficulty here with mapping the CPU's native
17 * page table arrangement onto Barrelfish. The problem lies with
18 * resource bootstrapping. The bootstrap ram allocator allocates pages.
21 * The natural division of bits is 12/8/12, corresponding to 4K
22 * entries in the L1 table and 256 L2 entries per L2
23 * table. Unfortunately 256 entries consume 1KB rather than a
24 * page (4KB) so we pretend here and in the kernel caps page
25 * code that the L1 has 1024 entries and L2 tables are 4KB in
26 * size. The 4KB constraint comes from ram_alloc_fixed
27 * allocating single pages and the difficulty in bootstrapping
28 * cap slots (alloc_node takes a single slot).
30 * For now this suffices, but might need to be revisited in future.
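 *
 * As a rough illustration of the pretence (nothing below is literal code):
 * one 4KB RAM page backs four consecutive 1KB hardware L2 tables, and four
 * consecutive hardware L1 entries are treated as one slot of a 1024-entry
 * "user" L1:
 *
 *   user L1 index i  ->  hardware L1 entries 4i .. 4i+3
 *   4KB L2 "page"    ->  [ 1KB L2 | 1KB L2 | 1KB L2 | 1KB L2 ]
 *                          chunk 0   chunk 1   chunk 2   chunk 3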
32 * An earlier cut at this used the first 1KB from each
33 * allocation made from ram_alloc_fixed and wasted the remaining
34 * space. Aside from the space wasted it entailed a couple of minor
35 * platform ifdefs to work around the discrepancy.
37 * Alternative fixes discussed include:
39 * 1. avoid the need to create vnodes before connecting to a
40 * real allocator (probably not plausible).
42 * 2. somehow make ram_alloc_fixed handle sub-page allocations
43 * (it's clunky, but perhaps we can give each domain a separate
44 * cnode full of 1k-sized RAM caps?)
46 * 3. handle the problem at the level of vnode_create (can't see how to
49 * 4. waste the space -- doing this cleanly will require a new parameter
50 * to retype to prevent all 4 caps being created
52 * 5. introduce a new arm-specific version of vnode_create that creates
53 * 4 1k vnodes, and is only called from the ARM VM code.
57 #include <barrelfish/barrelfish.h>
58 #include <barrelfish/caddr.h>
59 #include <barrelfish/invocations_arch.h>
62 // Location of VSpace managed by this system.
63 #ifdef __ARM_ARCH_7M__
64 // The virtual section 0x40000000-0x40100000 cannot be used as regular memory
65 // because of "bit-banding".
66 // 0x42000000-0x44000000 is also dangerous, so we start after that.
67 // XXX: there are more virtual regions we are not allowed to use;
68 // find out where to reserve those.
69 #define VSPACE_BEGIN ((lvaddr_t)(1UL*1024*1024*1024 + 64UL*1024*1024)) //0x44000000
70 #else //"normal" arm architectures
71 #define VSPACE_BEGIN ((lvaddr_t)1UL*1024*1024*1024) //0x40000000
75 // Amount of virtual address space reserved for mapping frames
76 // backing refill_slabs.
77 //#define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 128) // 64
78 #define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 256)
79 // increased the value above from 128 for the Pandaboard port
81 // Convenience macros to figure out user space page table indices
82 // we use 10 bits for both L1 and L2 tables in user space, even though
83 // in hardware we use 12 bits for L1 and 8 bits for L2.
84 #define ARM_USER_L1_OFFSET(addr) ((uintptr_t)(addr >> 22) & 0x3ffu)
85 #define ARM_USER_L2_OFFSET(addr) ((uintptr_t)(addr >> 12) & 0x3ffu)
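// Illustrative sketch (hypothetical helper, not used or compiled anywhere in
// this file): how a virtual address decomposes under the 10/10 user-space
// split above versus the hardware 12/8 split. For vaddr = 0x40012345 this
// yields user L1 index 0x100 and user L2 index 0x012, whereas the hardware
// sees L1 index 0x400 and L2 index 0x12, i.e. user L1 index = hw L1 index / 4.
#if 0
static void example_split_vaddr(lvaddr_t vaddr)
{
    uintptr_t user_l1 = ARM_USER_L1_OFFSET(vaddr);          // bits [31:22]
    uintptr_t user_l2 = ARM_USER_L2_OFFSET(vaddr);          // bits [21:12]
    uintptr_t hw_l1   = (uintptr_t)(vaddr >> 20);           // 12-bit hardware L1 index
    uintptr_t hw_l2   = ((uintptr_t)(vaddr >> 12)) & 0xffu; // 8-bit hardware L2 index
    debug_printf("user %"PRIuPTR"/%"PRIuPTR", hw %"PRIuPTR"/%"PRIuPTR"\n",
                 user_l1, user_l2, hw_l1, hw_l2);
}
#endif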
87 static inline uintptr_t
88 vregion_flags_to_kpi_paging_flags(vregion_flags_t flags)
90 STATIC_ASSERT(0x1ff == VREGION_FLAGS_MASK, "");
91 STATIC_ASSERT(0x0f == KPI_PAGING_FLAGS_MASK, "");
92 STATIC_ASSERT(VREGION_FLAGS_READ == KPI_PAGING_FLAGS_READ, "");
93 STATIC_ASSERT(VREGION_FLAGS_WRITE == KPI_PAGING_FLAGS_WRITE, "");
94 STATIC_ASSERT(VREGION_FLAGS_EXECUTE == KPI_PAGING_FLAGS_EXECUTE, "");
95 STATIC_ASSERT(VREGION_FLAGS_NOCACHE == KPI_PAGING_FLAGS_NOCACHE, "");
96 if ((flags & VREGION_FLAGS_MPB) != 0) {
97 // XXX: ignore MPB flag on ARM, otherwise the assert below fires -AB
98 flags &= ~VREGION_FLAGS_MPB;
100 if ((flags & VREGION_FLAGS_WRITE_COMBINING) != 0) {
101 // XXX mask out write-combining flag on ARM
102 flags &= ~VREGION_FLAGS_WRITE_COMBINING;
104 if ((flags & VREGION_FLAGS_GUARD) != 0) {
107 assert(0 == (~KPI_PAGING_FLAGS_MASK & (uintptr_t)flags));
108 return (uintptr_t)flags;
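// For example, a VREGION_FLAGS_READ_WRITE request passes through unchanged
// (the READ and WRITE bits coincide with their KPI counterparts, as the
// asserts above verify), while the MPB and write-combining bits are silently
// dropped on ARM.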
111 // debug print preprocessor flag for this file
112 //#define LIBBARRELFISH_DEBUG_PMAP
115 * \brief check whether region A = [start_a .. end_a) overlaps
116 * region B = [start_b .. end_b).
117 * \return true iff A overlaps B
119 static bool is_overlapping(uint16_t start_a, uint16_t end_a, uint16_t start_b, uint16_t end_b)
122 // B strict subset of A
123 (start_a < start_b && end_a >= end_b)
125 || (start_a >= start_b && start_a < end_b)
127 || (end_a > start_b && end_a < end_b);
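// Worked examples for the half-open interval check above (illustrative only,
// not compiled in). Note that intervals that merely touch do not overlap.
#if 0
static void example_is_overlapping(void)
{
    assert( is_overlapping(0, 4, 2, 6));   // A's tail overlaps B's head
    assert( is_overlapping(2, 6, 0, 4));   // A's head overlaps B's tail
    assert( is_overlapping(1, 7, 2, 5));   // B is a strict subset of A
    assert(!is_overlapping(0, 4, 4, 8));   // [0,4) and [4,8) only touch
}
#endif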
131 * \brief Check whether vnode `root' has entries in between [entry ..
133 * \param root the vnode to look at
134 * \param entry first entry of the region to check
135 * \param len length of the region to check
136 * \param only_pages true == do not report previously allocated lower-level
137 * page tables that are empty
138 * \return true iff entries exist in region.
140 #if defined(LIBBARRELFISH_DEBUG_PMAP)
141 #define DEBUG_HAS_VNODE
143 static bool has_vnode(struct vnode *root, uint32_t entry, size_t len,
146 assert(root != NULL);
147 assert(root->is_vnode);
150 uint32_t end_entry = entry + len;
151 #ifdef DEBUG_HAS_VNODE
152 debug_printf("%s: checking region [%"PRIu32"--%"PRIu32"], only_pages = %d\n",
153 __FUNCTION__, entry, end_entry, only_pages);
156 bool found_pages = false;
157 for (n = root->u.vnode.children; n; n = n->next) {
158 // region to check [entry .. end_entry)
160 #ifdef DEBUG_HAS_VNODE
161 debug_printf("%s: looking at vnode: entry = %d, mapped = %"PRIx8"\n",
162 __FUNCTION__, n->entry, n->u.vnode.mapped);
164 // n is a page table; we need to check if each mapped 1k L2 overlaps
165 // with the region to check [entry .. end_entry)
166 for (int i = 0; i < L2_PER_PAGE; i++) {
167 // ith 1k L2 is mapped
168 #ifdef DEBUG_HAS_VNODE
169 debug_printf("%s: n->u.vnode.mapped & (1 << %d) == %d\n",
170 __FUNCTION__, i, n->u.vnode.mapped & (1 << i));
172 if (L2_IS_MAPPED(n, i)) {
173 #ifdef DEBUG_HAS_VNODE
174 debug_printf("%s: check overlapping: %"PRIu32"--%"PRIu32
175 " <> %"PRIu32"--%"PRIu32"\n",
176 __FUNCTION__, entry, end_entry,
177 n->entry + i, n->entry + i + 1);
179 if (is_overlapping(entry, end_entry, n->entry + i, n->entry + i + 1)) {
181 uint16_t rec_start = i * PTABLE_SIZE;
182 #ifdef DEBUG_HAS_VNODE
183 debug_printf("%s: checking recursively in %"PRIu16"--%"PRIu16"\n",
184 __FUNCTION__, rec_start, rec_start + PTABLE_SIZE);
186 found_pages = found_pages
187 || has_vnode(n, rec_start, PTABLE_SIZE, true);
199 uint32_t end = n->entry + n->u.frame.pte_count;
200 #ifdef DEBUG_HAS_VNODE
201 debug_printf("%s: looking at region: [%"PRIu32"--%"PRIu32"]\n",
202 __FUNCTION__, n->entry, end);
206 if (is_overlapping(entry, end_entry, n->entry, end)) {
216 * \brief Starting at a given root, return the vnode with entry equal to #entry
217 * \return vnode at index `entry` or NULL
219 #ifdef LIBBARRELFISH_DEBUG_PMAP
220 #define DEBUG_FIND_VNODE
222 static struct vnode *find_vnode(struct vnode *root, uint16_t entry)
224 assert(root != NULL);
225 assert(root->is_vnode);
228 #ifdef DEBUG_FIND_VNODE
229 debug_printf("%s: looking for %"PRIu16"\n", __FUNCTION__, entry);
232 for(n = root->u.vnode.children; n != NULL; n = n->next) {
234 is_overlapping(entry, entry + 1, n->entry, n->entry + L2_PER_PAGE)) {
235 #ifdef DEBUG_FIND_VNODE
236 debug_printf("%s: found ptable at [%"PRIu16"--%"PRIu16"]\n",
237 __FUNCTION__, n->entry, n->entry + L2_PER_PAGE);
241 else if (n->is_vnode) {
242 assert(!is_overlapping(entry, entry + 1, n->entry, n->entry + L2_PER_PAGE));
243 // ignore all other vnodes;
248 assert(!n->is_vnode);
249 uint16_t end = n->entry + n->u.frame.pte_count;
250 #ifdef DEBUG_FIND_VNODE
251 debug_printf("%s: looking at section [%"PRIu16"--%"PRIu16"]\n", __FUNCTION__, n->entry, end);
253 if (n->entry <= entry && entry < end) {
254 #ifdef DEBUG_FIND_VNODE
255 debug_printf("%d \\in [%d, %d]\n", entry, n->entry, end);
264 * \brief check whether region [entry, entry+npages) is contained in a child
267 static bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
269 assert(root != NULL);
270 assert(root->is_vnode);
274 for (n = root->u.vnode.children; n; n = n->next) {
276 uint16_t end = n->entry + n->u.frame.pte_count;
277 if (n->entry <= entry && entry + npages <= end) {
287 * \brief remove vnode `item` from linked list of children of `root`
289 static void remove_vnode(struct vnode *root, struct vnode *item)
291 assert(root->is_vnode);
292 struct vnode *walk = root->u.vnode.children;
293 struct vnode *prev = NULL;
297 prev->next = walk->next;
300 root->u.vnode.children = walk->next;
307 USER_PANIC("Should not get here");
310 static void unmap_l2_table(struct vnode *root, struct vnode *n, uint16_t e)
313 uint32_t entry = ROUND_DOWN(n->entry, L2_PER_PAGE) + e;
314 if (L2_IS_MAPPED(n, e)) {
315 err = vnode_unmap(root->u.vnode.cap[0], n->u.vnode.cap[e],
317 if (err_is_fail(err)) {
318 debug_printf("remove_empty_vnodes: vnode_unmap: %s\n",
323 // delete capability, if not entry 0
325 err = cap_destroy(n->u.vnode.cap[e]);
326 if (err_is_fail(err)) {
327 debug_printf("remove_empty_vnodes: cap_destroy: %s\n",
336 * \brief (recursively) remove empty page tables in region [entry ..
337 * entry+len) in vnode `root`.
339 #ifdef LIBBARRELFISH_DEBUG_PMAP
340 #define DEBUG_REMOVE_EMPTY_VNODES
342 static void remove_empty_vnodes(struct slab_allocator *vnode_alloc, struct vnode *root,
343 uint32_t entry, size_t len)
345 // precondition: root does not have pages in [entry, entry+len)
346 assert(!has_vnode(root, entry, len, true));
349 uint32_t end_entry = entry + len;
350 for (struct vnode *n = root->u.vnode.children; n; n = n->next) {
351 // sanity check and skip leaf entries
355 // here we know that all vnodes we're interested in are
359 if (entry < n->entry && end_entry >= n->entry + L2_PER_PAGE) {
360 // whole entry in region: completely unmap&free L2 page
361 for (int i = 0; i < L2_PER_PAGE; i++) {
362 unmap_l2_table(root, n, i);
365 // delete last copy of pt cap
366 err = cap_destroy(n->u.vnode.cap[0]);
367 assert(err_is_ok(err));
369 // remove vnode from list
370 remove_vnode(root, n);
371 slab_free(vnode_alloc, n);
372 } else if (entry >= n->entry && entry < n->entry + L2_PER_PAGE) {
373 // tail end of vnode in region:
374 uint16_t e = entry - n->entry;
375 #ifdef DEBUG_REMOVE_EMPTY_VNODES
376 debug_printf("overlap: %"PRIu16" entries\n", e);
378 for (; e < L2_PER_PAGE; e++) {
379 unmap_l2_table(root, n, e);
381 } else if (end_entry > n->entry && end_entry < n->entry + L2_PER_PAGE) {
382 // start of vnode in region
383 uint16_t e = end_entry - n->entry;
384 #ifdef DEBUG_REMOVE_EMPTY_VNODES
385 debug_printf("overlap: %"PRIu16" entries\n", e);
387 for (int i = 0; i < e; i++) {
388 unmap_l2_table(root, n, i);
395 * \brief Allocates a new VNode, adding it to the page table and our metadata
397 static errval_t alloc_vnode(struct pmap_arm *pmap_arm, struct vnode *root,
398 enum objtype type, uint32_t entry,
399 struct vnode **retvnode)
401 assert(root->is_vnode);
404 struct vnode *newvnode = slab_alloc(&pmap_arm->slab);
405 if (newvnode == NULL) {
406 return LIB_ERR_SLAB_ALLOC_FAIL;
408 newvnode->is_vnode = true;
410 // The VNode capability
411 err = slot_alloc(&newvnode->u.vnode.cap[0]);
412 if (err_is_fail(err)) {
413 return err_push(err, LIB_ERR_SLOT_ALLOC);
416 err = vnode_create(newvnode->u.vnode.cap[0], type);
417 if (err_is_fail(err)) {
418 return err_push(err, LIB_ERR_VNODE_CREATE);
420 for (int i = 1; i < L2_PER_PAGE; i++) {
421 newvnode->u.vnode.cap[i] = NULL_CAP;
424 // The VNode meta data
425 newvnode->entry = ROUND_DOWN(entry, L2_PER_PAGE);
426 assert(newvnode->entry % L2_PER_PAGE == 0);
427 newvnode->next = root->u.vnode.children;
428 newvnode->u.vnode.mapped = 0x0; // no entries mapped
429 root->u.vnode.children = newvnode;
430 newvnode->u.vnode.children = NULL;
433 *retvnode = newvnode;
439 * \brief Returns the vnode for the pagetable mapping a given vspace address
441 #ifdef LIBBARRELFISH_DEBUG_PMAP
442 #define DEBUG_GET_PTABLE
444 static errval_t get_ptable(struct pmap_arm *pmap,
446 struct vnode **ptable)
448 // NB Strictly there are 12 bits in the ARM L1, but the allocation unit
449 // of L2 is 1 page of L2 entries (4 tables), so we use 10 bits for the L1
451 uintptr_t idx = ARM_L1_OFFSET(vaddr);
452 uintptr_t page_idx = L2_PAGE_IDX(idx);
453 if ((*ptable = find_vnode(&pmap->root, idx)) == NULL)
455 // L1 table entries point to L2 tables so allocate an L2
456 // table for this L1 entry.
458 struct vnode *tmp = NULL; // Tmp variable for passing to alloc_vnode
460 #ifdef DEBUG_GET_PTABLE
461 uintptr_t fidx = ROUND_DOWN(idx, L2_PER_PAGE);
462 debug_printf("allocating 4 x L2, entries = %"PRIuPTR"--%z"PRIuPTR"\n",
465 errval_t err = alloc_vnode(pmap, &pmap->root, ObjType_VNode_ARM_l2,
467 if (err_is_fail(err)) {
468 DEBUG_ERR(err, "alloc_vnode");
472 *ptable = tmp; // Set argument to received value
474 if (err_is_fail(err)) {
475 return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
479 struct vnode *pt = *ptable;
481 debug_printf("found section @%d, trying to get ptable for %d\n",
484 assert(pt->is_vnode);
485 #ifdef DEBUG_GET_PTABLE
486 debug_printf("have ptable: %p\n", pt);
487 debug_printf("mapped = %x\n", pt->u.vnode.mapped);
488 debug_printf("page_idx = %d\n", page_idx);
489 debug_printf("l2_is_mapped: %d\n", L2_IS_MAPPED(pt, page_idx));
491 if (!L2_IS_MAPPED(pt, page_idx)) {
492 #ifdef DEBUG_GET_PTABLE
493 debug_printf("need to map entry %d\n", page_idx);
496 uintptr_t offset = L2_PAGE_OFFSET(idx);
497 #ifdef DEBUG_GET_PTABLE
498 debug_printf("mapping L1 entry %d at offset %"PRIuPTR"\n", idx, offset);
501 // create copy of ptable cap for this index, if it doesn't exist
503 if (capref_is_null(pt->u.vnode.cap[page_idx])) {
504 #ifdef DEBUG_GET_PTABLE
505 debug_printf("allocating slot for chunk %d\n", page_idx);
507 err = slot_alloc(&pt->u.vnode.cap[page_idx]);
508 if (err_is_fail(err)) {
509 return err_push(err, LIB_ERR_VNODE_MAP);
512 #ifdef DEBUG_GET_PTABLE
513 debug_printf("creating copy for chunk %d\n", page_idx);
515 err = cap_copy(pt->u.vnode.cap[page_idx], pt->u.vnode.cap[0]);
516 if (err_is_fail(err)) {
517 return err_push(err, LIB_ERR_VNODE_MAP);
521 #ifdef DEBUG_GET_PTABLE
522 debug_printf("calling vnode_map() for chunk %d\n", page_idx);
524 // map single 1k ptable
525 err = vnode_map(pmap->root.u.vnode.cap[0], pt->u.vnode.cap[page_idx], idx,
526 KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE, offset, 1);
528 if (err_is_fail(err)) {
529 return err_push(err, LIB_ERR_VNODE_MAP);
532 // set 1k ptable as mapped
533 pt->u.vnode.mapped |= 1 << page_idx;
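// Note: u.vnode.mapped is a small bitmask with one bit per 1k chunk of the
// 4k L2 page; bit i set means chunk i has been installed in the hardware L1.
// For example, mapped == 0x5 would mean chunks 0 and 2 (i.e. L1 entries
// pt->entry + 0 and pt->entry + 2) are currently mapped.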
539 static struct vnode *find_ptable(struct pmap_arm *pmap,
542 // NB Strictly there are 12 bits in the ARM L1, but the allocation unit
543 // of L2 is 1 page of L2 entries (4 tables), so we use 10 bits for the L1
544 uintptr_t idx = ARM_L1_OFFSET(vaddr);
545 return find_vnode(&pmap->root, idx);
548 static errval_t do_single_map(struct pmap_arm *pmap, genvaddr_t vaddr, genvaddr_t vend,
549 struct capref frame, size_t offset, size_t pte_count,
550 vregion_flags_t flags)
552 errval_t err = SYS_ERR_OK;
553 // Get the page table
554 struct vnode *ptable;
556 bool is_large = false;
558 struct frame_identity fi;
559 err = invoke_frame_identify(frame, &fi);
560 if (err_is_fail(err)) {
561 return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
564 if (flags & VREGION_FLAGS_LARGE &&
565 (vaddr & LARGE_PAGE_MASK) == 0 &&
566 fi.bits >= LARGE_PAGE_BITS &&
567 (fi.base & LARGE_PAGE_MASK) == 0) {
568 //section mapping (1MB)
569 //mapped in the L1 table at root
571 ptable = &pmap->root;
572 entry = ARM_L1_OFFSET(vaddr);
574 #ifdef LIBBARRELFISH_DEBUG_PMAP
575 printf("do_single_map: large path: entry=%zu\n", entry);
578 #ifdef LIBBARRELFISH_DEBUG_PMAP
579 debug_printf("%s: 4k path: mapping %"PRIxGENVADDR"\n", __FUNCTION__, vaddr);
580 debug_printf("4k path: L1 entry: %zu\n", ARM_USER_L1_OFFSET(vaddr));
583 // XXX: reassess the following note -SG
584 // NOTE: strictly speaking an L2 entry only has 8 bits, while an L1 entry
585 // has 12 bits, but due to the way Barrelfish allocates L1 and L2 tables,
586 // we use 10 bits for the entry here and in the map syscall
587 err = get_ptable(pmap, vaddr, &ptable);
588 if (err_is_fail(err)) {
589 DEBUG_ERR(err, "get_ptable() in do_single_map");
590 return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
592 entry = ARM_USER_L2_OFFSET(vaddr);
593 #ifdef LIBBARRELFISH_DEBUG_PMAP
594 debug_printf("%s: 4k path: L2 entry=%zu\n", __FUNCTION__, entry);
595 debug_printf("%s: ptable->is_vnode = %d\n",
596 __FUNCTION__, ptable->is_vnode);
601 flags &= ~(VREGION_FLAGS_LARGE | VREGION_FLAGS_HUGE);
602 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
604 uintptr_t user_pte_count = pte_count;
606 user_pte_count = DIVIDE_ROUND_UP(pte_count, L2_PER_PAGE);
609 // check if there is an overlapping mapping
610 if (has_vnode(ptable, entry, pte_count, false)) {
611 #ifdef LIBBARRELFISH_DEBUG_PMAP
612 debug_printf("has_vnode, only_pages=false returned true\n");
614 if (has_vnode(ptable, entry, pte_count, true)) {
615 printf("page already exists in 0x%"
616 PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
617 return LIB_ERR_PMAP_EXISTING_MAPPING;
619 #ifdef LIBBARRELFISH_DEBUG_PMAP
620 debug_printf("has_vnode, only_pages=true returned false, cleaning up empty ptables\n");
622 // clean out empty page tables. We do this here because we benefit
623 // from having the page tables in place when doing lots of small mappings.
625 // XXX: TODO: fix this + mapping of L2 to work on single 1k
627 remove_empty_vnodes(&pmap->slab, ptable, entry, pte_count);
631 // Create user level datastructure for the mapping
632 struct vnode *page = slab_alloc(&pmap->slab);
634 page->is_vnode = false;
636 page->next = ptable->u.vnode.children;
637 ptable->u.vnode.children = page;
638 page->u.frame.cap = frame;
639 page->u.frame.flags = flags;
640 page->u.frame.pte_count = user_pte_count;
641 page->u.frame.kernel_pte_count = pte_count;
643 // Map entry into the page table
644 err = vnode_map(ptable->u.vnode.cap[0], frame, entry,
645 pmap_flags, offset, pte_count);
646 if (err_is_fail(err)) {
647 return err_push(err, LIB_ERR_VNODE_MAP);
652 static errval_t do_map(struct pmap_arm *pmap, genvaddr_t vaddr,
653 struct capref frame, size_t offset, size_t size,
654 vregion_flags_t flags, size_t *retoff, size_t *retsize)
660 // get base address and size of frame
661 struct frame_identity fi;
662 err = invoke_frame_identify(frame, &fi);
663 if (err_is_fail(err)) {
664 return err_push(err, LIB_ERR_PMAP_DO_MAP);
667 // determine mapping specific parts
668 if (flags & VREGION_FLAGS_LARGE &&
669 (vaddr & LARGE_PAGE_MASK) == 0 &&
670 fi.bits >= LARGE_PAGE_BITS &&
671 (fi.base & LARGE_PAGE_MASK) == 0) {
672 //section mapping (1MB)
673 page_size = LARGE_PAGE_SIZE;
674 offset_level = ARM_L1_OFFSET(vaddr);
675 #ifdef LIBBARRELFISH_DEBUG_PMAP
676 printf("do_map: large path\n");
677 printf("page_size: %zx, size: %zx\n", page_size, size);
681 page_size = BASE_PAGE_SIZE;
682 offset_level = ARM_L2_OFFSET(vaddr);
685 size = ROUND_UP(size, page_size);
686 size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
687 if (flags & VREGION_FLAGS_LARGE) {
688 #ifdef LIBBARRELFISH_DEBUG_PMAP
689 printf("#pages: 0x%zu\n", pte_count);
692 genvaddr_t vend = vaddr + size;
694 if ((1UL << fi.bits) < size) {
695 return LIB_ERR_PMAP_FRAME_SIZE;
698 //should be trivially true for section mappings
699 if ((ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend)) ||
700 flags & VREGION_FLAGS_LARGE) {
702 err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
703 if (err_is_fail(err)) {
704 DEBUG_ERR(err, "[do_map] in fast path");
705 return err_push(err, LIB_ERR_PMAP_DO_MAP);
707 } else { // multiple leaf page tables
709 uint32_t c = ARM_L2_MAX_ENTRIES - offset_level;
710 genvaddr_t temp_end = vaddr + c * page_size;
711 err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
712 if (err_is_fail(err)) {
713 return err_push(err, LIB_ERR_PMAP_DO_MAP);
717 while (ARM_L1_OFFSET(temp_end) < ARM_L1_OFFSET(vend)) { // update vars
719 temp_end = vaddr + ARM_L2_MAX_ENTRIES * page_size;
720 offset += c * page_size;
721 c = ARM_L2_MAX_ENTRIES;
724 err = slot_alloc(&next);
725 if (err_is_fail(err)) {
726 return err_push(err, LIB_ERR_PMAP_DO_MAP);
728 err = cap_copy(next, frame);
729 if (err_is_fail(err)) {
730 return err_push(err, LIB_ERR_PMAP_DO_MAP);
735 err = do_single_map(pmap, vaddr, temp_end, frame, offset, ARM_L2_MAX_ENTRIES, flags);
736 if (err_is_fail(err)) {
737 return err_push(err, LIB_ERR_PMAP_DO_MAP);
741 // map remaining part
742 offset += c * page_size;
743 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(temp_end);
747 err = slot_alloc(&next);
748 if (err_is_fail(err)) {
749 return err_push(err, LIB_ERR_PMAP_DO_MAP);
751 err = cap_copy(next, frame);
752 if (err_is_fail(err)) {
753 return err_push(err, LIB_ERR_PMAP_DO_MAP);
757 err = do_single_map(pmap, temp_end, vend, next, offset, c, flags);
758 if (err_is_fail(err)) {
759 return err_push(err, LIB_ERR_PMAP_DO_MAP);
769 //has_vnode_debug = false;
773 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
775 for (size_t i = offset; i < offset + size; i += BASE_PAGE_SIZE) {
777 vaddr += BASE_PAGE_SIZE;
791 max_slabs_required(size_t bytes)
793 // Perform a slab allocation for every page (do_map -> slab_alloc)
794 size_t pages = DIVIDE_ROUND_UP(bytes, BASE_PAGE_SIZE);
795 // Perform a slab allocation for every L2 (get_ptable -> find_vnode)
796 size_t l2entries = DIVIDE_ROUND_UP(pages, 256 * 4);
797 // Perform a slab allocation for every L1 (do_map -> find_vnode)
798 size_t l1entries = DIVIDE_ROUND_UP(l2entries, 1024);
799 return pages + l2entries + l1entries;
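// Worked example (illustrative): mapping 1MB with 4k pages gives pages = 256,
// l2entries = DIVIDE_ROUND_UP(256, 1024) = 1 and l1entries = 1, so
// max_slabs_required returns 256 + 1 + 1 = 258 for that request.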
801 static size_t max_slabs_required_large(size_t bytes)
803 // always need only one slab, as we can represent any size section mapping
804 // in a single struct vnode.
809 * \brief Refill slabs used for metadata
811 * \param pmap The pmap to refill in
812 * \param request The number of slabs the allocator must have
813 * when the function returns
815 * When the current pmap is initialized,
816 * it reserves some virtual address space for metadata.
817 * This reserved address space is used here
819 * Can only be called for the current pmap
820 * Will recursively call into itself until it has enough slabs
823 static errval_t refill_slabs(struct pmap_arm *pmap, size_t request)
827 /* Keep looping till we have #request slabs */
828 while (slab_freecount(&pmap->slab) < request) {
829 // Amount of bytes required for #request
830 size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
831 sizeof(struct vnode));
833 /* Get a frame of that size */
835 err = frame_alloc(&cap, bytes, &bytes);
836 if (err_is_fail(err)) {
837 return err_push(err, LIB_ERR_FRAME_ALLOC);
840 /* If we do not have enough slabs to map the frame in, recurse */
841 size_t required_slabs_for_frame = max_slabs_required(bytes);
842 if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
843 // If we recurse, we require more slabs than are needed to map a single page
844 assert(required_slabs_for_frame > 4);
846 err = refill_slabs(pmap, required_slabs_for_frame);
847 if (err_is_fail(err)) {
848 return err_push(err, LIB_ERR_SLAB_REFILL);
852 /* Perform mapping */
853 genvaddr_t genvaddr = pmap->vregion_offset;
854 pmap->vregion_offset += (genvaddr_t)bytes;
856 // if this assert fires, increase META_DATA_RESERVED_SPACE
857 assert(pmap->vregion_offset < (vregion_get_base_addr(&pmap->vregion) +
858 vregion_get_size(&pmap->vregion)));
860 err = do_map(pmap, genvaddr, cap, 0, bytes,
861 VREGION_FLAGS_READ_WRITE, NULL, NULL);
862 if (err_is_fail(err)) {
863 return err_push(err, LIB_ERR_PMAP_DO_MAP);
867 lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
868 slab_grow(&pmap->slab, (void*)buf, bytes);
875 * \brief Create page mappings
877 * \param pmap The pmap object
878 * \param vaddr The virtual address to create the mapping for
879 * \param frame The frame cap to map in
880 * \param offset Offset into the frame cap
881 * \param size Size of the mapping
882 * \param flags Flags for the mapping
883 * \param retoff If non-NULL, filled in with adjusted offset of mapped region
884 * \param retsize If non-NULL, filled in with adjusted size of mapped region
887 map(struct pmap *pmap,
892 vregion_flags_t flags,
896 struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;
901 size_t slabs_required;
903 struct frame_identity fi;
904 err = invoke_frame_identify(frame, &fi);
905 if (err_is_fail(err)) {
906 return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
909 // adjust the mapping to be on page boundaries
910 if (flags & VREGION_FLAGS_LARGE &&
911 (vaddr & LARGE_PAGE_MASK) == 0 &&
912 fi.bits >= LARGE_PAGE_BITS &&
913 (fi.base & LARGE_PAGE_MASK) == 0) {
914 //section mapping (1MB)
915 base = LARGE_PAGE_OFFSET(offset);
916 page_size = LARGE_PAGE_SIZE;
917 slabs_required = max_slabs_required_large(size);
918 #ifdef LIBBARRELFISH_DEBUG_PMAP
919 size_t frame_sz = 1ULL<<fi.bits;
920 printf("map: large path, page_size: %i, base: %i, slabs: %i, size: %i,"
921 "frame size: %zu\n", page_size, base, slabs_required, size, frame_sz);
925 base = BASE_PAGE_OFFSET(offset);
926 page_size = BASE_PAGE_SIZE;
927 slabs_required = max_slabs_required(size);
930 size = ROUND_UP(size, page_size);
933 const size_t slabs_reserve = 3; // == max_slabs_required(1)
934 uint64_t slabs_free = slab_freecount(&pmap_arm->slab);
936 slabs_required += slabs_reserve;
938 if (slabs_required > slabs_free) {
939 if (get_current_pmap() == pmap) {
940 err = refill_slabs(pmap_arm, slabs_required);
941 if (err_is_fail(err)) {
942 return err_push(err, LIB_ERR_SLAB_REFILL);
946 size_t bytes = SLAB_STATIC_SIZE(slabs_required - slabs_free,
947 sizeof(struct vnode));
948 void *buf = malloc(bytes);
950 return LIB_ERR_MALLOC_FAIL;
952 slab_grow(&pmap_arm->slab, buf, bytes);
956 return do_map(pmap_arm, vaddr, frame, offset, size, flags,
960 static errval_t do_single_unmap(struct pmap_arm *pmap, genvaddr_t vaddr,
961 size_t pte_count, bool delete_cap)
964 struct vnode *pt = find_ptable(pmap, vaddr);
965 // pt->is_vnode == non-large mapping
966 if (pt && pt->is_vnode) {
967 // analogous to do_single_map, we use 10 bits for tracking pages in user space -SG
968 struct vnode *page = find_vnode(pt, ARM_USER_L2_OFFSET(vaddr));
969 if (page && page->u.frame.pte_count == pte_count) {
970 err = vnode_unmap(pt->u.vnode.cap[0], page->u.frame.cap,
971 page->entry, page->u.frame.pte_count);
972 if (err_is_fail(err)) {
973 DEBUG_ERR(err, "vnode_unmap");
974 return err_push(err, LIB_ERR_VNODE_UNMAP);
977 // Free up the resources
979 err = cap_destroy(page->u.frame.cap);
980 if (err_is_fail(err)) {
981 return err_push(err, LIB_ERR_PMAP_DO_SINGLE_UNMAP);
984 remove_vnode(pt, page);
985 slab_free(&pmap->slab, page);
988 return LIB_ERR_PMAP_FIND_VNODE;
991 #ifdef LIBBARRELFISH_DEBUG_PMAP
992 debug_printf("section unmap: entry = %zu, pte_count = %zu\n",
993 pt->entry, pt->u.frame.kernel_pte_count);
995 err = vnode_unmap(pmap->root.u.vnode.cap[0], pt->u.frame.cap,
996 pt->entry, pt->u.frame.kernel_pte_count);
997 if (err_is_fail(err)) {
998 DEBUG_ERR(err, "vnode_unmap");
999 return err_push(err, LIB_ERR_VNODE_UNMAP);
1002 remove_vnode(&pmap->root, pt);
1003 slab_free(&pmap->slab, pt);
1005 return LIB_ERR_PMAP_FIND_VNODE;
1012 * \brief Remove page mappings
1014 * \param pmap The pmap object
1015 * \param vaddr The start of the virtual address range to remove
1016 * \param size The size of the virtual address range to remove
1017 * \param retsize If non-NULL, filled in with the actual size removed
1020 unmap(struct pmap *pmap,
1025 errval_t err, ret = SYS_ERR_OK;
1026 struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
1027 size = ROUND_UP(size, BASE_PAGE_SIZE);
1028 size_t pte_count = size / BASE_PAGE_SIZE;
1029 genvaddr_t vend = vaddr + size;
1031 if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
1033 err = do_single_unmap(pmap_arm, vaddr, pte_count, false);
1034 if (err_is_fail(err)) {
1035 return err_push(err, LIB_ERR_PMAP_UNMAP);
1037 } else { // slow path
1039 uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
1040 err = do_single_unmap(pmap_arm, vaddr, c, false);
1041 if (err_is_fail(err)) {
1042 return err_push(err, LIB_ERR_PMAP_UNMAP);
1045 // unmap full leaves
1046 vaddr += c * BASE_PAGE_SIZE;
1047 while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
1048 c = ARM_L2_MAX_ENTRIES;
1049 err = do_single_unmap(pmap_arm, vaddr, c, true);
1050 if (err_is_fail(err)) {
1051 return err_push(err, LIB_ERR_PMAP_UNMAP);
1053 vaddr += c * BASE_PAGE_SIZE;
1056 // unmap remaining part
1057 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
1059 err = do_single_unmap(pmap_arm, vaddr, c, true);
1060 if (err_is_fail(err)) {
1061 return err_push(err, LIB_ERR_PMAP_UNMAP);
1074 * \brief Determine a suitable address for a given memory object
1076 * \param pmap The pmap object
1077 * \param memobj The memory object to determine the address for
1078 * \param alignment Minimum alignment
1079 * \param vaddr Pointer to return the determined address
1081 * Relies on vspace.c code maintaining an ordered list of vregions
1084 determine_addr(struct pmap *pmap,
1085 struct memobj *memobj,
1089 assert(pmap->vspace->head);
1091 if (alignment == 0) {
1092 alignment = BASE_PAGE_SIZE;
1094 alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
1096 size_t size = ROUND_UP(memobj->size, alignment);
1098 struct vregion *walk = pmap->vspace->head;
1099 while (walk->next) { // Try to insert between existing mappings
1100 genvaddr_t walk_base = vregion_get_base_addr(walk);
1101 genvaddr_t walk_size = ROUND_UP(vregion_get_size(walk), BASE_PAGE_SIZE);
1102 genvaddr_t walk_end = ROUND_UP(walk_base + walk_size, alignment);
1103 genvaddr_t next_base = vregion_get_base_addr(walk->next);
1105 if (next_base > walk_end + size &&
1106 walk_base + walk_size > VSPACE_BEGIN) { // Ensure mappings lie above VSPACE_BEGIN
1113 *vaddr = ROUND_UP((vregion_get_base_addr(walk)
1114 + ROUND_UP(vregion_get_size(walk), alignment)),
1119 /** \brief Retrieves an address that can currently be used for large mappings
1122 static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
1123 size_t alignment, genvaddr_t *retvaddr)
1125 struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;
1127 struct vnode *walk_pdir = pmap_arm->root.u.vnode.children;
1128 assert(walk_pdir != NULL); // assume there's always at least one existing entry
1130 if (alignment == 0) {
1131 alignment = BASE_PAGE_SIZE;
1133 alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
1135 size = ROUND_UP(size, alignment);
1137 size_t free_count = DIVIDE_ROUND_UP(size, LARGE_PAGE_SIZE);
1138 //debug_printf("need %zu contiguous free pdirs\n", free_count);
1140 // compile pdir free list
1141 // Barrelfish treats the L1 as having 1024 entries
1143 for (int i = 0; i < 1024; i++) {
1146 f[walk_pdir->entry] = false;
1148 assert(walk_pdir->is_vnode);
1149 f[walk_pdir->entry] = false;
1150 walk_pdir = walk_pdir->next;
1152 genvaddr_t first_free = 384;
1153 for (; first_free < 512; first_free++) {
1154 if (f[first_free]) {
1155 for (int i = 1; i < free_count; i++) {
1156 if (!f[first_free + i]) {
1158 first_free = first_free+i;
1165 assert(1 == 1);// make compiler shut up about label
1167 //printf("first free: %li\n", (uint32_t)first_free);
1168 if (first_free + free_count <= 512) {
1169 *retvaddr = first_free << 22;
1172 return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
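// Note on the arithmetic above: with the 1024-entry L1 view each pdir entry
// covers 4MB of virtual address space, so entry 384 corresponds to
// 384 << 22 = 0x60000000 and the search window [384, 512) spans
// 0x60000000--0x80000000.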
1178 static errval_t do_single_modify_flags(struct pmap_arm *pmap, genvaddr_t vaddr,
1179 size_t pages, vregion_flags_t flags)
1181 errval_t err = SYS_ERR_OK;
1182 struct vnode *ptable = find_ptable(pmap, vaddr);
1183 uint16_t ptentry = ARM_USER_L2_OFFSET(vaddr);
1185 struct vnode *page = find_vnode(ptable, ptentry);
1187 if (inside_region(ptable, ptentry, pages)) {
1188 // we're modifying part of a valid mapped region
1189 // arguments to invocation: invoke frame cap, first affected
1190 // page (as offset from first page in mapping), #affected
1191 // pages, new flags. Invocation should check compatibility of
1192 // new set of flags with cap permissions.
1193 size_t off = ptentry - page->entry;
1194 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
1195 err = invoke_frame_modify_flags(page->u.frame.cap, off, pages, pmap_flags);
1196 printf("invoke_frame_modify_flags returned error: %s (%"PRIuERRV")\n",
1197 err_getstring(err), err);
1200 // overlaps some region border
1201 return LIB_ERR_PMAP_EXISTING_MAPPING;
1209 * \brief Modify page mapping
1211 * \param pmap The pmap object
1212 * \param vaddr The virtual address to modify
1213 * \param flags New flags for the mapping
1214 * \param retsize If non-NULL, filled in with the actual size modified
1217 modify_flags(struct pmap *pmap,
1220 vregion_flags_t flags,
1223 errval_t err, ret = SYS_ERR_OK;
1224 struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
1225 size = ROUND_UP(size, BASE_PAGE_SIZE);
1226 size_t pte_count = size / BASE_PAGE_SIZE;
1227 genvaddr_t vend = vaddr + size;
1229 if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
1231 err = do_single_modify_flags(pmap_arm, vaddr, pte_count, flags);
1232 if (err_is_fail(err)) {
1233 return err_push(err, LIB_ERR_PMAP_UNMAP);
1238 uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
1239 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1240 if (err_is_fail(err)) {
1241 return err_push(err, LIB_ERR_PMAP_UNMAP);
1244 // modify flags on full leaves
1245 vaddr += c * BASE_PAGE_SIZE;
1246 while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
1247 c = ARM_L2_MAX_ENTRIES;
1248 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1249 if (err_is_fail(err)) {
1250 return err_push(err, LIB_ERR_PMAP_UNMAP);
1252 vaddr += c * BASE_PAGE_SIZE;
1255 // modify flags on the remaining part
1256 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
1258 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1259 if (err_is_fail(err)) {
1260 return err_push(err, LIB_ERR_PMAP_UNMAP);
1273 * \brief Query existing page mapping
1275 * \param pmap The pmap object
1276 * \param vaddr The virtual address to query
1277 * \param retvaddr Returns the base virtual address of the mapping
1278 * \param retsize Returns the actual size of the mapping
1279 * \param retcap Returns the cap mapped at this address
1280 * \param retoffset Returns the offset within the cap that is mapped
1281 * \param retflags Returns the flags for this mapping
1283 * All of the ret parameters are optional.
1285 static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
1286 genvaddr_t *retvaddr, size_t *retsize,
1287 struct capref *retcap, genvaddr_t *retoffset,
1288 vregion_flags_t *retflags)
1296 serialise(struct pmap *pmap, void *buf, size_t buflen)
1298 // Unimplemented: ignored
1303 deserialise(struct pmap *pmap, void *buf, size_t buflen)
1305 // Unimplemented: we start with an empty pmap, and avoid the bottom of the A/S
1309 static struct pmap_funcs pmap_funcs = {
1310 .determine_addr = determine_addr,
1311 .determine_addr_raw = determine_addr_raw,
1314 .modify_flags = modify_flags,
1316 .serialise = serialise,
1317 .deserialise = deserialise,
1321 * \brief Initialize the pmap object
1324 pmap_init(struct pmap *pmap,
1325 struct vspace *vspace,
1326 struct capref vnode,
1327 struct slot_allocator *opt_slot_alloc)
1329 struct pmap_arm* pmap_arm = (struct pmap_arm*)pmap;
1331 /* Generic portion */
1332 pmap->f = pmap_funcs;
1333 pmap->vspace = vspace;
1335 // Slab allocator for vnodes
1336 slab_init(&pmap_arm->slab, sizeof(struct vnode), NULL);
1337 slab_grow(&pmap_arm->slab,
1338 pmap_arm->slab_buffer,
1339 sizeof(pmap_arm->slab_buffer));
1341 pmap_arm->root.is_vnode = true;
1342 pmap_arm->root.u.vnode.cap[0] = vnode;
1343 pmap_arm->root.next = NULL;
1344 pmap_arm->root.u.vnode.children = NULL;
1349 errval_t pmap_current_init(bool init_domain)
1351 struct pmap_arm *pmap_arm = (struct pmap_arm*)get_current_pmap();
1353 // To reserve a block of virtual address space,
1354 // a vregion representing the address space is required.
1355 // We construct a superficial one here and add it to the vregion list.
1356 struct vregion *vregion = &pmap_arm->vregion;
1357 assert((void*)vregion > (void*)pmap_arm);
1358 assert((void*)vregion < (void*)(pmap_arm + 1));
1359 vregion->vspace = NULL;
1360 vregion->memobj = NULL;
1361 vregion->base = VSPACE_BEGIN;
1362 vregion->offset = 0;
1363 vregion->size = META_DATA_RESERVED_SPACE;
1365 vregion->next = NULL;
1367 struct vspace *vspace = pmap_arm->p.vspace;
1368 assert(!vspace->head);
1369 vspace->head = vregion;
1371 pmap_arm->vregion_offset = pmap_arm->vregion.base;