3 * \brief pmap management
7 * Copyright (c) 2010-2015 ETH Zurich.
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
16 * There was some minor difficulty here with mapping the CPU's native
17 * page table arrangement onto Barrelfish. The problem lies with
18 * resource bootstrapping. The bootstrap RAM allocator allocates pages.
21 * The natural division of bits is 12/8/12, corresponding to 4K
22 * L1 entries in the L1 table and 256 L2 entries per L2
23 * table. Unfortunately 256 entries consume 1KB rather than a
24 * page (4KB), so we pretend here and in the kernel caps page
25 * code that the L1 has 1024 entries and L2 tables are 4KB in
26 * size. The 4KB constraint comes from ram_alloc_fixed
27 * allocating single pages and the difficulty in bootstrapping
28 * cap slots (alloc_node takes a single slot).
30 * For now this suffices, but it may need to be revisited in the future.
32 * An earlier cut at this used the first 1KB from each
33 * allocation made from ram_alloc_fixed and wasted the remaining
34 * space. Aside from the space wasted, it entailed a couple of minor
35 * platform ifdefs to work around the discrepancy.
37 * Alternative fixes discussed include:
39 * 1. avoid the need to create vnodes before connecting to a
40 * real allocator (probably not plausible).
42 * 2. somehow make ram_alloc_fixed handle sub-page allocations
43 * (it's clunky, but perhaps we could give each domain a separate
44 * cnode full of 1kB-sized RAM caps?)
46 * 3. handle the problem at the level of vnode_create (can't see how to
49 * 4. waste the space -- doing this cleanly will require a new parameter
50 * to retype to prevent all 4 caps being created
52 * 5. introduce a new ARM-specific version of vnode_create that creates
53 * four 1kB vnodes and is only called from the ARM VM code.
57 #include <barrelfish/barrelfish.h>
58 #include <barrelfish/caddr.h>
59 #include <barrelfish/invocations_arch.h>
62 // Location of VSpace managed by this system.
63 #ifdef __ARM_ARCH_7M__
64 //virtual section 0x40000000-0x40100000 cannot be used as regular memory
65 //because of "bit-banding".
66 //0x42000000-0x44000000 is also dangerous, so we start after that.
67 //XXX: there are more virtual regions we
68 //are not allowed to use -> find out where to reserve those
69 #define VSPACE_BEGIN ((lvaddr_t)(1UL*1024*1024*1024 + 64UL*1024*1024)) //0x44000000
70 #else //"normal" arm architectures
71 #define VSPACE_BEGIN ((lvaddr_t)1UL*1024*1024*1024) //0x40000000
75 // Amount of virtual address space reserved for mapping frames
76 // backing refill_slabs.
77 //#define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 128)
78 #define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 256)
79 // increased from 128 to 256 pages for the PandaBoard port
81 // Convenience macros to figure out user space page table indices
82 // we use 10 bits for both L1 and L2 tables in user space, even though
83 // in hardware we use 12 bits for L1 and 8 bits for L2.
84 #define ARM_USER_L1_OFFSET(addr) ((uintptr_t)(addr >> 22) & 0x3ffu)
85 #define ARM_USER_L2_OFFSET(addr) ((uintptr_t)(addr >> 12) & 0x3ffu)
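/*
 * Illustrative sketch (not part of the original file): how the 10/10/12
 * user-space split implemented by the macros above carves up a 32-bit
 * virtual address. The PMAP_ARM_EXAMPLES guard is hypothetical and only
 * keeps this example out of real builds.
 */
#ifdef PMAP_ARM_EXAMPLES
static void example_split_vaddr(void)
{
    uintptr_t vaddr = 0x40123456u;
    assert(ARM_USER_L1_OFFSET(vaddr) == 0x100); // 10-bit L1 index, bits [31:22]
    assert(ARM_USER_L2_OFFSET(vaddr) == 0x123); // 10-bit L2 index, bits [21:12]
    assert((vaddr & 0xfffu) == 0x456);          // 12-bit offset within the 4kB page
}
#endif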
87 static inline uintptr_t
88 vregion_flags_to_kpi_paging_flags(vregion_flags_t flags)
90 STATIC_ASSERT(0x1ff == VREGION_FLAGS_MASK, "");
91 STATIC_ASSERT(0x0f == KPI_PAGING_FLAGS_MASK, "");
92 STATIC_ASSERT(VREGION_FLAGS_READ == KPI_PAGING_FLAGS_READ, "");
93 STATIC_ASSERT(VREGION_FLAGS_WRITE == KPI_PAGING_FLAGS_WRITE, "");
94 STATIC_ASSERT(VREGION_FLAGS_EXECUTE == KPI_PAGING_FLAGS_EXECUTE, "");
95 STATIC_ASSERT(VREGION_FLAGS_NOCACHE == KPI_PAGING_FLAGS_NOCACHE, "");
96 if ((flags & VREGION_FLAGS_MPB) != 0) {
97 // XXX: ignore MPB flag on ARM, otherwise the assert below fires -AB
98 flags &= ~VREGION_FLAGS_MPB;
100 if ((flags & VREGION_FLAGS_WRITE_COMBINING) != 0) {
101 // XXX mask out write-combining flag on ARM
102 flags &= ~VREGION_FLAGS_WRITE_COMBINING;
104 if ((flags & VREGION_FLAGS_VTD_SNOOP) != 0) {
105 // XXX mask out vtd-snooping flag on ARM
106 flags &= ~VREGION_FLAGS_VTD_SNOOP;
108 if ((flags & VREGION_FLAGS_GUARD) != 0) {
111 assert(0 == (~KPI_PAGING_FLAGS_MASK & (uintptr_t)flags));
112 return (uintptr_t)flags;
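/*
 * Usage sketch (assumption, not original code): flags that have no ARM
 * equivalent are silently dropped by the function above, so a read/write
 * request with an x86-only bit set translates to just the read and write
 * KPI bits. PMAP_ARM_EXAMPLES is a hypothetical guard.
 */
#ifdef PMAP_ARM_EXAMPLES
static void example_flag_translation(void)
{
    uintptr_t kpi = vregion_flags_to_kpi_paging_flags(
        VREGION_FLAGS_READ | VREGION_FLAGS_WRITE | VREGION_FLAGS_VTD_SNOOP);
    assert(kpi == (KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE));
}
#endif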
115 // debug print preprocessor flag for this file
116 //#define LIBBARRELFISH_DEBUG_PMAP
119 * \brief check whether region A = [start_a .. end_a) overlaps
120 * region B = [start_b .. end_b).
121 * \return true iff A overlaps B
123 static bool is_overlapping(uint16_t start_a, uint16_t end_a, uint16_t start_b, uint16_t end_b)
126 // B contained in A
127 (start_a < start_b && end_a >= end_b)
129 || (start_a >= start_b && start_a < end_b)
131 || (end_a > start_b && end_a < end_b);
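/*
 * Usage sketch (not original code): the intervals are half-open, so two
 * adjacent regions do not count as overlapping. PMAP_ARM_EXAMPLES is a
 * hypothetical guard.
 */
#ifdef PMAP_ARM_EXAMPLES
static void example_overlap(void)
{
    assert(is_overlapping(0, 4, 2, 6));  // partial overlap
    assert(is_overlapping(0, 8, 2, 6));  // B contained in A
    assert(!is_overlapping(0, 4, 4, 8)); // adjacent regions do not overlap
}
#endif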
135 * \brief Check whether vnode `root' has entries in [entry .. entry+len).
137 * \param root the vnode to look at
138 * \param entry first entry of the region to check
139 * \param len length of the region to check
140 * \param only_pages if true, do not report previously allocated lower-level
141 * page tables that are empty
142 * \return true iff entries exist in region.
144 #if defined(LIBBARRELFISH_DEBUG_PMAP)
145 #define DEBUG_HAS_VNODE
147 static bool has_vnode(struct vnode *root, uint32_t entry, size_t len,
150 assert(root != NULL);
151 assert(root->is_vnode);
154 uint32_t end_entry = entry + len;
155 #ifdef DEBUG_HAS_VNODE
156 debug_printf("%s: checking region [%"PRIu32"--%"PRIu32"], only_pages = %d\n",
157 __FUNCTION__, entry, end_entry, only_pages);
160 bool found_pages = false;
161 for (n = root->u.vnode.children; n; n = n->next) {
162 // region to check [entry .. end_entry)
164 #ifdef DEBUG_HAS_VNODE
165 debug_printf("%s: looking at vnode: entry = %d, mapped = %"PRIx8"\n",
166 __FUNCTION__, n->entry, n->u.vnode.mapped);
168 // n is a page table; check whether each mapped 1kB L2 chunk overlaps
169 // the region to check [entry .. end_entry)
170 for (int i = 0; i < L2_PER_PAGE; i++) {
171 // ith 1k L2 is mapped
172 #ifdef DEBUG_HAS_VNODE
173 debug_printf("%s: n->u.vnode.mapped & (1 << %d) == %d\n",
174 __FUNCTION__, i, n->u.vnode.mapped & (1 << i));
176 if (L2_IS_MAPPED(n, i)) {
177 #ifdef DEBUG_HAS_VNODE
178 debug_printf("%s: check overlapping: %"PRIu32"--%"PRIu32
179 " <> %"PRIu32"--%"PRIu32"\n",
180 __FUNCTION__, entry, end_entry,
181 n->entry + i, n->entry + i + 1);
183 if (is_overlapping(entry, end_entry, n->entry + i, n->entry + i + 1)) {
185 uint16_t rec_start = i * PTABLE_SIZE;
186 #ifdef DEBUG_HAS_VNODE
187 debug_printf("%s: checking recursively in %"PRIu16"--%"PRIu16"\n",
188 __FUNCTION__, rec_start, rec_start + PTABLE_SIZE);
190 found_pages = found_pages
191 || has_vnode(n, rec_start, PTABLE_SIZE, true);
203 uint32_t end = n->entry + n->u.frame.pte_count;
204 #ifdef DEBUG_HAS_VNODE
205 debug_printf("%s: looking at region: [%"PRIu32"--%"PRIu32"]\n",
206 __FUNCTION__, n->entry, end);
210 if (is_overlapping(entry, end_entry, n->entry, end)) {
220 * \brief Starting at a given root, return the vnode with entry equal to #entry
221 * \return vnode at index `entry` or NULL
223 #ifdef LIBBARRELFISH_DEBUG_PMAP
224 #define DEBUG_FIND_VNODE
226 static struct vnode *find_vnode(struct vnode *root, uint16_t entry)
228 assert(root != NULL);
229 assert(root->is_vnode);
232 #ifdef DEBUG_FIND_VNODE
233 debug_printf("%s: looking for %"PRIu16"\n", __FUNCTION__, entry);
236 for(n = root->u.vnode.children; n != NULL; n = n->next) {
238 is_overlapping(entry, entry + 1, n->entry, n->entry + L2_PER_PAGE)) {
239 #ifdef DEBUG_FIND_VNODE
240 debug_printf("%s: found ptable at [%"PRIu16"--%"PRIu16"]\n",
241 __FUNCTION__, n->entry, n->entry + L2_PER_PAGE);
245 else if (n->is_vnode) {
246 assert(!is_overlapping(entry, entry + 1, n->entry, n->entry + L2_PER_PAGE));
247 // ignore all other vnodes;
252 assert(!n->is_vnode);
253 uint16_t end = n->entry + n->u.frame.pte_count;
254 #ifdef DEBUG_FIND_VNODE
255 debug_printf("%s: looking at section [%"PRIu16"--%"PRIu16"]\n", __FUNCTION__, n->entry, end);
257 if (n->entry <= entry && entry < end) {
258 #ifdef DEBUG_FIND_VNODE
259 debug_printf("%d \\in [%d, %d]\n", entry, n->entry, end);
268 * \brief check whether region [entry, entry+npages) is contained in a child
271 static bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
273 assert(root != NULL);
274 assert(root->is_vnode);
278 for (n = root->u.vnode.children; n; n = n->next) {
280 uint16_t end = n->entry + n->u.frame.pte_count;
281 if (n->entry <= entry && entry + npages <= end) {
291 * \brief remove vnode `item` from linked list of children of `root`
293 static void remove_vnode(struct vnode *root, struct vnode *item)
295 assert(root->is_vnode);
296 struct vnode *walk = root->u.vnode.children;
297 struct vnode *prev = NULL;
301 prev->next = walk->next;
304 root->u.vnode.children = walk->next;
311 USER_PANIC("Should not get here");
314 static void unmap_l2_table(struct vnode *root, struct vnode *n, uint16_t e)
317 uint32_t entry = ROUND_DOWN(n->entry, L2_PER_PAGE) + e;
318 if (L2_IS_MAPPED(n, e)) {
319 err = vnode_unmap(root->u.vnode.cap[0], n->u.vnode.cap[e],
321 if (err_is_fail(err)) {
322 debug_printf("remove_empty_vnodes: vnode_unmap: %s\n",
327 // delete the capability copy, unless it is entry 0 (the original)
329 err = cap_destroy(n->u.vnode.cap[e]);
330 if (err_is_fail(err)) {
331 debug_printf("remove_empty_vnodes: cap_destroy: %s\n",
340 * \brief (recursively) remove empty page tables in region [entry ..
341 * entry+len) in vnode `root`.
343 #ifdef LIBBARRELFISH_DEBUG_PMAP
344 #define DEBUG_REMOVE_EMPTY_VNODES
346 static void remove_empty_vnodes(struct slab_allocator *vnode_alloc, struct vnode *root,
347 uint32_t entry, size_t len)
349 // precondition: root does not have pages in [entry, entry+len)
350 assert(!has_vnode(root, entry, len, true));
353 uint32_t end_entry = entry + len;
354 for (struct vnode *n = root->u.vnode.children; n; n = n->next) {
355 // sanity check and skip leaf entries
359 // here we know that all vnodes we're interested in are page tables
363 if (entry < n->entry && end_entry >= n->entry + L2_PER_PAGE) {
364 // whole vnode in region: completely unmap & free the L2 page
365 for (int i = 0; i < L2_PER_PAGE; i++) {
366 unmap_l2_table(root, n, i);
369 // delete last copy of pt cap
370 err = cap_destroy(n->u.vnode.cap[0]);
371 assert(err_is_ok(err));
373 // remove vnode from list
374 remove_vnode(root, n);
375 slab_free(vnode_alloc, n);
376 } else if (entry >= n->entry && entry < n->entry + L2_PER_PAGE) {
377 // tail end of vnode in region:
378 uint16_t e = entry - n->entry;
379 #ifdef DEBUG_REMOVE_EMPTY_VNODES
380 debug_printf("overlap: %"PRIu16" entries\n", e);
382 for (; e < L2_PER_PAGE; e++) {
383 unmap_l2_table(root, n, e);
385 } else if (end_entry > n->entry && end_entry < n->entry + L2_PER_PAGE) {
386 // start of vnode in region
387 uint16_t e = end_entry - n->entry;
388 #ifdef DEBUG_REMOVE_EMPTY_VNODES
389 debug_printf("overlap: %"PRIu16" entries\n", e);
391 for (int i = 0; i < e; i++) {
392 unmap_l2_table(root, n, i);
399 * \brief Allocates a new VNode, adding it to the page table and our metadata
401 static errval_t alloc_vnode(struct pmap_arm *pmap_arm, struct vnode *root,
402 enum objtype type, uint32_t entry,
403 struct vnode **retvnode)
405 assert(root->is_vnode);
408 struct vnode *newvnode = slab_alloc(&pmap_arm->slab);
409 if (newvnode == NULL) {
410 return LIB_ERR_SLAB_ALLOC_FAIL;
412 newvnode->is_vnode = true;
414 // The VNode capability
415 err = slot_alloc(&newvnode->u.vnode.cap[0]);
416 if (err_is_fail(err)) {
417 return err_push(err, LIB_ERR_SLOT_ALLOC);
420 err = vnode_create(newvnode->u.vnode.cap[0], type);
421 if (err_is_fail(err)) {
422 return err_push(err, LIB_ERR_VNODE_CREATE);
424 for (int i = 1; i < L2_PER_PAGE; i++) {
425 newvnode->u.vnode.cap[i] = NULL_CAP;
428 // The VNode meta data
429 newvnode->entry = ROUND_DOWN(entry, L2_PER_PAGE);
430 assert(newvnode->entry % L2_PER_PAGE == 0);
431 newvnode->next = root->u.vnode.children;
432 newvnode->u.vnode.mapped = 0x0; // no entries mapped
433 root->u.vnode.children = newvnode;
434 newvnode->u.vnode.children = NULL;
437 *retvnode = newvnode;
443 * \brief Returns the vnode for the pagetable mapping a given vspace address
445 #ifdef LIBBARRELFISH_DEBUG_PMAP
446 #define DEBUG_GET_PTABLE
448 static errval_t get_ptable(struct pmap_arm *pmap,
450 struct vnode **ptable)
452 // NB: strictly there are 12 bits in the ARM L1 index, but the allocation
453 // unit for L2 tables is one page of L2 entries (4 tables), so we use 10 bits for the L1
455 uintptr_t idx = ARM_L1_OFFSET(vaddr);
456 uintptr_t page_idx = L2_PAGE_IDX(idx);
457 if ((*ptable = find_vnode(&pmap->root, idx)) == NULL)
459 // L1 table entries point to L2 tables so allocate an L2
460 // table for this L1 entry.
462 struct vnode *tmp = NULL; // Tmp variable for passing to alloc_vnode
464 #ifdef DEBUG_GET_PTABLE
465 uintptr_t fidx = ROUND_DOWN(idx, L2_PER_PAGE);
466 debug_printf("allocating 4 x L2, entries = %"PRIuPTR"--%"PRIuPTR"\n",
469 errval_t err = alloc_vnode(pmap, &pmap->root, ObjType_VNode_ARM_l2,
471 if (err_is_fail(err)) {
472 DEBUG_ERR(err, "alloc_vnode");
476 *ptable = tmp; // Set argument to received value
478 if (err_is_fail(err)) {
479 return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
483 struct vnode *pt = *ptable;
485 debug_printf("found section @%d, trying to get ptable for %d\n",
488 assert(pt->is_vnode);
489 #ifdef DEBUG_GET_PTABLE
490 debug_printf("have ptable: %p\n", pt);
491 debug_printf("mapped = %x\n", pt->u.vnode.mapped);
492 debug_printf("page_idx = %d\n", page_idx);
493 debug_printf("l2_is_mapped: %d\n", L2_IS_MAPPED(pt, page_idx));
495 if (!L2_IS_MAPPED(pt, page_idx)) {
496 #ifdef DEBUG_GET_PTABLE
497 debug_printf("need to map entry %d\n", page_idx);
500 uintptr_t offset = L2_PAGE_OFFSET(idx);
501 #ifdef DEBUG_GET_PTABLE
502 debug_printf("mapping L1 entry %d at offset %"PRIuPTR"\n", idx, offset);
505 // create copy of ptable cap for this index, if it doesn't exist
507 if (capref_is_null(pt->u.vnode.cap[page_idx])) {
508 #ifdef DEBUG_GET_PTABLE
509 debug_printf("allocating slot for chunk %d\n", page_idx);
511 err = slot_alloc(&pt->u.vnode.cap[page_idx]);
512 if (err_is_fail(err)) {
513 return err_push(err, LIB_ERR_VNODE_MAP);
516 #ifdef DEBUG_GET_PTABLE
517 debug_printf("creating copy for chunk %d\n", page_idx);
519 err = cap_copy(pt->u.vnode.cap[page_idx], pt->u.vnode.cap[0]);
520 if (err_is_fail(err)) {
521 return err_push(err, LIB_ERR_VNODE_MAP);
525 #ifdef DEBUG_GET_PTABLE
526 debug_printf("calling vnode_map() for chunk %d\n", page_idx);
528 // map single 1k ptable
529 err = vnode_map(pmap->root.u.vnode.cap[0], pt->u.vnode.cap[page_idx], idx,
530 KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE, offset, 1);
532 if (err_is_fail(err)) {
533 return err_push(err, LIB_ERR_VNODE_MAP);
536 // set 1k ptable as mapped
537 pt->u.vnode.mapped |= 1 << page_idx;
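/*
 * Sketch (assumption, not original code): each bit of u.vnode.mapped tracks
 * whether the corresponding 1kB chunk of the 4kB L2 page has been installed
 * in the L1 table, mirroring what the L2_IS_MAPPED() check and the bit-set
 * above do. PMAP_ARM_EXAMPLES is a hypothetical guard.
 */
#ifdef PMAP_ARM_EXAMPLES
static bool example_l2_chunk_mapped(uint8_t mapped_bits, int chunk)
{
    assert(chunk >= 0 && chunk < L2_PER_PAGE); // four 1kB chunks per 4kB page
    return (mapped_bits & (1 << chunk)) != 0;
}
#endif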
543 static struct vnode *find_ptable(struct pmap_arm *pmap,
546 // NB: strictly there are 12 bits in the ARM L1 index, but the allocation
547 // unit for L2 tables is one page of L2 entries (4 tables), so we use 10 bits for the L1
548 uintptr_t idx = ARM_L1_OFFSET(vaddr);
549 return find_vnode(&pmap->root, idx);
552 static errval_t do_single_map(struct pmap_arm *pmap, genvaddr_t vaddr, genvaddr_t vend,
553 struct capref frame, size_t offset, size_t pte_count,
554 vregion_flags_t flags)
556 errval_t err = SYS_ERR_OK;
557 // Get the page table
558 struct vnode *ptable;
560 bool is_large = false;
562 struct frame_identity fi;
563 err = invoke_frame_identify(frame, &fi);
564 if (err_is_fail(err)) {
565 return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
568 if (flags & VREGION_FLAGS_LARGE &&
569 (vaddr & LARGE_PAGE_MASK) == 0 &&
570 fi.bits >= LARGE_PAGE_BITS &&
571 (fi.base & LARGE_PAGE_MASK) == 0) {
572 //section mapping (1MB)
573 //mapped in the L1 table at root
575 ptable = &pmap->root;
576 entry = ARM_L1_OFFSET(vaddr);
578 #ifdef LIBBARRELFISH_DEBUG_PMAP
579 printf("do_single_map: large path: entry=%zu\n", entry);
582 #ifdef LIBBARRELFISH_DEBUG_PMAP
583 debug_printf("%s: 4k path: mapping %"PRIxGENVADDR"\n", __FUNCTION__, vaddr);
584 debug_printf("4k path: L1 entry: %zu\n", ARM_USER_L1_OFFSET(vaddr));
587 // XXX: reassess the following note -SG
588 // NOTE: strictly speaking, an L2 entry only has 8 bits while an L1 entry
589 // has 12 bits, but due to the way Barrelfish allocates L1 and L2 tables,
590 // we use 10 bits for the entry here and in the map syscall
591 err = get_ptable(pmap, vaddr, &ptable);
592 if (err_is_fail(err)) {
593 DEBUG_ERR(err, "get_ptable() in do_single_map");
594 return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
596 entry = ARM_USER_L2_OFFSET(vaddr);
597 #ifdef LIBBARRELFISH_DEBUG_PMAP
598 debug_printf("%s: 4k path: L2 entry=%zu\n", __FUNCTION__, entry);
599 debug_printf("%s: ptable->is_vnode = %d\n",
600 __FUNCTION__, ptable->is_vnode);
605 flags &= ~(VREGION_FLAGS_LARGE | VREGION_FLAGS_HUGE);
606 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
608 uintptr_t user_pte_count = pte_count;
610 user_pte_count = DIVIDE_ROUND_UP(pte_count, L2_PER_PAGE);
613 // check if there is an overlapping mapping
614 if (has_vnode(ptable, entry, pte_count, false)) {
615 #ifdef LIBBARRELFISH_DEBUG_PMAP
616 debug_printf("has_vnode, only_pages=false returned true\n");
618 if (has_vnode(ptable, entry, pte_count, true)) {
619 printf("page already exists in 0x%"
620 PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
621 return LIB_ERR_PMAP_EXISTING_MAPPING;
623 #ifdef LIBBARRELFISH_DEBUG_PMAP
624 debug_printf("has_vnode, only_pages=true returned false, cleaning up empty ptables\n");
626 // clean out empty page tables. We do this here because we benefit from
627 // having the page tables in place when doing lots of small mappings
629 // XXX: TODO: fix this + mapping of L2 to work on single 1k
631 remove_empty_vnodes(&pmap->slab, ptable, entry, pte_count);
635 // Create user level datastructure for the mapping
636 struct vnode *page = slab_alloc(&pmap->slab);
638 page->is_vnode = false;
640 page->next = ptable->u.vnode.children;
641 ptable->u.vnode.children = page;
642 page->u.frame.cap = frame;
643 page->u.frame.flags = flags;
644 page->u.frame.pte_count = user_pte_count;
645 page->u.frame.kernel_pte_count = pte_count;
647 // Map entry into the page table
648 err = vnode_map(ptable->u.vnode.cap[0], frame, entry,
649 pmap_flags, offset, pte_count);
650 if (err_is_fail(err)) {
651 return err_push(err, LIB_ERR_VNODE_MAP);
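/*
 * Sketch (assumption, not part of the original code): the eligibility test
 * that do_single_map() and do_map() apply before using a 1MB section
 * mapping, factored out for illustration. Parameter types follow the
 * frame_identity fields used above; PMAP_ARM_EXAMPLES is a hypothetical
 * guard.
 */
#ifdef PMAP_ARM_EXAMPLES
static bool example_can_use_section(genvaddr_t vaddr, genpaddr_t base,
                                    uint8_t bits, vregion_flags_t flags)
{
    return (flags & VREGION_FLAGS_LARGE) &&
           (vaddr & LARGE_PAGE_MASK) == 0 && // 1MB-aligned virtual address
           bits >= LARGE_PAGE_BITS &&        // frame is at least 1MB
           (base & LARGE_PAGE_MASK) == 0;    // 1MB-aligned physical base
}
#endif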
656 static errval_t do_map(struct pmap_arm *pmap, genvaddr_t vaddr,
657 struct capref frame, size_t offset, size_t size,
658 vregion_flags_t flags, size_t *retoff, size_t *retsize)
664 // get base address and size of frame
665 struct frame_identity fi;
666 err = invoke_frame_identify(frame, &fi);
667 if (err_is_fail(err)) {
668 return err_push(err, LIB_ERR_PMAP_DO_MAP);
671 // determine mapping specific parts
672 if (flags & VREGION_FLAGS_LARGE &&
673 (vaddr & LARGE_PAGE_MASK) == 0 &&
674 fi.bits >= LARGE_PAGE_BITS &&
675 (fi.base & LARGE_PAGE_MASK) == 0) {
676 //section mapping (1MB)
677 page_size = LARGE_PAGE_SIZE;
678 offset_level = ARM_L1_OFFSET(vaddr);
679 #ifdef LIBBARRELFISH_DEBUG_PMAP
680 printf("do_map: large path\n");
681 printf("page_size: %zx, size: %zx\n", page_size, size);
685 page_size = BASE_PAGE_SIZE;
686 offset_level = ARM_L2_OFFSET(vaddr);
689 size = ROUND_UP(size, page_size);
690 size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
691 if (flags & VREGION_FLAGS_LARGE) {
692 #ifdef LIBBARRELFISH_DEBUG_PMAP
693 printf("#pages: %zu\n", pte_count);
696 genvaddr_t vend = vaddr + size;
698 if ((1UL << fi.bits) < size) {
699 return LIB_ERR_PMAP_FRAME_SIZE;
702 //should be trivially true for section mappings
703 if ((ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend)) ||
704 flags & VREGION_FLAGS_LARGE) {
706 err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
707 if (err_is_fail(err)) {
708 DEBUG_ERR(err, "[do_map] in fast path");
709 return err_push(err, LIB_ERR_PMAP_DO_MAP);
711 } else { // multiple leaf page tables
713 uint32_t c = ARM_L2_MAX_ENTRIES - offset_level;
714 genvaddr_t temp_end = vaddr + c * page_size;
715 err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
716 if (err_is_fail(err)) {
717 return err_push(err, LIB_ERR_PMAP_DO_MAP);
721 while (ARM_L1_OFFSET(temp_end) < ARM_L1_OFFSET(vend)) { // update vars
723 temp_end = vaddr + ARM_L2_MAX_ENTRIES * page_size;
724 offset += c * page_size;
725 c = ARM_L2_MAX_ENTRIES;
728 err = slot_alloc(&next);
729 if (err_is_fail(err)) {
730 return err_push(err, LIB_ERR_PMAP_DO_MAP);
732 err = cap_copy(next, frame);
733 if (err_is_fail(err)) {
734 return err_push(err, LIB_ERR_PMAP_DO_MAP);
739 err = do_single_map(pmap, vaddr, temp_end, frame, offset, ARM_L2_MAX_ENTRIES, flags);
740 if (err_is_fail(err)) {
741 return err_push(err, LIB_ERR_PMAP_DO_MAP);
745 // map remaining part
746 offset += c * page_size;
747 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(temp_end);
751 err = slot_alloc(&next);
752 if (err_is_fail(err)) {
753 return err_push(err, LIB_ERR_PMAP_DO_MAP);
755 err = cap_copy(next, frame);
756 if (err_is_fail(err)) {
757 return err_push(err, LIB_ERR_PMAP_DO_MAP);
761 err = do_single_map(pmap, temp_end, vend, next, offset, c, flags);
762 if (err_is_fail(err)) {
763 return err_push(err, LIB_ERR_PMAP_DO_MAP);
773 //has_vnode_debug = false;
777 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
779 for (size_t i = offset; i < offset + size; i += BASE_PAGE_SIZE) {
781 vaddr += BASE_PAGE_SIZE;
795 max_slabs_required(size_t bytes)
797 // Perform a slab allocation for every page (do_map -> slab_alloc)
798 size_t pages = DIVIDE_ROUND_UP(bytes, BASE_PAGE_SIZE);
799 // Perform a slab allocation for every L2 (get_ptable -> find_vnode)
800 size_t l2entries = DIVIDE_ROUND_UP(pages, 256 * 4);
801 // Perform a slab allocation for every L1 (do_map -> find_vnode)
802 size_t l1entries = DIVIDE_ROUND_UP(l2entries, 1024);
803 return pages + l2entries + l1entries;
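/*
 * Worked example (illustrative only): mapping 1MB of 4kB pages needs 256
 * page slabs, 1 slab for the L2 level and 1 for the L1 level, 258 slabs in
 * total. PMAP_ARM_EXAMPLES is a hypothetical guard.
 */
#ifdef PMAP_ARM_EXAMPLES
static void example_max_slabs(void)
{
    assert(max_slabs_required(1024 * 1024) == 256 + 1 + 1);
}
#endif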
805 static size_t max_slabs_required_large(size_t bytes)
807 // we always need only one slab, as a section mapping of any size can be
808 // represented by a single struct vnode.
813 * \brief Refill slabs used for metadata
815 * \param pmap The pmap to refill in
816 * \param request The number of slabs the allocator must have
817 * when the function returns
819 * When the current pmap is initialized,
820 * it reserves some virtual address space for metadata.
821 * This reserved address space is used here
823 * Can only be called on the current pmap.
824 * Will recursively call into itself until it has enough slabs.
827 static errval_t refill_slabs(struct pmap_arm *pmap, size_t request)
831 /* Keep looping until we have #request slabs */
832 while (slab_freecount(&pmap->slab) < request) {
833 // Number of bytes required to reach #request slabs
834 size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
835 sizeof(struct vnode));
837 /* Get a frame of that size */
839 err = frame_alloc(&cap, bytes, &bytes);
840 if (err_is_fail(err)) {
841 return err_push(err, LIB_ERR_FRAME_ALLOC);
844 /* If we do not have enough slabs to map the frame in, recurse */
845 size_t required_slabs_for_frame = max_slabs_required(bytes);
846 if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
847 // If we recurse, we require more slabs than are needed to map a single page
848 assert(required_slabs_for_frame > 4);
850 err = refill_slabs(pmap, required_slabs_for_frame);
851 if (err_is_fail(err)) {
852 return err_push(err, LIB_ERR_SLAB_REFILL);
856 /* Perform mapping */
857 genvaddr_t genvaddr = pmap->vregion_offset;
858 pmap->vregion_offset += (genvaddr_t)bytes;
860 // if this assert fires, increase META_DATA_RESERVED_SPACE
861 assert(pmap->vregion_offset < (vregion_get_base_addr(&pmap->vregion) +
862 vregion_get_size(&pmap->vregion)));
864 err = do_map(pmap, genvaddr, cap, 0, bytes,
865 VREGION_FLAGS_READ_WRITE, NULL, NULL);
866 if (err_is_fail(err)) {
867 return err_push(err, LIB_ERR_PMAP_DO_MAP);
871 lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
872 slab_grow(&pmap->slab, (void*)buf, bytes);
879 * \brief Create page mappings
881 * \param pmap The pmap object
882 * \param vaddr The virtual address to create the mapping for
883 * \param frame The frame cap to map in
884 * \param offset Offset into the frame cap
885 * \param size Size of the mapping
886 * \param flags Flags for the mapping
887 * \param retoff If non-NULL, filled in with adjusted offset of mapped region
888 * \param retsize If non-NULL, filled in with adjusted size of mapped region
891 map(struct pmap *pmap,
896 vregion_flags_t flags,
900 struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;
905 size_t slabs_required;
907 struct frame_identity fi;
908 err = invoke_frame_identify(frame, &fi);
909 if (err_is_fail(err)) {
910 return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
913 // adjust the mapping to be on page boundaries
914 if (flags & VREGION_FLAGS_LARGE &&
915 (vaddr & LARGE_PAGE_MASK) == 0 &&
916 fi.bits >= LARGE_PAGE_BITS &&
917 (fi.base & LARGE_PAGE_MASK) == 0) {
918 //section mapping (1MB)
919 base = LARGE_PAGE_OFFSET(offset);
920 page_size = LARGE_PAGE_SIZE;
921 slabs_required = max_slabs_required_large(size);
922 #ifdef LIBBARRELFISH_DEBUG_PMAP
923 size_t frame_sz = 1ULL<<fi.bits;
924 printf("map: large path, page_size: %zu, base: %zu, slabs: %zu, size: %zu,"
925 " frame size: %zu\n", page_size, base, slabs_required, size, frame_sz);
929 base = BASE_PAGE_OFFSET(offset);
930 page_size = BASE_PAGE_SIZE;
931 slabs_required = max_slabs_required(size);
934 size = ROUND_UP(size, page_size);
937 const size_t slabs_reserve = 3; // == max_slabs_required(1)
938 uint64_t slabs_free = slab_freecount(&pmap_arm->slab);
940 slabs_required += slabs_reserve;
942 if (slabs_required > slabs_free) {
943 if (get_current_pmap() == pmap) {
944 err = refill_slabs(pmap_arm, slabs_required);
945 if (err_is_fail(err)) {
946 return err_push(err, LIB_ERR_SLAB_REFILL);
950 size_t bytes = SLAB_STATIC_SIZE(slabs_required - slabs_free,
951 sizeof(struct vnode));
952 void *buf = malloc(bytes);
954 return LIB_ERR_MALLOC_FAIL;
956 slab_grow(&pmap_arm->slab, buf, bytes);
960 return do_map(pmap_arm, vaddr, frame, offset, size, flags,
964 static errval_t do_single_unmap(struct pmap_arm *pmap, genvaddr_t vaddr,
965 size_t pte_count, bool delete_cap)
968 struct vnode *pt = find_ptable(pmap, vaddr);
969 // pt->is_vnode implies a non-large (4kB page) mapping
970 if (pt && pt->is_vnode) {
971 // analogous to do_single_map, we use 10 bits for tracking pages in user space -SG
972 struct vnode *page = find_vnode(pt, ARM_USER_L2_OFFSET(vaddr));
973 if (page && page->u.frame.pte_count == pte_count) {
974 err = vnode_unmap(pt->u.vnode.cap[0], page->u.frame.cap,
975 page->entry, page->u.frame.pte_count);
976 if (err_is_fail(err)) {
977 DEBUG_ERR(err, "vnode_unmap");
978 return err_push(err, LIB_ERR_VNODE_UNMAP);
981 // Free up the resources
983 err = cap_destroy(page->u.frame.cap);
984 if (err_is_fail(err)) {
985 return err_push(err, LIB_ERR_PMAP_DO_SINGLE_UNMAP);
988 remove_vnode(pt, page);
989 slab_free(&pmap->slab, page);
992 return LIB_ERR_PMAP_FIND_VNODE;
995 #ifdef LIBBARRELFISH_DEBUG_PMAP
996 debug_printf("section unmap: entry = %zu, pte_count = %zu\n",
997 pt->entry, pt->u.frame.kernel_pte_count);
999 err = vnode_unmap(pmap->root.u.vnode.cap[0], pt->u.frame.cap,
1000 pt->entry, pt->u.frame.kernel_pte_count);
1001 if (err_is_fail(err)) {
1002 DEBUG_ERR(err, "vnode_unmap");
1003 return err_push(err, LIB_ERR_VNODE_UNMAP);
1006 remove_vnode(&pmap->root, pt);
1007 slab_free(&pmap->slab, pt);
1009 return LIB_ERR_PMAP_FIND_VNODE;
1016 * \brief Remove page mappings
1018 * \param pmap The pmap object
1019 * \param vaddr The start of the virtual address region to remove
1020 * \param size The size of the virtual address region to remove
1021 * \param retsize If non-NULL, filled in with the actual size removed
1024 unmap(struct pmap *pmap,
1029 errval_t err, ret = SYS_ERR_OK;
1030 struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
1031 size = ROUND_UP(size, BASE_PAGE_SIZE);
1032 size_t pte_count = size / BASE_PAGE_SIZE;
1033 genvaddr_t vend = vaddr + size;
1035 if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
1037 err = do_single_unmap(pmap_arm, vaddr, pte_count, false);
1038 if (err_is_fail(err)) {
1039 return err_push(err, LIB_ERR_PMAP_UNMAP);
1041 } else { // slow path
1043 uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
1044 err = do_single_unmap(pmap_arm, vaddr, c, false);
1045 if (err_is_fail(err)) {
1046 return err_push(err, LIB_ERR_PMAP_UNMAP);
1049 // unmap full leaves
1050 vaddr += c * BASE_PAGE_SIZE;
1051 while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
1052 c = ARM_L2_MAX_ENTRIES;
1053 err = do_single_unmap(pmap_arm, vaddr, c, true);
1054 if (err_is_fail(err)) {
1055 return err_push(err, LIB_ERR_PMAP_UNMAP);
1057 vaddr += c * BASE_PAGE_SIZE;
1060 // unmap remaining part
1061 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
1063 err = do_single_unmap(pmap_arm, vaddr, c, true);
1064 if (err_is_fail(err)) {
1065 return err_push(err, LIB_ERR_PMAP_UNMAP);
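/*
 * Illustrative sketch (not original code): how the slow paths of do_map()
 * and unmap() above split a [vaddr, vend) range at leaf-table boundaries:
 * a head chunk up to the next L1 entry, zero or more full leaf tables, and
 * a tail chunk with the remaining pages. PMAP_ARM_EXAMPLES is a
 * hypothetical guard.
 */
#ifdef PMAP_ARM_EXAMPLES
static void example_split_range(genvaddr_t vaddr, genvaddr_t vend)
{
    uint32_t head = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr); // pages in head chunk
    genvaddr_t cursor = vaddr + head * BASE_PAGE_SIZE;         // now L1-aligned
    while (ARM_L1_OFFSET(cursor) < ARM_L1_OFFSET(vend)) {
        cursor += ARM_L2_MAX_ENTRIES * BASE_PAGE_SIZE;         // one full leaf table
    }
    uint32_t tail = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(cursor); // pages in tail chunk
    (void)head;
    (void)tail;
}
#endif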
1078 * \brief Determine a suitable address for a given memory object
1080 * \param pmap The pmap object
1081 * \param memobj The memory object to determine the address for
1082 * \param alignment Minimum alignment
1083 * \param vaddr Pointer to return the determined address
1085 * Relies on vspace.c code maintaining an ordered list of vregions
1088 determine_addr(struct pmap *pmap,
1089 struct memobj *memobj,
1093 assert(pmap->vspace->head);
1095 if (alignment == 0) {
1096 alignment = BASE_PAGE_SIZE;
1098 alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
1100 size_t size = ROUND_UP(memobj->size, alignment);
1102 struct vregion *walk = pmap->vspace->head;
1103 while (walk->next) { // Try to insert between existing mappings
1104 genvaddr_t walk_base = vregion_get_base_addr(walk);
1105 genvaddr_t walk_size = ROUND_UP(vregion_get_size(walk), BASE_PAGE_SIZE);
1106 genvaddr_t walk_end = ROUND_UP(walk_base + walk_size, alignment);
1107 genvaddr_t next_base = vregion_get_base_addr(walk->next);
1109 if (next_base > walk_end + size &&
1110 walk_base + walk_size > VSPACE_BEGIN) { // Ensure mappings are above VSPACE_BEGIN
1117 *vaddr = ROUND_UP((vregion_get_base_addr(walk)
1118 + ROUND_UP(vregion_get_size(walk), alignment)),
1123 /** \brief Retrieves an address that can currently be used for large mappings
1126 static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
1127 size_t alignment, genvaddr_t *retvaddr)
1129 struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;
1131 struct vnode *walk_pdir = pmap_arm->root.u.vnode.children;
1132 assert(walk_pdir != NULL); // assume there's always at least one existing entry
1134 if (alignment == 0) {
1135 alignment = BASE_PAGE_SIZE;
1137 alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
1139 size = ROUND_UP(size, alignment);
1141 size_t free_count = DIVIDE_ROUND_UP(size, LARGE_PAGE_SIZE);
1142 //debug_printf("need %zu contiguous free pdirs\n", free_count);
1144 // compile pdir free list
1145 // Barrelfish treats the L1 as having 1024 entries
1147 for (int i = 0; i < 1024; i++) {
1150 f[walk_pdir->entry] = false;
1152 assert(walk_pdir->is_vnode);
1153 f[walk_pdir->entry] = false;
1154 walk_pdir = walk_pdir->next;
1156 genvaddr_t first_free = 384;
1157 for (; first_free < 512; first_free++) {
1158 if (f[first_free]) {
1159 for (int i = 1; i < free_count; i++) {
1160 if (!f[first_free + i]) {
1162 first_free = first_free+i;
1169 assert(1 == 1); // make compiler shut up about label
1171 //printf("first free: %li\n", (uint32_t)first_free);
1172 if (first_free + free_count <= 512) {
1173 *retvaddr = first_free << 22;
1176 return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
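/*
 * Worked example (illustrative only): with the pretend 1024-entry L1, each
 * slot covers 4MB of virtual address space, so the code above converts a
 * free slot index back into a virtual address by shifting left by 22 bits;
 * slot 384 corresponds to 0x60000000. PMAP_ARM_EXAMPLES is a hypothetical
 * guard.
 */
#ifdef PMAP_ARM_EXAMPLES
static genvaddr_t example_slot_to_vaddr(genvaddr_t l1_slot)
{
    return l1_slot << 22; // 4MB per slot in the 1024-entry view
}
#endif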
1182 static errval_t do_single_modify_flags(struct pmap_arm *pmap, genvaddr_t vaddr,
1183 size_t pages, vregion_flags_t flags)
1185 errval_t err = SYS_ERR_OK;
1186 struct vnode *ptable = find_ptable(pmap, vaddr);
1187 uint16_t ptentry = ARM_USER_L2_OFFSET(vaddr);
1189 struct vnode *page = find_vnode(ptable, ptentry);
1191 if (inside_region(ptable, ptentry, pages)) {
1192 // we're modifying part of a valid mapped region
1193 // arguments to invocation: invoke frame cap, first affected
1194 // page (as offset from first page in mapping), #affected
1195 // pages, new flags. Invocation should check compatibility of
1196 // new set of flags with cap permissions.
1197 size_t off = ptentry - page->entry;
1198 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
1199 err = invoke_frame_modify_flags(page->u.frame.cap, off, pages, pmap_flags);
1200 printf("invoke_frame_modify_flags returned error: %s (%"PRIuERRV")\n",
1201 err_getstring(err), err);
1204 // overlaps some region border
1205 return LIB_ERR_PMAP_EXISTING_MAPPING;
1213 * \brief Modify page mapping
1215 * \param pmap The pmap object
1216 * \param vaddr The virtual address to modify
1217 * \param flags New flags for the mapping
1218 * \param retsize If non-NULL, filled in with the actual size modified
1221 modify_flags(struct pmap *pmap,
1224 vregion_flags_t flags,
1227 errval_t err, ret = SYS_ERR_OK;
1228 struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
1229 size = ROUND_UP(size, BASE_PAGE_SIZE);
1230 size_t pte_count = size / BASE_PAGE_SIZE;
1231 genvaddr_t vend = vaddr + size;
1233 if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
1235 err = do_single_modify_flags(pmap_arm, vaddr, pte_count, flags);
1236 if (err_is_fail(err)) {
1237 return err_push(err, LIB_ERR_PMAP_UNMAP);
1242 uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
1243 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1244 if (err_is_fail(err)) {
1245 return err_push(err, LIB_ERR_PMAP_UNMAP);
1248 // modify flags on full leaf tables
1249 vaddr += c * BASE_PAGE_SIZE;
1250 while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
1251 c = ARM_L2_MAX_ENTRIES;
1252 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1253 if (err_is_fail(err)) {
1254 return err_push(err, LIB_ERR_PMAP_UNMAP);
1256 vaddr += c * BASE_PAGE_SIZE;
1259 // modify flags on the remaining part
1260 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
1262 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1263 if (err_is_fail(err)) {
1264 return err_push(err, LIB_ERR_PMAP_UNMAP);
1277 * \brief Query existing page mapping
1279 * \param pmap The pmap object
1280 * \param vaddr The virtual address to query
1281 * \param retvaddr Returns the base virtual address of the mapping
1282 * \param retsize Returns the actual size of the mapping
1283 * \param retcap Returns the cap mapped at this address
1284 * \param retoffset Returns the offset within the cap that is mapped
1285 * \param retflags Returns the flags for this mapping
1287 * All of the ret parameters are optional.
1289 static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
1290 genvaddr_t *retvaddr, size_t *retsize,
1291 struct capref *retcap, genvaddr_t *retoffset,
1292 vregion_flags_t *retflags)
1300 serialise(struct pmap *pmap, void *buf, size_t buflen)
1302 // Unimplemented: ignored
1307 deserialise(struct pmap *pmap, void *buf, size_t buflen)
1309 // Unimplemented: we start with an empty pmap, and avoid the bottom of the A/S
1313 static struct pmap_funcs pmap_funcs = {
1314 .determine_addr = determine_addr,
1315 .determine_addr_raw = determine_addr_raw,
1318 .modify_flags = modify_flags,
1320 .serialise = serialise,
1321 .deserialise = deserialise,
1325 * \brief Initialize the pmap object
1328 pmap_init(struct pmap *pmap,
1329 struct vspace *vspace,
1330 struct capref vnode,
1331 struct slot_allocator *opt_slot_alloc)
1333 struct pmap_arm* pmap_arm = (struct pmap_arm*)pmap;
1335 /* Generic portion */
1336 pmap->f = pmap_funcs;
1337 pmap->vspace = vspace;
1339 // Slab allocator for vnodes
1340 slab_init(&pmap_arm->slab, sizeof(struct vnode), NULL);
1341 slab_grow(&pmap_arm->slab,
1342 pmap_arm->slab_buffer,
1343 sizeof(pmap_arm->slab_buffer));
1345 pmap_arm->root.is_vnode = true;
1346 pmap_arm->root.u.vnode.cap[0] = vnode;
1347 pmap_arm->root.next = NULL;
1348 pmap_arm->root.u.vnode.children = NULL;
1353 errval_t pmap_current_init(bool init_domain)
1355 struct pmap_arm *pmap_arm = (struct pmap_arm*)get_current_pmap();
1357 // To reserve a block of virtual address space,
1358 // a vregion representing the address space is required.
1359 // We construct a superficial one here and add it to the vregion list.
1360 struct vregion *vregion = &pmap_arm->vregion;
1361 assert((void*)vregion > (void*)pmap_arm);
1362 assert((void*)vregion < (void*)(pmap_arm + 1));
1363 vregion->vspace = NULL;
1364 vregion->memobj = NULL;
1365 vregion->base = VSPACE_BEGIN;
1366 vregion->offset = 0;
1367 vregion->size = META_DATA_RESERVED_SPACE;
1369 vregion->next = NULL;
1371 struct vspace *vspace = pmap_arm->p.vspace;
1372 assert(!vspace->head);
1373 vspace->head = vregion;
1375 pmap_arm->vregion_offset = pmap_arm->vregion.base;