3 * \brief pmap management
7 * Copyright (c) 2010-2015 ETH Zurich.
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
16 * There was some minor difficulty here with mapping the CPU's native
17 * page-table arrangement onto Barrelfish. The problem lies with
18 * resource bootstrapping: the bootstrap RAM allocator allocates pages.
20 * After reworking retype to be range-based, we can now choose to create a
21 * single 1kB vnode from a 4kB frame, so we currently waste 3kB when creating
22 * ARM L2 vnodes before we have a connection to the memory server.
26 #include <barrelfish/barrelfish.h>
27 #include <barrelfish/caddr.h>
28 #include <barrelfish/invocations_arch.h>
31 // Location of VSpace managed by this system.
32 #define VSPACE_BEGIN ((lvaddr_t)1UL*1024*1024*1024) //0x40000000
34 // Amount of virtual address space reserved for mapping frames
35 // backing refill_slabs.
36 //#define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 128) // 64
37 #define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 1024)
38 // increased the value above from 128 for the PandaBoard port
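// With 4kB base pages this reserves 4MB of virtual address space, starting
// at VSPACE_BEGIN, for the metadata vregion set up in pmap_current_init().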
40 static inline uintptr_t
41 vregion_flags_to_kpi_paging_flags(vregion_flags_t flags)
43 STATIC_ASSERT(0x1ff == VREGION_FLAGS_MASK, "");
44 STATIC_ASSERT(0x0f == KPI_PAGING_FLAGS_MASK, "");
45 STATIC_ASSERT(VREGION_FLAGS_READ == KPI_PAGING_FLAGS_READ, "");
46 STATIC_ASSERT(VREGION_FLAGS_WRITE == KPI_PAGING_FLAGS_WRITE, "");
47 STATIC_ASSERT(VREGION_FLAGS_EXECUTE == KPI_PAGING_FLAGS_EXECUTE, "");
48 STATIC_ASSERT(VREGION_FLAGS_NOCACHE == KPI_PAGING_FLAGS_NOCACHE, "");
49 if ((flags & VREGION_FLAGS_MPB) != 0) {
50 // XXX: ignore MPB flag on ARM, otherwise the assert below fires -AB
51 flags &= ~VREGION_FLAGS_MPB;
53 if ((flags & VREGION_FLAGS_WRITE_COMBINING) != 0) {
54 // XXX mask out write-combining flag on ARM
55 flags &= ~VREGION_FLAGS_WRITE_COMBINING;
57 if ((flags & VREGION_FLAGS_VTD_SNOOP) != 0) {
58 // XXX mask out vtd-snooping flag on ARM
59 flags &= ~VREGION_FLAGS_VTD_SNOOP;
61 if ((flags & VREGION_FLAGS_GUARD) != 0) {
64 assert(0 == (~KPI_PAGING_FLAGS_MASK & (uintptr_t)flags));
65 return (uintptr_t)flags;
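// Example (follows from the STATIC_ASSERTs above): VREGION_FLAGS_READ |
// VREGION_FLAGS_WRITE | VREGION_FLAGS_NOCACHE translates 1:1 into
// KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE | KPI_PAGING_FLAGS_NOCACHE,
// while the MPB, write-combining and VT-d-snoop bits are silently dropped
// on ARM.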
68 // debug print preprocessor flag for this file
69 //#define LIBBARRELFISH_DEBUG_PMAP
72 * \brief check whether region A = [start_a .. end_a) overlaps
73 * region B = [start_b .. end_b).
74 * \return true iff A overlaps B
76 static bool is_overlapping(uint16_t start_a, uint16_t end_a, uint16_t start_b, uint16_t end_b)
79 // B strict subset of A
80 (start_a < start_b && end_a >= end_b)
82 || (start_a >= start_b && start_a < end_b)
84 || (end_a > start_b && end_a < end_b);
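// A few concrete cases of the check above (half-open intervals):
//   is_overlapping(4, 8, 6, 12) == true   // end of A inside B
//   is_overlapping(4, 8, 0, 16) == true   // A contained in B
//   is_overlapping(4, 8, 8, 12) == false  // adjacent regions do not overlap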
88 * \brief Check whether vnode `root' has entries in between [entry ..
90 * \param root the vnode to look at
91 * \param entry first entry of the region to check
92 * \param len length of the region to check
93 * \param only_pages true == do not report previously allocated lower-level
94 * page tables that are empty
95 * \return true iff entries exist in region.
97 #if defined(LIBBARRELFISH_DEBUG_PMAP)
98 #define DEBUG_HAS_VNODE
100 static bool has_vnode(struct vnode *root, uint32_t entry, size_t len,
103 assert(root != NULL);
104 assert(root->is_vnode);
107 uint32_t end_entry = entry + len;
108 #ifdef DEBUG_HAS_VNODE
109 debug_printf("%s: checking region [%"PRIu32"--%"PRIu32"], only_pages = %d\n",
110 __FUNCTION__, entry, end_entry, only_pages);
113 for (n = root->u.vnode.children; n; n = n->next) {
114 // region to check [entry .. end_entry)
115 if (n->is_vnode && n->entry >= entry && n->entry < end_entry) {
117 return has_vnode(n, 0, ARM_L2_TABLE_BYTES, true);
119 #ifdef LIBBARRELFISH_DEBUG_PMAP
120 debug_printf("1: found page table inside our region\n");
123 } else if (n->is_vnode) {
124 // all other vnodes do not overlap with us, so go to next
125 assert(n->entry < entry || n->entry >= end_entry);
129 uint32_t end = n->entry + n->u.frame.pte_count;
130 #ifdef DEBUG_HAS_VNODE
131 debug_printf("%s: looking at region: [%"PRIu32"--%"PRIu32"]\n",
132 __FUNCTION__, n->entry, end);
136 if (is_overlapping(entry, end_entry, n->entry, end)) {
146 * \brief Starting at a given root, return the vnode with entry equal to #entry
147 * \return vnode at index `entry` or NULL
149 #ifdef LIBBARRELFISH_DEBUG_PMAP
150 #define DEBUG_FIND_VNODE
152 static struct vnode *find_vnode(struct vnode *root, uint16_t entry)
154 assert(root != NULL);
155 assert(root->is_vnode);
158 #ifdef DEBUG_FIND_VNODE
159 debug_printf("%s: looking for %"PRIu16"\n", __FUNCTION__, entry);
162 for(n = root->u.vnode.children; n != NULL; n = n->next) {
164 is_overlapping(entry, entry + 1, n->entry, n->entry + 1)) {
165 #ifdef DEBUG_FIND_VNODE
166 debug_printf("%s: found ptable at [%"PRIu16"--%"PRIu16"]\n",
167 __FUNCTION__, n->entry, n->entry + 1);
171 else if (n->is_vnode) {
172 assert(!is_overlapping(entry, entry + 1, n->entry, n->entry + 1));
173 // ignore all other vnodes;
178 assert(!n->is_vnode);
179 uint16_t end = n->entry + n->u.frame.pte_count;
180 #ifdef DEBUG_FIND_VNODE
181 debug_printf("%s: looking at section [%"PRIu16"--%"PRIu16"]\n", __FUNCTION__, n->entry, end);
183 if (n->entry <= entry && entry < end) {
184 #ifdef DEBUG_FIND_VNODE
185 debug_printf("%d \\in [%d, %d]\n", entry, n->entry, end);
194 * \brief check whether region [entry, entry+npages) is contained in a child
197 static bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
199 assert(root != NULL);
200 assert(root->is_vnode);
204 for (n = root->u.vnode.children; n; n = n->next) {
206 uint16_t end = n->entry + n->u.frame.pte_count;
207 if (n->entry <= entry && entry + npages <= end) {
217 * \brief remove vnode `item` from linked list of children of `root`
219 static void remove_vnode(struct vnode *root, struct vnode *item)
221 assert(root->is_vnode);
222 struct vnode *walk = root->u.vnode.children;
223 struct vnode *prev = NULL;
227 prev->next = walk->next;
230 root->u.vnode.children = walk->next;
237 USER_PANIC("Should not get here");
241 * \brief (recursively) remove empty page tables in region [entry ..
242 * entry+len) in vnode `root`.
244 #ifdef LIBBARRELFISH_DEBUG_PMAP
245 #define DEBUG_REMOVE_EMPTY_VNODES
247 static void remove_empty_vnodes(struct slab_allocator *vnode_alloc, struct vnode *root,
248 uint32_t entry, size_t len)
250 // precondition: root does not have pages in [entry, entry+len)
251 assert(!has_vnode(root, entry, len, true));
254 uint32_t end_entry = entry + len;
255 for (struct vnode *n = root->u.vnode.children; n; n = n->next) {
256 // sanity check and skip leaf entries
260 // here we know that all vnodes we're interested in are
264 // Unmap vnode if it is in range [entry .. entry+len)
265 if (n->entry >= entry && n->entry < end_entry) {
266 err = vnode_unmap(root->u.vnode.invokable, n->mapping);
267 assert(err_is_ok(err));
269 if (!capcmp(n->u.vnode.cap, n->u.vnode.invokable)) {
270 // delete invokable pt cap if it's a real copy
271 err = cap_destroy(n->u.vnode.invokable);
272 assert(err_is_ok(err));
275 // delete last copy of pt cap
276 err = cap_destroy(n->u.vnode.cap);
277 assert(err_is_ok(err));
279 // remove vnode from list
280 remove_vnode(root, n);
281 slab_free(vnode_alloc, n);
287 * \brief Allocates a new VNode, adding it to the page table and our metadata
289 static errval_t alloc_vnode(struct pmap_arm *pmap_arm, struct vnode *root,
290 enum objtype type, uint32_t entry,
291 struct vnode **retvnode)
293 assert(root->is_vnode);
296 struct vnode *newvnode = slab_alloc(&pmap_arm->slab);
297 if (newvnode == NULL) {
298 return LIB_ERR_SLAB_ALLOC_FAIL;
300 newvnode->is_vnode = true;
302 // The VNode capability
303 err = slot_alloc(&newvnode->u.vnode.cap);
304 if (err_is_fail(err)) {
305 return err_push(err, LIB_ERR_SLOT_ALLOC);
308 err = vnode_create(newvnode->u.vnode.cap, type);
309 if (err_is_fail(err)) {
310 return err_push(err, LIB_ERR_VNODE_CREATE);
313 // XXX: do we need to put master copy in other cspace?
314 newvnode->u.vnode.invokable = newvnode->u.vnode.cap;
316 // The VNode meta data
317 newvnode->entry = entry;
318 newvnode->next = root->u.vnode.children;
319 root->u.vnode.children = newvnode;
320 newvnode->u.vnode.children = NULL;
322 err = slot_alloc(&newvnode->mapping);
323 if (err_is_fail(err)) {
324 return err_push(err, LIB_ERR_SLOT_ALLOC);
327 err = vnode_map(root->u.vnode.invokable, newvnode->u.vnode.cap,
328 entry, KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE, 0, 1,
330 if (err_is_fail(err)) {
331 return err_push(err, LIB_ERR_PMAP_MAP);
335 *retvnode = newvnode;
341 * \brief Returns the vnode for the pagetable mapping a given vspace address
343 #ifdef LIBBARRELFISH_DEBUG_PMAP
344 #define DEBUG_GET_PTABLE
346 static errval_t get_ptable(struct pmap_arm *pmap,
348 struct vnode **ptable)
350 // NB: strictly speaking the ARM L1 index has 12 bits, but the allocation
351 // unit for L2 tables is one 4kB page (4 hardware tables), so we use 10 bits for the L1 index
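// For example, assuming ARM_L1_OFFSET(v) == v >> 22 and
// ARM_L2_OFFSET(v) == (v >> 12) & 0x3ff (the 10/10/12 split described above):
// vaddr 0x40123000 yields L1 index 0x100 and L2 index 0x123.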
353 uintptr_t idx = ARM_L1_OFFSET(vaddr);
354 if ((*ptable = find_vnode(&pmap->root, idx)) == NULL)
356 // L1 table entries point to L2 tables so allocate an L2
357 // table for this L1 entry.
359 struct vnode *tmp = NULL; // Tmp variable for passing to alloc_vnode
361 errval_t err = alloc_vnode(pmap, &pmap->root, ObjType_VNode_ARM_l2,
363 if (err_is_fail(err)) {
364 DEBUG_ERR(err, "alloc_vnode");
368 *ptable = tmp; // Set argument to received value
370 if (err_is_fail(err)) {
371 return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
375 struct vnode *pt = *ptable;
377 debug_printf("found section @%d, trying to get ptable for %d\n",
380 assert(pt->is_vnode);
381 #ifdef DEBUG_GET_PTABLE
382 debug_printf("have ptable: %p\n", pt);
388 static struct vnode *find_ptable(struct pmap_arm *pmap,
391 // NB: strictly speaking the ARM L1 index has 12 bits, but the allocation
392 // unit for L2 tables is one 4kB page (4 hardware tables), so we use 10 bits here
393 uintptr_t idx = ARM_L1_OFFSET(vaddr);
394 return find_vnode(&pmap->root, idx);
397 static errval_t do_single_map(struct pmap_arm *pmap, genvaddr_t vaddr, genvaddr_t vend,
398 struct capref frame, size_t offset, size_t pte_count,
399 vregion_flags_t flags)
401 errval_t err = SYS_ERR_OK;
402 // Get the page table
403 struct vnode *ptable;
405 bool is_large = false;
407 struct frame_identity fi;
408 err = frame_identify(frame, &fi);
409 if (err_is_fail(err)) {
410 return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
413 if (flags & VREGION_FLAGS_LARGE &&
414 (vaddr & LARGE_PAGE_MASK) == 0 &&
415 fi.bytes >= LARGE_PAGE_SIZE &&
416 (fi.base & LARGE_PAGE_MASK) == 0) {
417 //section mapping (1MB)
418 //mapped in the L1 table at root
420 ptable = &pmap->root;
421 entry = ARM_L1_OFFSET(vaddr);
423 #ifdef LIBBARRELFISH_DEBUG_PMAP
424 debug_printf("do_single_map: large path: entry=%zu\n", entry);
427 #ifdef LIBBARRELFISH_DEBUG_PMAP
428 debug_printf("%s: 4k path: mapping %"PRIxGENVADDR", %zu entries\n", __FUNCTION__, vaddr, pte_count);
429 debug_printf("4k path: L1 entry: %zu\n", ARM_L1_OFFSET(vaddr));
432 // XXX: reassess the following note -SG
433 // NOTE: strictly speaking an L2 entry has only 8 bits, while an L1 entry
434 // has 12 bits, but due to the way Barrelfish allocates L1 and L2 tables,
435 // we use 10 bits for the entry here and in the map syscall
436 err = get_ptable(pmap, vaddr, &ptable);
437 if (err_is_fail(err)) {
438 DEBUG_ERR(err, "get_ptable() in do_single_map");
439 return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
441 entry = ARM_L2_OFFSET(vaddr);
442 #ifdef LIBBARRELFISH_DEBUG_PMAP
443 debug_printf("%s: 4k path: L2 entry=%zu\n", __FUNCTION__, entry);
444 debug_printf("%s: ptable->is_vnode = %d\n",
445 __FUNCTION__, ptable->is_vnode);
450 flags &= ~(VREGION_FLAGS_LARGE | VREGION_FLAGS_HUGE);
451 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
453 // check if there is an overlapping mapping
454 if (has_vnode(ptable, entry, pte_count, false)) {
455 #ifdef LIBBARRELFISH_DEBUG_PMAP
456 debug_printf("has_vnode, only_pages=false returned true\n");
458 if (has_vnode(ptable, entry, pte_count, true)) {
459 printf("page already exists in 0x%"
460 PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
461 return LIB_ERR_PMAP_EXISTING_MAPPING;
463 #ifdef LIBBARRELFISH_DEBUG_PMAP
464 debug_printf("has_vnode, only_pages=true returned false, cleaning up empty ptables\n");
466 // clean out empty page tables. We do this here because we benefit
467 // from having the page tables in place when doing lots of small
469 // XXX: TODO: fix this + mapping of L2 to work on single 1k
471 remove_empty_vnodes(&pmap->slab, ptable, entry, pte_count);
475 // Create user level datastructure for the mapping
476 struct vnode *page = slab_alloc(&pmap->slab);
478 page->is_vnode = false;
480 page->next = ptable->u.vnode.children;
481 ptable->u.vnode.children = page;
482 page->u.frame.cap = frame;
483 page->u.frame.flags = flags;
484 page->u.frame.pte_count = pte_count;
486 err = slot_alloc(&page->mapping);
487 if (err_is_fail(err)) {
488 return err_push(err, LIB_ERR_SLOT_ALLOC);
491 // Map entry into the page table
492 err = vnode_map(ptable->u.vnode.invokable, frame, entry,
493 pmap_flags, offset, pte_count,
495 if (err_is_fail(err)) {
496 errval_t err2 = slot_free(page->mapping);
497 if (err_is_fail(err2)) {
498 err = err_push(err, err2);
500 return err_push(err, LIB_ERR_VNODE_MAP);
505 static errval_t do_map(struct pmap_arm *pmap, genvaddr_t vaddr,
506 struct capref frame, size_t offset, size_t size,
507 vregion_flags_t flags, size_t *retoff, size_t *retsize)
513 // get base address and size of frame
514 struct frame_identity fi;
515 err = frame_identify(frame, &fi);
516 if (err_is_fail(err)) {
517 return err_push(err, LIB_ERR_PMAP_DO_MAP);
520 // determine mapping specific parts
521 if (flags & VREGION_FLAGS_LARGE &&
522 (vaddr & LARGE_PAGE_MASK) == 0 &&
523 fi.bytes >= LARGE_PAGE_SIZE &&
524 (fi.base & LARGE_PAGE_MASK) == 0) {
525 //section mapping (1MB)
526 page_size = LARGE_PAGE_SIZE;
527 offset_level = ARM_L1_OFFSET(vaddr);
528 #ifdef LIBBARRELFISH_DEBUG_PMAP
529 printf("do_map: large path\n");
530 printf("page_size: %zx, size: %zx\n", page_size, size);
534 page_size = BASE_PAGE_SIZE;
535 offset_level = ARM_L2_OFFSET(vaddr);
538 size = ROUND_UP(size, page_size);
539 size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
540 if (flags & VREGION_FLAGS_LARGE) {
541 #ifdef LIBBARRELFISH_DEBUG_PMAP
542 printf("#pages: 0x%zu\n", pte_count);
545 genvaddr_t vend = vaddr + size;
547 if (fi.bytes < size) {
548 return LIB_ERR_PMAP_FRAME_SIZE;
551 #ifdef LIBBARRELFISH_DEBUG_PMAP
552 printf("do_map: mapping %zu pages (size=%zx), from %zu.%zu\n",
553 pte_count, page_size, ARM_L1_OFFSET(vaddr), ARM_L2_OFFSET(vaddr));
554 printf("page_size: %zx, size: %zx\n", page_size, size);
557 //should be trivially true for section mappings
558 if ((ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend)) ||
559 flags & VREGION_FLAGS_LARGE) {
561 err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
562 if (err_is_fail(err)) {
563 DEBUG_ERR(err, "[do_map] in fast path");
564 return err_push(err, LIB_ERR_PMAP_DO_MAP);
566 } else { // multiple leaf page tables
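// A mapping that starts partway into a leaf table and spans several leaves
// is split up below: first the rest of the current leaf
// (ARM_L2_MAX_ENTRIES - offset_level entries), then zero or more full
// leaves, then a tail of ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(temp_end)
// entries in the last leaf.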
568 uint32_t c = ARM_L2_MAX_ENTRIES - offset_level;
569 genvaddr_t temp_end = vaddr + c * page_size;
570 err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
571 if (err_is_fail(err)) {
572 return err_push(err, LIB_ERR_PMAP_DO_MAP);
576 while (ARM_L1_OFFSET(temp_end) < ARM_L1_OFFSET(vend)) { // update vars
578 temp_end = vaddr + ARM_L2_MAX_ENTRIES * page_size;
579 offset += c * page_size;
580 c = ARM_L2_MAX_ENTRIES;
583 err = do_single_map(pmap, vaddr, temp_end, frame, offset, ARM_L2_MAX_ENTRIES, flags);
584 if (err_is_fail(err)) {
585 return err_push(err, LIB_ERR_PMAP_DO_MAP);
589 // map remaining part
590 offset += c * page_size;
591 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(temp_end);
595 err = do_single_map(pmap, temp_end, vend, frame, offset, c, flags);
596 if (err_is_fail(err)) {
597 return err_push(err, LIB_ERR_PMAP_DO_MAP);
607 //has_vnode_debug = false;
611 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
613 for (size_t i = offset; i < offset + size; i += BASE_PAGE_SIZE) {
615 vaddr += BASE_PAGE_SIZE;
629 max_slabs_required(size_t bytes)
631 // Perform a slab allocation for every page (do_map -> slab_alloc)
632 size_t pages = DIVIDE_ROUND_UP(bytes, BASE_PAGE_SIZE);
633 // Perform a slab allocation for every L2 (get_ptable -> find_vnode)
634 size_t l2entries = DIVIDE_ROUND_UP(pages, ARM_L2_MAX_ENTRIES);
635 // Perform a slab allocation for every L1 (do_map -> find_vnode)
636 size_t l1entries = DIVIDE_ROUND_UP(l2entries, ARM_L1_MAX_ENTRIES);
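// Worked example (assuming 4kB base pages): for bytes = 1MB, pages = 256,
// l2entries = DIVIDE_ROUND_UP(256, ARM_L2_MAX_ENTRIES) and l1entries =
// DIVIDE_ROUND_UP(l2entries, ARM_L1_MAX_ENTRIES), i.e. 256 plus a handful
// of page-table slabs in total.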
637 return pages + l2entries + l1entries;
639 static size_t max_slabs_required_large(size_t bytes)
641 // always need only one slab, as a section mapping of any size is
642 // represented by a single struct vnode.
647 * \brief Refill slabs used for metadata
649 * \param pmap The pmap to refill in
650 * \param request The number of slabs the allocator must have
651 * when the function returns
653 * When the current pmap is initialized,
654 * it reserves some virtual address space for metadata.
655 * This reserved address space is used here
657 * Can only be called for the current pmap
658 * Will recursively call into itself till it has enough slabs
661 static errval_t refill_slabs(struct pmap_arm *pmap, size_t request)
665 /* Keep looping till we have #request slabs */
666 while (slab_freecount(&pmap->slab) < request) {
667 // Amount of bytes required for #request
668 size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
669 sizeof(struct vnode));
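// SLAB_STATIC_SIZE(n, blocksize) is meant to cover n blocks of blocksize
// plus the slab allocator's own bookkeeping, so `bytes` is just enough
// backing store for the slabs we are still short of.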
671 /* Get a frame of that size */
673 err = frame_alloc(&cap, bytes, &bytes);
674 if (err_is_fail(err)) {
675 return err_push(err, LIB_ERR_FRAME_ALLOC);
678 /* If we do not have enough slabs to map the frame in, recurse */
679 size_t required_slabs_for_frame = max_slabs_required(bytes);
680 if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
681 // If we recurse, we require more slabs than are needed to map a single page
682 assert(required_slabs_for_frame > 4);
684 err = refill_slabs(pmap, required_slabs_for_frame);
685 if (err_is_fail(err)) {
686 return err_push(err, LIB_ERR_SLAB_REFILL);
690 /* Perform mapping */
691 genvaddr_t genvaddr = pmap->vregion_offset;
692 pmap->vregion_offset += (genvaddr_t)bytes;
694 // if this assert fires, increase META_DATA_RESERVED_SPACE
695 assert(pmap->vregion_offset < (vregion_get_base_addr(&pmap->vregion) +
696 vregion_get_size(&pmap->vregion)));
698 err = do_map(pmap, genvaddr, cap, 0, bytes,
699 VREGION_FLAGS_READ_WRITE, NULL, NULL);
700 if (err_is_fail(err)) {
701 return err_push(err, LIB_ERR_PMAP_DO_MAP);
705 lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
706 slab_grow(&pmap->slab, (void*)buf, bytes);
713 * \brief Create page mappings
715 * \param pmap The pmap object
716 * \param vaddr The virtual address to create the mapping for
717 * \param frame The frame cap to map in
718 * \param offset Offset into the frame cap
719 * \param size Size of the mapping
720 * \param flags Flags for the mapping
721 * \param retoff If non-NULL, filled in with adjusted offset of mapped region
722 * \param retsize If non-NULL, filled in with adjusted size of mapped region
725 map(struct pmap *pmap,
730 vregion_flags_t flags,
734 struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;
739 size_t slabs_required;
741 struct frame_identity fi;
742 err = frame_identify(frame, &fi);
743 if (err_is_fail(err)) {
744 return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
747 // adjust the mapping to be on page boundaries
748 if (flags & VREGION_FLAGS_LARGE &&
749 (vaddr & LARGE_PAGE_MASK) == 0 &&
750 fi.bytes >= LARGE_PAGE_SIZE &&
751 (fi.base & LARGE_PAGE_MASK) == 0) {
752 //section mapping (1MB)
753 base = LARGE_PAGE_OFFSET(offset);
754 page_size = LARGE_PAGE_SIZE;
755 slabs_required = max_slabs_required_large(size);
756 #ifdef LIBBARRELFISH_DEBUG_PMAP
757 printf("map: large path, page_size: %i, base: %i, slabs: %i, size: %i,"
758 "frame size: %zu\n", page_size, base, slabs_required, size, fi.bytes);
762 base = BASE_PAGE_OFFSET(offset);
763 page_size = BASE_PAGE_SIZE;
764 slabs_required = max_slabs_required(size);
767 size = ROUND_UP(size, page_size);
770 const size_t slabs_reserve = 3; // == max_slabs_required(1)
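// i.e. one slab each for the page itself plus the L2 and L1-level vnodes
// that might have to be created on its behalf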
771 uint64_t slabs_free = slab_freecount(&pmap_arm->slab);
773 slabs_required += slabs_reserve;
775 if (slabs_required > slabs_free) {
776 if (get_current_pmap() == pmap) {
777 err = refill_slabs(pmap_arm, slabs_required);
778 if (err_is_fail(err)) {
779 return err_push(err, LIB_ERR_SLAB_REFILL);
783 size_t bytes = SLAB_STATIC_SIZE(slabs_required - slabs_free,
784 sizeof(struct vnode));
785 void *buf = malloc(bytes);
787 return LIB_ERR_MALLOC_FAIL;
789 slab_grow(&pmap_arm->slab, buf, bytes);
793 return do_map(pmap_arm, vaddr, frame, offset, size, flags,
797 static errval_t do_single_unmap(struct pmap_arm *pmap, genvaddr_t vaddr,
800 #ifdef LIBBARRELFISH_DEBUG_PMAP
801 debug_printf("%s: vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
802 __FUNCTION__, vaddr, pte_count);
805 struct vnode *pt = find_ptable(pmap, vaddr);
806 // pt->is_vnode == non-large mapping
807 if (pt && pt->is_vnode) {
808 // analogous to do_single_map, we use 10 bits for tracking pages in user space -SG
809 struct vnode *page = find_vnode(pt, ARM_L2_OFFSET(vaddr));
810 if (page && page->u.frame.pte_count == pte_count) {
811 #ifdef LIBBARRELFISH_DEBUG_PMAP
812 debug_printf("page unmap: pt entry: %zu, entry = %zu, pte_count = %hu\n",
813 pt->entry, page->entry, page->u.frame.pte_count);
815 err = vnode_unmap(pt->u.vnode.cap, page->mapping);
816 if (err_is_fail(err)) {
817 DEBUG_ERR(err, "vnode_unmap");
818 return err_push(err, LIB_ERR_VNODE_UNMAP);
821 // cleanup mapping cap
822 err = cap_delete(page->mapping);
823 if (err_is_fail(err)) {
824 DEBUG_ERR(err, "cap_delete");
825 return err_push(err, LIB_ERR_CAP_DELETE);
827 err = slot_free(page->mapping);
828 if (err_is_fail(err)) {
829 return err_push(err, LIB_ERR_SLOT_FREE);
832 remove_vnode(pt, page);
833 slab_free(&pmap->slab, page);
836 return LIB_ERR_PMAP_FIND_VNODE;
839 #ifdef LIBBARRELFISH_DEBUG_PMAP
840 debug_printf("section unmap: entry = %zu, pte_count = %zu\n",
841 pt->entry, pt->u.frame.kernel_pte_count);
843 err = vnode_unmap(pmap->root.u.vnode.cap, pt->mapping);
844 if (err_is_fail(err)) {
845 DEBUG_ERR(err, "vnode_unmap");
846 return err_push(err, LIB_ERR_VNODE_UNMAP);
849 // cleanup mapping cap
850 err = cap_delete(pt->mapping);
851 if (err_is_fail(err)) {
852 DEBUG_ERR(err, "cap_delete");
853 return err_push(err, LIB_ERR_CAP_DELETE);
855 err = slot_free(pt->mapping);
856 if (err_is_fail(err)) {
857 return err_push(err, LIB_ERR_SLOT_FREE);
860 remove_vnode(&pmap->root, pt);
861 slab_free(&pmap->slab, pt);
863 return LIB_ERR_PMAP_FIND_VNODE;
870 * \brief Remove page mappings
872 * \param pmap The pmap object
873 * \param vaddr The start of the virtual address range to remove
874 * \param size The size of the virtual address range to remove
875 * \param retsize If non-NULL, filled in with the actual size removed
878 unmap(struct pmap *pmap,
883 errval_t err, ret = SYS_ERR_OK;
884 struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
885 size = ROUND_UP(size, BASE_PAGE_SIZE);
886 size_t pte_count = size / BASE_PAGE_SIZE;
887 genvaddr_t vend = vaddr + size;
889 if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
891 #ifdef LIBBARRELFISH_DEBUG_PMAP
892 debug_printf("%s: fast path vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
893 __FUNCTION__, vaddr, pte_count);
895 err = do_single_unmap(pmap_arm, vaddr, pte_count);
896 if (err_is_fail(err)) {
897 return err_push(err, LIB_ERR_PMAP_UNMAP);
899 } else { // slow path
901 uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
902 #ifdef LIBBARRELFISH_DEBUG_PMAP
903 debug_printf("%s: slow path 1st leaf vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
904 __FUNCTION__, vaddr, c);
906 err = do_single_unmap(pmap_arm, vaddr, c);
907 if (err_is_fail(err)) {
908 return err_push(err, LIB_ERR_PMAP_UNMAP);
912 vaddr += c * BASE_PAGE_SIZE;
913 while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
914 c = ARM_L2_MAX_ENTRIES;
915 #ifdef LIBBARRELFISH_DEBUG_PMAP
916 debug_printf("%s: slow path full leaf vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
917 __FUNCTION__, vaddr, c);
919 err = do_single_unmap(pmap_arm, vaddr, c);
920 if (err_is_fail(err)) {
921 return err_push(err, LIB_ERR_PMAP_UNMAP);
923 vaddr += c * BASE_PAGE_SIZE;
926 // unmap remaining part
927 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
929 #ifdef LIBBARRELFISH_DEBUG_PMAP
930 debug_printf("%s: slow path last leaf vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
931 __FUNCTION__, vaddr, c);
933 err = do_single_unmap(pmap_arm, vaddr, c);
934 if (err_is_fail(err)) {
935 return err_push(err, LIB_ERR_PMAP_UNMAP);
948 * \brief Determine a suitable address for a given memory object
950 * \param pmap The pmap object
951 * \param memobj The memory object to determine the address for
952 * \param alignment Minimum alignment
953 * \param vaddr Pointer to return the determined address
955 * Relies on vspace.c code maintaining an ordered list of vregions
958 determine_addr(struct pmap *pmap,
959 struct memobj *memobj,
963 assert(pmap->vspace->head);
965 if (alignment == 0) {
966 alignment = BASE_PAGE_SIZE;
968 alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
970 size_t size = ROUND_UP(memobj->size, alignment);
972 struct vregion *walk = pmap->vspace->head;
973 while (walk->next) { // Try to insert between existing mappings
974 genvaddr_t walk_base = vregion_get_base_addr(walk);
975 genvaddr_t walk_size = ROUND_UP(vregion_get_size(walk), BASE_PAGE_SIZE);
976 genvaddr_t walk_end = ROUND_UP(walk_base + walk_size, alignment);
977 genvaddr_t next_base = vregion_get_base_addr(walk->next);
979 if (next_base > walk_end + size &&
980 walk_base + walk_size > VSPACE_BEGIN) { // only hand out addresses above VSPACE_BEGIN
987 *vaddr = ROUND_UP((vregion_get_base_addr(walk)
988 + ROUND_UP(vregion_get_size(walk), alignment)),
993 /** \brief Retrieves an address that can currently be used for large mappings
996 static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
997 size_t alignment, genvaddr_t *retvaddr)
999 struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;
1001 struct vnode *walk_pdir = pmap_arm->root.u.vnode.children;
1002 assert(walk_pdir != NULL); // assume there's always at least one existing entry
1004 if (alignment == 0) {
1005 alignment = BASE_PAGE_SIZE;
1007 alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
1009 size = ROUND_UP(size, alignment);
1011 size_t free_count = DIVIDE_ROUND_UP(size, LARGE_PAGE_SIZE);
1012 //debug_printf("need %zu contiguous free pdirs\n", free_count);
1014 // compile pdir free list
1015 // Barrelfish treats the L1 as 1024 entries
1016 bool f[ARM_L1_MAX_ENTRIES];
1017 for (int i = 0; i < ARM_L1_MAX_ENTRIES; i++) {
1020 f[walk_pdir->entry] = false;
1022 assert(walk_pdir->is_vnode);
1023 f[walk_pdir->entry] = false;
1024 walk_pdir = walk_pdir->next;
1026 genvaddr_t first_free = 384;
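// The search window [384, 512) corresponds to virtual addresses
// [0x60000000, 0x80000000) at 4MB (1 << 22) per L1 group; e.g. a
// hypothetical first_free of 400 would make the code below return
// 400 << 22 = 0x64000000.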
1027 for (; first_free < 512; first_free++) {
1028 if (f[first_free]) {
1029 for (int i = 1; i < free_count; i++) {
1030 if (!f[first_free + i]) {
1032 first_free = first_free+i;
1039 assert(1 == 1);// make compiler shut up about label
1041 //printf("first free: %li\n", (uint32_t)first_free);
1042 if (first_free + free_count <= 512) {
1043 *retvaddr = first_free << 22;
1046 return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
1052 static errval_t do_single_modify_flags(struct pmap_arm *pmap, genvaddr_t vaddr,
1053 size_t pages, vregion_flags_t flags)
1055 errval_t err = SYS_ERR_OK;
1056 struct vnode *ptable = find_ptable(pmap, vaddr);
1057 uint16_t ptentry = ARM_L2_OFFSET(vaddr);
1059 struct vnode *page = find_vnode(ptable, ptentry);
1061 if (inside_region(ptable, ptentry, pages)) {
1062 // we're modifying part of a valid mapped region
1063 // arguments to invocation: invoke frame cap, first affected
1064 // page (as offset from first page in mapping), #affected
1065 // pages, new flags. Invocation should check compatibility of
1066 // new set of flags with cap permissions.
1067 size_t off = ptentry - page->entry;
1068 uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
1069 // VA hinting NYI on ARM, so we always pass 0 for va_hint
1070 err = invoke_mapping_modify_flags(page->mapping,
1071 off, pages, pmap_flags, 0);
1072 printf("invoke_frame_modify_flags returned error: %s (%"PRIuERRV")\n",
1073 err_getstring(err), err);
1076 // overlaps some region border
1077 return LIB_ERR_PMAP_EXISTING_MAPPING;
1085 * \brief Modify page mapping
1087 * \param pmap The pmap object
1088 * \param vaddr The virtual address to modify
1089 * \param flags New flags for the mapping
1090 * \param retsize If non-NULL, filled in with the actual size modified
1093 modify_flags(struct pmap *pmap,
1096 vregion_flags_t flags,
1099 errval_t err, ret = SYS_ERR_OK;
1100 struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
1101 size = ROUND_UP(size, BASE_PAGE_SIZE);
1102 size_t pte_count = size / BASE_PAGE_SIZE;
1103 genvaddr_t vend = vaddr + size;
1105 if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
1107 err = do_single_modify_flags(pmap_arm, vaddr, pte_count, flags);
1108 if (err_is_fail(err)) {
1109 return err_push(err, LIB_ERR_PMAP_UNMAP);
1113 // modify flags in first leaf
1114 uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
1115 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1116 if (err_is_fail(err)) {
1117 return err_push(err, LIB_ERR_PMAP_UNMAP);
1120 // modify flags in full leaves
1121 vaddr += c * BASE_PAGE_SIZE;
1122 while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
1123 c = ARM_L2_MAX_ENTRIES;
1124 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1125 if (err_is_fail(err)) {
1126 return err_push(err, LIB_ERR_PMAP_UNMAP);
1128 vaddr += c * BASE_PAGE_SIZE;
1131 // modify flags in remaining part
1132 c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
1134 err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
1135 if (err_is_fail(err)) {
1136 return err_push(err, LIB_ERR_PMAP_UNMAP);
1149 * \brief Query existing page mapping
1151 * \param pmap The pmap object
1152 * \param vaddr The virtual address to query
1153 * \param retvaddr Returns the base virtual address of the mapping
1154 * \param retsize Returns the actual size of the mapping
1155 * \param retcap Returns the cap mapped at this address
1156 * \param retoffset Returns the offset within the cap that is mapped
1157 * \param retflags Returns the flags for this mapping
1159 * All of the ret parameters are optional.
1161 static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
1162 genvaddr_t *retvaddr, size_t *retsize,
1163 struct capref *retcap, genvaddr_t *retoffset,
1164 vregion_flags_t *retflags)
1172 serialise(struct pmap *pmap, void *buf, size_t buflen)
1174 // Unimplemented: ignored
1179 deserialise(struct pmap *pmap, void *buf, size_t buflen)
1181 // Unimplemented: we start with an empty pmap, and avoid the bottom of the A/S
1185 static struct pmap_funcs pmap_funcs = {
1186 .determine_addr = determine_addr,
1187 .determine_addr_raw = determine_addr_raw,
1190 .modify_flags = modify_flags,
1192 .serialise = serialise,
1193 .deserialise = deserialise,
1197 * \brief Initialize the pmap object
1200 pmap_init(struct pmap *pmap,
1201 struct vspace *vspace,
1202 struct capref vnode,
1203 struct slot_allocator *opt_slot_alloc)
1205 struct pmap_arm* pmap_arm = (struct pmap_arm*)pmap;
1207 /* Generic portion */
1208 pmap->f = pmap_funcs;
1209 pmap->vspace = vspace;
1211 // Slab allocator for vnodes
1212 slab_init(&pmap_arm->slab, sizeof(struct vnode), NULL);
1213 slab_grow(&pmap_arm->slab,
1214 pmap_arm->slab_buffer,
1215 sizeof(pmap_arm->slab_buffer));
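// The static slab_buffer only needs to cover the vnodes allocated before
// refill_slabs() is able to map additional frames into the reserved
// metadata region.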
1217 pmap_arm->root.is_vnode = true;
1218 pmap_arm->root.u.vnode.cap = vnode;
1219 if (get_croot_addr(vnode) != CPTR_ROOTCN) {
1220 /* non-invokable root CNode; make a copy */
1221 errval_t err = slot_alloc(&pmap_arm->root.u.vnode.invokable);
1222 assert(err_is_ok(err));
1223 err = cap_copy(pmap_arm->root.u.vnode.invokable, vnode);
1224 assert(err_is_ok(err));
1226 pmap_arm->root.u.vnode.invokable = vnode;
1228 pmap_arm->root.next = NULL;
1229 pmap_arm->root.u.vnode.children = NULL;
1234 errval_t pmap_current_init(bool init_domain)
1236 struct pmap_arm *pmap_arm = (struct pmap_arm*)get_current_pmap();
1238 // To reserve a block of virtual address space,
1239 // a vregion representing the address space is required.
1240 // We construct a minimal placeholder vregion here and add it to the vregion list.
1241 struct vregion *vregion = &pmap_arm->vregion;
1242 assert((void*)vregion > (void*)pmap_arm);
1243 assert((void*)vregion < (void*)(pmap_arm + 1));
1244 vregion->vspace = NULL;
1245 vregion->memobj = NULL;
1246 vregion->base = VSPACE_BEGIN;
1247 vregion->offset = 0;
1248 vregion->size = META_DATA_RESERVED_SPACE;
1250 vregion->next = NULL;
1252 struct vspace *vspace = pmap_arm->p.vspace;
1253 assert(!vspace->head);
1254 vspace->head = vregion;
1256 pmap_arm->vregion_offset = pmap_arm->vregion.base;
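// From here on, vregion_offset tracks how much of the reserved metadata
// region refill_slabs() has already consumed.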