/**
 * \brief Pmap code common for x86 archs
 */

/*
 * Copyright (c) 2011, ETH Zurich.
 * Copyright (c) 2014, HP Labs.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
 */
#include <barrelfish/barrelfish.h>
#include <barrelfish/pmap.h>
#include "target/x86/pmap_x86.h"
// this should work for x86_64 and x86_32.
bool has_vnode(struct vnode *root, uint32_t entry, size_t len,
               bool only_pages)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    uint32_t end_entry = entry + len;

    // region we check [entry .. end_entry)

    for (n = root->u.vnode.children; n; n = n->next) {
        // n is page table, we need to check if it's anywhere inside the
        // region to check [entry .. end_entry)
        // this amounts to n->entry == entry for len = 1
        if (n->is_vnode && n->entry >= entry && n->entry < end_entry) {
            if (only_pages) {
                // only report mapped pages: recurse into the page table
                return has_vnode(n, 0, PTABLE_SIZE, true);
            }
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("1: found page table inside our region\n");
#endif
            return true;
        } else if (n->is_vnode) {
            // all other vnodes do not overlap with us, so go to next
            assert(n->entry < entry || n->entry >= end_entry);
            continue;
        }

        // this remains the same regardless of `only_pages`.
        // n is frame [n->entry .. end)
        // three cases:
        // 1) entry < n->entry && end_entry >= end --> n is a strict subset of
        //    our region
        // 2) entry inside n (entry >= n->entry && entry < end)
        // 3) end_entry inside n (end_entry > n->entry && end_entry < end)
        uint32_t end = n->entry + n->u.frame.pte_count;
        if (entry < n->entry && end_entry >= end) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("2: found a strict subset of our region: (%"
                    PRIu32"--%"PRIu32") < (%"PRIu32"--%"PRIu32")\n",
                    n->entry, end, entry, end_entry);
#endif
            return true;
        }
        if (entry >= n->entry && entry < end) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("3: found a region starting inside our region: (%"
                    PRIu32"--%"PRIu32") <> (%"PRIu32"--%"PRIu32")\n",
                    n->entry, end, entry, end_entry);
#endif
            return true;
        }
        if (end_entry > n->entry && end_entry < end) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("4: found a region ending inside our region: (%"
                    PRIu32"--%"PRIu32") <> (%"PRIu32"--%"PRIu32")\n",
                    n->entry, end, entry, end_entry);
#endif
            return true;
        }
    }

    return false;
}
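/*
 * Worked example (hypothetical tree, for illustration only): if a page
 * directory pdir has a frame covering entries [6..8) and a child page table
 * at entry 9, then has_vnode(pdir, 5, 3, false) returns true because the
 * frame lies inside the checked range [5..8).  With only_pages == true an
 * intermediate page table in the range is not reported directly; the call
 * recurses into it, so has_vnode(pdir, 9, 1, true) is true only if the page
 * table at entry 9 (or a descendant) actually maps pages.
 */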
/**
 * \brief Starting at a given root, return the vnode with entry equal to #entry
 */
struct vnode *find_vnode(struct vnode *root, uint16_t entry)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;
    for(n = root->u.vnode.children; n != NULL; n = n->next) {
        if(n->entry == entry) {
            return n;
        }
    }
    return NULL;
}
bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    for (n = root->u.vnode.children; n; n = n->next) {
        if (!n->is_vnode) {
            // frame child: is [entry .. entry + npages) covered by this
            // single mapping?
            uint16_t end = n->entry + n->u.frame.pte_count;
            if (n->entry <= entry && entry + npages <= end) {
                return true;
            }
        }
    }

    return false;
}
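/*
 * For example (hypothetical numbers): a child frame covering entries
 * [16..32) makes inside_region(ptable, 20, 8) true, since [20..28) is
 * contained in that single mapping; a range that spans two separate frames
 * is not reported as inside the region.
 */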
void remove_vnode(struct vnode *root, struct vnode *item)
{
    assert(root->is_vnode);
    struct vnode *walk = root->u.vnode.children;
    struct vnode *prev = NULL;
    while (walk) {
        if (walk == item) {
            if (prev) {
                prev->next = walk->next;
                return;
            } else {
                root->u.vnode.children = walk->next;
                return;
            }
        }
        prev = walk;
        walk = walk->next;
    }
    USER_PANIC("Should not get here");
}
/**
 * \brief Allocates a new VNode, adding it to the page table and our metadata
 */
errval_t alloc_vnode(struct pmap_x86 *pmap, struct vnode *root,
                     enum objtype type, uint32_t entry,
                     struct vnode **retvnode)
{
    errval_t err;

    struct vnode *newvnode = slab_alloc(&pmap->slab);
    if (newvnode == NULL) {
        return LIB_ERR_SLAB_ALLOC_FAIL;
    }

    // The VNode capability
    err = pmap->p.slot_alloc->alloc(pmap->p.slot_alloc, &newvnode->u.vnode.cap);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    err = vnode_create(newvnode->u.vnode.cap, type);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_CREATE);
    }

    // Map the new VNode into the given entry of its parent
    err = vnode_map(root->u.vnode.cap, newvnode->u.vnode.cap, entry,
                    PTABLE_ACCESS_DEFAULT, 0, 1);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_MAP);
    }

    // The VNode meta data
    newvnode->is_vnode = true;
    newvnode->entry    = entry;
    newvnode->next     = root->u.vnode.children;
    root->u.vnode.children = newvnode;
    newvnode->u.vnode.children = NULL;

    *retvnode = newvnode;
    return SYS_ERR_OK;
}
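/*
 * Usage sketch (assumed caller, names are illustrative): when the mapping
 * path finds that a required page table does not exist yet, it allocates
 * one in the parent vnode, roughly:
 *
 *     // pdir_entry: index of the missing page table in the parent page
 *     // directory, derived from the virtual address being mapped
 *     err = alloc_vnode(pmap, pdir, ObjType_VNode_x86_64_ptable,
 *                       pdir_entry, &ptable);
 *
 * The exact objtype and entry computation live in the per-architecture pmap
 * code, not in this file.
 */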
void remove_empty_vnodes(struct pmap_x86 *pmap, struct vnode *root,
                         uint32_t entry, size_t len)
{
    errval_t err;
    uint32_t end_entry = entry + len;
    // remember the next pointer up front, as n may be freed below
    struct vnode *next = NULL;
    for (struct vnode *n = root->u.vnode.children; n; n = next) {
        next = n->next;
        if (n->entry >= entry && n->entry < end_entry) {
            // sanity check and skip leaf entries
            if (!n->is_vnode) {
                continue;
            }
            // here we know that all vnodes we're interested in are
            // page tables
            assert(n->is_vnode);
            if (n->u.vnode.children) {
                remove_empty_vnodes(pmap, n, 0, PTABLE_SIZE);
            }

            // unmap the page table from its parent
            err = vnode_unmap(root->u.vnode.cap, n->u.vnode.cap, n->entry, 1);
            if (err_is_fail(err)) {
                debug_printf("remove_empty_vnodes: vnode_unmap: %s\n",
                        err_getstring(err));
            }

            // delete its capability
            err = cap_destroy(n->u.vnode.cap);
            if (err_is_fail(err)) {
                debug_printf("remove_empty_vnodes: cap_destroy: %s\n",
                        err_getstring(err));
            }

            // remove vnode from list and release its metadata
            remove_vnode(root, n);
            slab_free(&pmap->slab, n);
        }
    }
}
/*
 * The serialisation format is depressingly ad-hoc, and assumes a depth-first
 * walk of the tree. Each vnode is encoded as an entry in an array.
 *
 * We abuse the slot of the first entry, which is the root and thus is always
 * zero, to store the number of entries in the array.
 */
struct serial_entry {
    uint16_t entry;   ///< Entry #
    uint8_t depth;    ///< Depth of this node (0 = root)
    cslot_t slot;     ///< Slot number (in page cnode) of vnode cap
};
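/*
 * Example encoding (hypothetical slot numbers): a root whose child page
 * table sits in slot 4 at entry 1, with a grandchild in slot 7 at entry 0,
 * is laid out depth-first as
 *
 *     { .entry = 0, .depth = 0, .slot = 0 }  // root; slot later overwritten
 *                                            // with the entry count (3)
 *     { .entry = 1, .depth = 1, .slot = 4 }  // child of the root
 *     { .entry = 0, .depth = 2, .slot = 7 }  // grandchild
 */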
static errval_t serialise_tree(int depth, struct vnode *v,
                               struct serial_entry *out,
                               size_t outlen, size_t *outpos)
{
    assert(v != NULL);
    errval_t err;

    // don't serialise leaf pages (yet!)
    if (!v->is_vnode) {
        return SYS_ERR_OK;
    }

    if (*outpos >= outlen) {
        return LIB_ERR_SERIALISE_BUFOVERFLOW;
    }

    // serialise this node
    out[(*outpos)++] = (struct serial_entry) {
        .entry = v->entry,
        .depth = depth,
        .slot  = v->u.vnode.cap.slot,
    };

    // depth-first walk of the children
    for (struct vnode *c = v->u.vnode.children; c != NULL; c = c->next) {
        err = serialise_tree(depth + 1, c, out, outlen, outpos);
        if (err_is_fail(err)) {
            return err;
        }
    }

    return SYS_ERR_OK;
}
/**
 * \brief Serialise vtree to a flat structure, for passing to another process
 *
 * This is used by spawn_vspace to communicate the vnode capabilities to the child.
 */
errval_t pmap_x86_serialise(struct pmap *pmap, void *buf, size_t buflen)
{
    errval_t err;
    struct pmap_x86 *pmapx = (struct pmap_x86 *)pmap;

    // XXX: check alignment of buffer
    assert((uintptr_t)buf % sizeof(uintptr_t) == 0);

    struct serial_entry *out = buf;
    size_t outlen = buflen / sizeof(struct serial_entry);
    size_t outpos = 0;

    err = serialise_tree(0, &pmapx->root, out, outlen, &outpos);
    if (err_is_ok(err)) {
        // store length in first entry's slot number
        assert(out[0].slot == 0);
        out[0].slot = outpos;
    }

    return err;
}
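/*
 * Usage sketch (assumed caller): the parent serialises its page-table tree
 * into a buffer that is shared with the child it is spawning, e.g.
 *
 *     err = pmap_x86_serialise(get_current_pmap(), buf, buflen);
 *
 * and the child reconstructs the tree from the same buffer via
 * pmap_x86_deserialise() below.  In practice this is invoked through the
 * pmap function table rather than by calling the x86 function directly.
 */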
static errval_t deserialise_tree(struct pmap *pmap, struct serial_entry **in,
                                 size_t *inlen, int depth, struct vnode *parent)
{
    errval_t err;
    struct pmap_x86 *pmapx = (struct pmap_x86 *)pmap;

    if (*inlen == 0) {
        return SYS_ERR_OK;
    }

    while (*inlen > 0 && (*in)->depth == depth) {
        // ensure slab allocator has sufficient space
        err = pmapx->refill_slabs(pmapx);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_SLAB_REFILL);
        }

        // allocate storage for the new vnode
        struct vnode *n = slab_alloc(&pmapx->slab);
        assert(n != NULL);

        // populate it and append to parent's list of children
        n->is_vnode          = true;
        n->entry             = (*in)->entry;
        n->u.vnode.cap.cnode = cnode_page;
        n->u.vnode.cap.slot  = (*in)->slot;
        n->u.vnode.children  = NULL;
        n->next              = parent->u.vnode.children;
        parent->u.vnode.children = n;

        // consume this entry
        (*in)++;
        (*inlen)--;

        // is next entry a child of the last node?
        if (*inlen > 0 && (*in)->depth > depth) {
            assert((*in)->depth == depth + 1); // depth-first, no missing nodes
            err = deserialise_tree(pmap, in, inlen, depth + 1, n);
            if (err_is_fail(err)) {
                return err;
            }
        }
    }

    // guard: don't dereference past the end of the input array
    assert(*inlen == 0 || (*in)->depth < depth);

    return SYS_ERR_OK;
}
/**
 * \brief Deserialise vtree from a flat structure, for importing from another process
 *
 * This is used in a newly-spawned child
 */
errval_t pmap_x86_deserialise(struct pmap *pmap, void *buf, size_t buflen)
{
    struct pmap_x86 *pmapx = (struct pmap_x86 *)pmap;
    errval_t err;

    // XXX: check alignment of buffer
    assert((uintptr_t)buf % sizeof(uintptr_t) == 0);

    // extract length and sanity-check
    struct serial_entry *in = buf;
    assert(buflen > sizeof(struct serial_entry));
    size_t inlen = in[0].slot;
    assert(inlen * sizeof(struct serial_entry) <= buflen);
    in++;
    inlen--;

    err = deserialise_tree(pmap, &in, &inlen, 1, &pmapx->root);
    if (err_is_ok(err)) {
        // XXX: now that we know where our vnodes are, we can support mappings
        // in the bottom of the address space. However, we still don't know
        // exactly where our text and data are mapped (because we don't yet
        // serialise vregions or memobjs), so instead we pad _end.
        extern char _end;
        pmapx->min_mappable_va = ROUND_UP((lvaddr_t)&_end, 64 * 1024)
                                 + 64 * 1024;
    }

    return err;
}
/**
 * \brief Determine a suitable address for a given memory object
 *
 * \param pmap      The pmap object
 * \param memobj    The memory object to determine the address for
 * \param alignment Minimum alignment
 * \param retvaddr  Pointer to return the determined address
 *
 * Relies on vspace.c code maintaining an ordered list of vregions
 */
errval_t pmap_x86_determine_addr(struct pmap *pmap, struct memobj *memobj,
                                 size_t alignment, genvaddr_t *retvaddr)
{
    struct pmap_x86 *pmapx = (struct pmap_x86 *)pmap;
    genvaddr_t vaddr;

    struct vregion *walk = pmap->vspace->head;
    assert(walk != NULL); // assume there's always at least one existing entry

    if (alignment == 0) {
        alignment = BASE_PAGE_SIZE;
    } else {
        alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
    }
    size_t size = ROUND_UP(memobj->size, alignment);

    // if there's space before the first object, map it there
    genvaddr_t minva = ROUND_UP(pmapx->min_mappable_va, alignment);
    if (minva + size <= vregion_get_base_addr(walk)) {
        vaddr = minva;
        goto out;
    }

    while (walk->next) { // Try to insert between existing mappings
        genvaddr_t walk_base = vregion_get_base_addr(walk);
        genvaddr_t walk_size = ROUND_UP(vregion_get_size(walk), BASE_PAGE_SIZE);
        genvaddr_t walk_end  = ROUND_UP(walk_base + walk_size, alignment);
        genvaddr_t next_base = vregion_get_base_addr(walk->next);

        // sanity-check for page alignment
        assert(walk_base % BASE_PAGE_SIZE == 0);
        assert(next_base % BASE_PAGE_SIZE == 0);

        if (next_base > walk_end + size) {
            vaddr = walk_end;
            goto out;
        }
        walk = walk->next;
    }

    // Place beyond last mapping, with alignment
    vaddr = ROUND_UP((vregion_get_base_addr(walk)
                      + ROUND_UP(vregion_get_size(walk), BASE_PAGE_SIZE)),
                     alignment);

 out:
    // Ensure that we haven't run out of address space
    if (vaddr + memobj->size > pmapx->max_mappable_va) {
        return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
    }

    assert(retvaddr != NULL);
    *retvaddr = vaddr;

    return SYS_ERR_OK;
}
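/*
 * Worked example (hypothetical layout): with vregions at
 * [0x400000..0x500000) and [0x800000..0x900000), and min_mappable_va leaving
 * no room before the first vregion, a 1 MiB memobj with default alignment is
 * placed at 0x500000, the first gap after an existing mapping that is large
 * enough.  If no gap fits, the object goes after the last vregion, and the
 * call only fails with LIB_ERR_OUT_OF_VIRTUAL_ADDR if that would exceed
 * max_mappable_va.
 */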