/*
 * Copyright (c) 2010-2013 ETH Zurich.
 * Copyright (c) 2014, HP Labs.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */
#include <kernel.h>
#include <dispatch.h>
#include <target/x86_64/paging_kernel_target.h>
#include <target/x86_64/offsets_target.h>
#include <paging_kernel_arch.h>
#include <mdb/mdb_tree.h>
#include <string.h>
#include <barrelfish_kpi/init.h>
#include <cap_predicates.h>
#include <paging_generic.h>
#ifdef __k1om__
#include <target/k1om/offsets_target.h>
#define MEMORY_OFFSET K1OM_MEMORY_OFFSET
#else
#include <target/x86_64/offsets_target.h>
#define MEMORY_OFFSET X86_64_MEMORY_OFFSET
#endif
/// Map within an x86_64 non-leaf page table (PML4, PDPT, or PDIR)
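/// A vnode source capability installs a single next-level table entry, while
/// a frame-type source capability is mapped as 1 GiB (in a PDPT) or 2 MiB
/// (in a PDIR) pages. Mapping metadata is recorded in the source cap's cte.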
static errval_t x86_64_non_ptable(struct capability *dest, cslot_t slot,
                                  struct capability *src, uintptr_t flags,
                                  uintptr_t offset, size_t pte_count)
{
    //printf("page_mappings_arch:x86_64_non_ptable\n");
    if (slot >= X86_64_PTABLE_SIZE) { // Within pagetable
        return SYS_ERR_VNODE_SLOT_INVALID;
    }

    if (type_is_vnode(src->type) && pte_count != 1) { // only allow single ptable mappings
        printf("src type and count mismatch\n");
        return SYS_ERR_VM_MAP_SIZE;
    }

    if (slot + pte_count > X86_64_PTABLE_SIZE) { // mapping size ok
        printf("mapping size invalid (%zd)\n", pte_count);
        return SYS_ERR_VM_MAP_SIZE;
    }
    size_t page_size = 0;
    paging_x86_64_flags_t flags_large = 0;
    switch (dest->type) {
    case ObjType_VNode_x86_64_pml4:
        if (src->type != ObjType_VNode_x86_64_pdpt) { // pml4 entries must be pdpt vnodes
            printf("src type invalid\n");
            return SYS_ERR_WRONG_MAPPING;
        }
        if (slot >= X86_64_PML4_BASE(MEMORY_OFFSET)) { // Kernel mapped here
            return SYS_ERR_VNODE_SLOT_RESERVED;
        }
        break;

    case ObjType_VNode_x86_64_pdpt:
        // not a pdir vnode: treat src as a frame and map 1 GiB huge pages
        if (src->type != ObjType_VNode_x86_64_pdir) {
            // TODO: check if the system allows 1GB mappings
            page_size = X86_64_HUGE_PAGE_SIZE;

            // check offset within frame
            genpaddr_t off = offset;
            if (off + pte_count * X86_64_HUGE_PAGE_SIZE > get_size(src)) {
                return SYS_ERR_FRAME_OFFSET_INVALID;
            }

            // Calculate page access protection flags:
            // Get frame cap rights
            flags_large = paging_x86_64_cap_to_page_flags(src->rights);
            // Mask with provided access rights mask
            flags_large = paging_x86_64_mask_attrs(flags_large, X86_64_PTABLE_ACCESS(flags));
            // Add additional arch-specific flags
            flags_large |= X86_64_PTABLE_FLAGS(flags);
            // Unconditionally mark the page present
            flags_large |= X86_64_PTABLE_PRESENT;
        }
        break;

    case ObjType_VNode_x86_64_pdir:
        // not a ptable vnode: treat src as a frame and map 2 MiB large pages
        if (src->type != ObjType_VNode_x86_64_ptable) {
            page_size = X86_64_LARGE_PAGE_SIZE;

            // check offset within frame
            genpaddr_t off = offset;
            if (off + pte_count * X86_64_LARGE_PAGE_SIZE > get_size(src)) {
                return SYS_ERR_FRAME_OFFSET_INVALID;
            }

            // Calculate page access protection flags:
            // Get frame cap rights
            flags_large = paging_x86_64_cap_to_page_flags(src->rights);
            // Mask with provided access rights mask
            flags_large = paging_x86_64_mask_attrs(flags_large, X86_64_PTABLE_ACCESS(flags));
            // Add additional arch-specific flags
            flags_large |= X86_64_PTABLE_FLAGS(flags);
            // Unconditionally mark the page present
            flags_large |= X86_64_PTABLE_PRESENT;
        }
        break;

    default:
        printf("dest type invalid\n");
        return SYS_ERR_DEST_TYPE_INVALID;
    }
    // Convert destination base address
    genpaddr_t dest_gp = get_address(dest);
    lpaddr_t dest_lp = gen_phys_to_local_phys(dest_gp);
    lvaddr_t dest_lv = local_phys_to_mem(dest_lp);
    // Convert source base address
    genpaddr_t src_gp = get_address(src);
    lpaddr_t src_lp = gen_phys_to_local_phys(src_gp);

    struct cte *src_cte = cte_for_cap(src);
    src_cte->mapping_info.pte = dest_lp + slot * sizeof(union x86_64_ptable_entry);
    src_cte->mapping_info.pte_count = pte_count;
    src_cte->mapping_info.offset = offset;
    cslot_t last_slot = slot + pte_count;
    for (; slot < last_slot; slot++, offset += page_size) {
        // Destination entry
        union x86_64_pdir_entry *entry = (union x86_64_pdir_entry *)dest_lv + slot;

        if (X86_64_IS_PRESENT(entry)) {
            // cleanup mapping info
            // TODO: cleanup already mapped pages
            memset(&src_cte->mapping_info, 0, sizeof(struct mapping_info));
            printf("slot in use\n");
            return SYS_ERR_VNODE_SLOT_INUSE;
        }

        // determine if we map a large/huge page or a normal entry
        if (page_size == X86_64_LARGE_PAGE_SIZE) {
            // a large page is mapped
            paging_x86_64_map_large((union x86_64_ptable_entry *)entry, src_lp + offset, flags_large);
        } else if (page_size == X86_64_HUGE_PAGE_SIZE) {
            // a huge page is mapped
            paging_x86_64_map_huge((union x86_64_ptable_entry *)entry, src_lp + offset, flags_large);
        } else {
            // a normal paging structure entry is mapped
            paging_x86_64_map_table(entry, src_lp + offset);
        }
    }

    return SYS_ERR_OK;
}
/// Map within an x86_64 leaf page table
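/// Maps `pte_count` base (4 KiB) pages of a Frame or DevFrame capability,
/// starting at `slot`, and records the mapping metadata in the source cap's
/// cte.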
static errval_t x86_64_ptable(struct capability *dest, cslot_t slot,
                              struct capability *src, uintptr_t mflags,
                              uintptr_t offset, size_t pte_count)
{
    //printf("page_mappings_arch:x86_64_ptable\n");
    if (slot >= X86_64_PTABLE_SIZE) { // Within pagetable
        printf(" vnode_invalid\n");
        return SYS_ERR_VNODE_SLOT_INVALID;
    }

    if (slot + pte_count > X86_64_PTABLE_SIZE) { // mapping size ok
        printf("mapping size invalid (%zd)\n", pte_count);
        return SYS_ERR_VM_MAP_SIZE;
    }

    if (src->type != ObjType_Frame &&
        src->type != ObjType_DevFrame) { // only frame-type caps can be mapped here
        printf("src type invalid\n");
        return SYS_ERR_WRONG_MAPPING;
    }

    // check offset within frame
    genpaddr_t off = offset;
    if (off + pte_count * X86_64_BASE_PAGE_SIZE > get_size(src)) {
        printf("frame offset invalid\n");
        return SYS_ERR_FRAME_OFFSET_INVALID;
    }
    /* Calculate page access protection flags */
    // Get frame cap rights
    paging_x86_64_flags_t flags =
        paging_x86_64_cap_to_page_flags(src->rights);
    // Mask with provided access rights mask
    flags = paging_x86_64_mask_attrs(flags, X86_64_PTABLE_ACCESS(mflags));
    // Add additional arch-specific flags
    flags |= X86_64_PTABLE_FLAGS(mflags);
    // Unconditionally mark the page present
    flags |= X86_64_PTABLE_PRESENT;
    // Convert destination base address
    genpaddr_t dest_gp = get_address(dest);
    lpaddr_t dest_lp = gen_phys_to_local_phys(dest_gp);
    lvaddr_t dest_lv = local_phys_to_mem(dest_lp);
    // Convert source base address
    genpaddr_t src_gp = get_address(src);
    lpaddr_t src_lp = gen_phys_to_local_phys(src_gp);

    struct cte *src_cte = cte_for_cap(src);
    src_cte->mapping_info.pte = dest_lp + slot * sizeof(union x86_64_ptable_entry);
    src_cte->mapping_info.pte_count = pte_count;
    src_cte->mapping_info.offset = offset;
    cslot_t last_slot = slot + pte_count;
    for (; slot < last_slot; slot++, offset += X86_64_BASE_PAGE_SIZE) {
        union x86_64_ptable_entry *entry =
            (union x86_64_ptable_entry *)dest_lv + slot;

        /* FIXME: Flush TLB if the page is already present
         * in the meantime, since we don't do this, we just fail to avoid
         * ever reusing a VA mapping */
        if (X86_64_IS_PRESENT(entry)) {
            // TODO: cleanup already mapped pages
            memset(&src_cte->mapping_info, 0, sizeof(struct mapping_info));
            debug(LOG_WARN, "Trying to remap an already-present page is NYI, but "
                  "this is most likely a user-space bug!\n");
            return SYS_ERR_VNODE_SLOT_INUSE;
        }

        // Carry out the page mapping
        paging_x86_64_map(entry, src_lp + offset, flags);
    }

    return SYS_ERR_OK;
}
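/// Signature shared by the per-level mapping handlers above.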
typedef errval_t (*mapping_handler_t)(struct capability *dest_cap,
                                      cslot_t dest_slot,
                                      struct capability *src_cap,
                                      uintptr_t flags, uintptr_t offset,
                                      size_t pte_count);

/// Dispatcher table for the type of mapping to create
static mapping_handler_t handler[ObjType_Num] = {
    [ObjType_VNode_x86_64_pml4]   = x86_64_non_ptable,
    [ObjType_VNode_x86_64_pdpt]   = x86_64_non_ptable,
    [ObjType_VNode_x86_64_pdir]   = x86_64_non_ptable,
    [ObjType_VNode_x86_64_ptable] = x86_64_ptable,
};
#define DIAGNOSTIC_ON_ERROR 1
#define RETURN_ON_ERROR 1
/// Create page mappings
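/// Looks up the handler matching the destination vnode type and maps
/// `pte_count` entries of `src_cte` (starting at `offset`, with `flags`)
/// into `dest_vnode_cte` at `dest_slot`.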
errval_t caps_copy_to_vnode(struct cte *dest_vnode_cte, cslot_t dest_slot,
                            struct cte *src_cte, uintptr_t flags,
                            uintptr_t offset, uintptr_t pte_count)
{
    assert(type_is_vnode(dest_vnode_cte->cap.type));

    struct capability *src_cap  = &src_cte->cap;
    struct capability *dest_cap = &dest_vnode_cte->cap;
    mapping_handler_t handler_func = handler[dest_cap->type];

    assert(handler_func != NULL);

    genpaddr_t paddr = get_address(&src_cte->cap) + offset;
    genvaddr_t vaddr;
    compile_vaddr(dest_vnode_cte, dest_slot, &vaddr);
    printf("mapping 0x%"PRIxGENPADDR" to 0x%"PRIxGENVADDR"\n", paddr, vaddr);
    if (src_cte->mapping_info.pte) {
        // this cap is already mapped
#if DIAGNOSTIC_ON_ERROR
        printf("caps_copy_to_vnode: this copy is already mapped @pte 0x%lx (paddr = 0x%"PRIxGENPADDR")\n",
               src_cte->mapping_info.pte, get_address(src_cap));
#endif
#if RETURN_ON_ERROR
        return SYS_ERR_VM_ALREADY_MAPPED;
#endif
    }

    cslot_t last_slot = dest_slot + pte_count;
    if (last_slot > X86_64_PTABLE_SIZE) {
        // requested map overlaps leaf page table
#if DIAGNOSTIC_ON_ERROR
        printf("caps_copy_to_vnode: requested mapping spans multiple leaf page tables\n");
#endif
#if RETURN_ON_ERROR
        return SYS_ERR_VM_RETRY_SINGLE;
#endif
    }
    errval_t r = handler_func(dest_cap, dest_slot, src_cap, flags, offset, pte_count);
    if (err_is_fail(r)) {
        printf("caps_copy_to_vnode: handler func returned %ld\n", r);
    } else {
        printf("mapping_info.pte       = 0x%lx\n", src_cte->mapping_info.pte);
        printf("mapping_info.offset    = 0x%lx\n", src_cte->mapping_info.offset);
        printf("mapping_info.pte_count = %zu\n", src_cte->mapping_info.pte_count);
    }
    return r;
}
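/// Read the page table entry at `slot` in the vnode `pgtable`, returning the
/// mapped physical address, the entry's own physical address, and a pointer
/// to the entry; out-parameters that are NULL are skipped.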
static inline void read_pt_entry(struct capability *pgtable, size_t slot,
                                 genpaddr_t *mapped_addr, lpaddr_t *pte,
                                 void **entry)
{
    assert(type_is_vnode(pgtable->type));

    genpaddr_t paddr = 0;
    lpaddr_t pte_ = 0;
    void *entry_ = NULL;

    genpaddr_t gp = get_address(pgtable);
    lpaddr_t lp = gen_phys_to_local_phys(gp);
    lvaddr_t lv = local_phys_to_mem(lp);

    switch (pgtable->type) {
    case ObjType_VNode_x86_64_pml4:
    case ObjType_VNode_x86_64_pdpt:
    case ObjType_VNode_x86_64_pdir: {
        union x86_64_pdir_entry *e =
            (union x86_64_pdir_entry *)lv + slot;
        paddr = (lpaddr_t)e->d.base_addr << BASE_PAGE_BITS;
        entry_ = e;
        pte_ = lp + slot * sizeof(union x86_64_pdir_entry);
        break;
    }
    case ObjType_VNode_x86_64_ptable: {
        union x86_64_ptable_entry *e =
            (union x86_64_ptable_entry *)lv + slot;
        paddr = (lpaddr_t)e->base.base_addr << BASE_PAGE_BITS;
        entry_ = e;
        pte_ = lp + slot * sizeof(union x86_64_ptable_entry);
        break;
    }
    default:
        assert(!"Should not get here");
    }

    if (mapped_addr) { *mapped_addr = paddr; }
    if (pte)         { *pte = pte_; }
    if (entry)       { *entry = entry_; }
}
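/// Walk the current dispatcher's page tables and return the kernel-virtual
/// address of the leaf page table covering `vaddr`, or 0 if any level of the
/// walk is not present.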
__attribute__((unused))
static inline lvaddr_t get_leaf_ptable_for_vaddr(genvaddr_t vaddr)
{
    lvaddr_t root_pt = local_phys_to_mem(dcb_current->vspace);

    // get pdpt
    union x86_64_pdir_entry *pdpt = (union x86_64_pdir_entry *)root_pt + X86_64_PML4_BASE(vaddr);
    if (!pdpt->raw) { return 0; }
    genpaddr_t pdpt_gp = pdpt->d.base_addr << BASE_PAGE_BITS;
    lvaddr_t pdpt_lv = local_phys_to_mem(gen_phys_to_local_phys(pdpt_gp));

    // get pdir
    union x86_64_pdir_entry *pdir = (union x86_64_pdir_entry *)pdpt_lv + X86_64_PDPT_BASE(vaddr);
    if (!pdir->raw) { return 0; }
    genpaddr_t pdir_gp = pdir->d.base_addr << BASE_PAGE_BITS;
    lvaddr_t pdir_lv = local_phys_to_mem(gen_phys_to_local_phys(pdir_gp));

    // get leaf ptable
    union x86_64_ptable_entry *ptable = (union x86_64_ptable_entry *)pdir_lv + X86_64_PDIR_BASE(vaddr);
    if (!ptable->raw) { return 0; }
    genpaddr_t ptable_gp = ptable->base.base_addr << BASE_PAGE_BITS;
    lvaddr_t ptable_lv = local_phys_to_mem(gen_phys_to_local_phys(ptable_gp));

    return ptable_lv;
}
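/// Clear `num_pages` consecutive entries of the leaf page table mapped at
/// kernel-virtual address `pt`, starting at `slot`; returns the number of
/// entries cleared.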
size_t do_unmap(lvaddr_t pt, cslot_t slot, size_t num_pages)
{
    // iterate over the affected leaf ptable entries and clear them
    size_t unmapped_pages = 0;
    union x86_64_ptable_entry *ptentry = (union x86_64_ptable_entry *)pt + slot;
    for (int i = 0; i < num_pages; i++) {
        ptentry++->raw = 0;
        unmapped_pages++;
    }
    return unmapped_pages;
}
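/// Remove the page mappings described by `mapping` from `pgtable`, flushing
/// the TLB when a virtual address for the unmapped region can be
/// reconstructed.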
errval_t page_mappings_unmap(struct capability *pgtable, struct cte *mapping,
                             size_t slot, size_t num_pages)
{
    assert(type_is_vnode(pgtable->type));
    errval_t err;
    debug(SUBSYS_PAGING, "page_mappings_unmap(%zd pages)\n", num_pages);

    // get page table entry data
    genpaddr_t paddr;
    read_pt_entry(pgtable, slot, &paddr, NULL, NULL);
    lvaddr_t pt = local_phys_to_mem(gen_phys_to_local_phys(get_address(pgtable)));

    // get virtual address of first page
    // TODO: error checking
    genvaddr_t vaddr;
    bool tlb_flush_necessary = true;
    struct cte *leaf_pt = cte_for_cap(pgtable);
    err = compile_vaddr(leaf_pt, slot, &vaddr);
    if (err_is_fail(err)) {
        if (err_no(err) == SYS_ERR_VNODE_NOT_INSTALLED) {
            debug(SUBSYS_PAGING, "couldn't reconstruct virtual address\n");
        } else if (err_no(err) == SYS_ERR_VNODE_SLOT_INVALID
                   && leaf_pt->mapping_info.pte == 0) {
            debug(SUBSYS_PAGING, "unmapping in floating page table; not flushing TLB\n");
            tlb_flush_necessary = false;
        } else {
            return err;
        }
    }

    if (num_pages != mapping->mapping_info.pte_count) {
        // want to unmap a different amount of pages than was mapped
        return SYS_ERR_VM_MAP_SIZE;
    }

    do_unmap(pt, slot, num_pages);

    // flush TLB for unmapped pages if we got a valid virtual address
    // TODO: heuristic that decides if selective or full flush is more
    //       efficient
    if (tlb_flush_necessary) {
        if (num_pages > 1 || err_is_fail(err)) {
            do_full_tlb_flush();
        } else {
            do_one_tlb_flush(vaddr);
        }
    }

    // update mapping info
    memset(&mapping->mapping_info, 0, sizeof(struct mapping_info));

    return SYS_ERR_OK;
}
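/// Recompute the protection flags for an existing mapping of `frame` and
/// apply them to `pages` consecutive page table entries, followed by a full
/// TLB flush.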
errval_t page_mappings_modify_flags(struct capability *frame, size_t offset,
                                    size_t pages, size_t mflags)
{
    struct cte *mapping = cte_for_cap(frame);
    struct mapping_info *info = &mapping->mapping_info;

    /* Calculate page access protection flags */
    // Get frame cap rights
    paging_x86_64_flags_t flags =
        paging_x86_64_cap_to_page_flags(frame->rights);
    // Mask with provided access rights mask
    flags = paging_x86_64_mask_attrs(flags, X86_64_PTABLE_ACCESS(mflags));
    // Add additional arch-specific flags
    flags |= X86_64_PTABLE_FLAGS(mflags);
    // Unconditionally mark the page present
    flags |= X86_64_PTABLE_PRESENT;

    /* Calculate location of page table entries we need to modify */
    lvaddr_t base = local_phys_to_mem(info->pte) + offset;

    for (int i = 0; i < pages; i++) {
        union x86_64_ptable_entry *entry =
            (union x86_64_ptable_entry *)base + i;
        paging_x86_64_modify_flags(entry, flags);
    }

    /* do full TLB flush */
    do_full_tlb_flush();
    return SYS_ERR_OK;
}
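/// Dump the user-space portion of a dispatcher's page tables to the console,
/// printing one line per present 4 KiB, 2 MiB, or 1 GiB mapping.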
void paging_dump_tables(struct dcb *dispatcher)
{
    lvaddr_t root_pt = local_phys_to_mem(dispatcher->vspace);

    // only walk the user-space part of the PML4
    union x86_64_ptable_entry *pt;
    size_t kernel_pml4e = X86_64_PML4_BASE(X86_64_MEMORY_OFFSET);
    for (int pdpt_index = 0; pdpt_index < kernel_pml4e; pdpt_index++) {
        union x86_64_pdir_entry *pdpt = (union x86_64_pdir_entry *)root_pt + pdpt_index;
        if (!pdpt->raw) { continue; }
        genpaddr_t pdpt_gp = pdpt->d.base_addr << BASE_PAGE_BITS;
        lvaddr_t pdpt_lv = local_phys_to_mem(gen_phys_to_local_phys(pdpt_gp));

        for (int pdir_index = 0; pdir_index < X86_64_PTABLE_SIZE; pdir_index++) {
            // get pdir
            union x86_64_pdir_entry *pdir = (union x86_64_pdir_entry *)pdpt_lv + pdir_index;
            pt = (union x86_64_ptable_entry *)pdir;
            if (!pdir->raw) { continue; }
            // check if pdir or huge page
            if (pt->huge.always1) {
                // is huge page mapping
                genpaddr_t paddr = (genpaddr_t)pt->huge.base_addr << HUGE_PAGE_BITS;
                printf("%d.%d: 0x%"PRIxGENPADDR"\n", pdpt_index, pdir_index, paddr);
                // goto next pdpt entry
                continue;
            }
            genpaddr_t pdir_gp = pdir->d.base_addr << BASE_PAGE_BITS;
            lvaddr_t pdir_lv = local_phys_to_mem(gen_phys_to_local_phys(pdir_gp));

            for (int ptable_index = 0; ptable_index < X86_64_PTABLE_SIZE; ptable_index++) {
                // get ptable
                union x86_64_pdir_entry *ptable = (union x86_64_pdir_entry *)pdir_lv + ptable_index;
                pt = (union x86_64_ptable_entry *)ptable;
                if (!ptable->raw) { continue; }
                // check if ptable or large page
                if (pt->large.always1) {
                    // is large page mapping
                    genpaddr_t paddr = (genpaddr_t)pt->large.base_addr << LARGE_PAGE_BITS;
                    printf("%d.%d.%d: 0x%"PRIxGENPADDR"\n", pdpt_index, pdir_index, ptable_index, paddr);
                    // goto next pdir entry
                    continue;
                }
                genpaddr_t ptable_gp = ptable->d.base_addr << BASE_PAGE_BITS;
                lvaddr_t ptable_lv = local_phys_to_mem(gen_phys_to_local_phys(ptable_gp));

                for (int entry = 0; entry < X86_64_PTABLE_SIZE; entry++) {
                    union x86_64_ptable_entry *e =
                        (union x86_64_ptable_entry *)ptable_lv + entry;
                    genpaddr_t paddr = (genpaddr_t)e->base.base_addr << BASE_PAGE_BITS;
                    if (!paddr) { continue; }
                    printf("%d.%d.%d.%d: 0x%"PRIxGENPADDR"\n", pdpt_index, pdir_index, ptable_index, entry, paddr);
                }
            }
        }
    }
}