/*
 * Copyright (c) 2010-2013 ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <kernel.h>
#include <dispatch.h>
#include <target/x86_64/paging_kernel_target.h>
#include <target/x86_64/offsets_target.h>
#include <paging_kernel_arch.h>
#include <mdb/mdb_tree.h>
#include <string.h>
#include <barrelfish_kpi/init.h>
#include <cap_predicates.h>

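/**
 * \brief Return the cte that contains the given capability.
 *
 * Capabilities live embedded in a struct cte, so walking back by the offset
 * of the cap member recovers the enclosing cte and with it the mapping_info
 * bookkeeping for this particular capability copy.
 */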
static inline struct cte *cte_for_cap(struct capability *cap)
{
    return (struct cte *) ((char *)cap - offsetof(struct cte, cap));
}

/// Map within a x86_64 non leaf ptable
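///
/// Installs either a lower-level page table or, for pdpt/pdir destinations,
/// a 1GB/2MB frame mapping into a pml4, pdpt, or pdir VNode.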
static errval_t x86_64_non_ptable(struct capability *dest, cslot_t slot,
                                  struct capability *src, uintptr_t flags,
                                  uintptr_t offset, size_t pte_count)
{
    //printf("page_mappings_arch:x86_64_non_ptable\n");
    if (slot >= X86_64_PTABLE_SIZE) { // Within pagetable
        return SYS_ERR_VNODE_SLOT_INVALID;
    }

    if (type_is_vnode(src->type) && pte_count != 1) { // only allow single ptable mappings
        printf("src type and count mismatch\n");
        return SYS_ERR_VM_MAP_SIZE;
    }

    if (slot + pte_count > X86_64_PTABLE_SIZE) { // mapping size ok
        printf("mapping size invalid (%zd)\n", pte_count);
        return SYS_ERR_VM_MAP_SIZE;
    }

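    // The destination VNode type determines which source capability types are
    // acceptable and, for pdpt/pdir destinations, whether a 1GB or 2MB frame
    // mapping is installed instead of a lower-level page table.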
    size_t page_size = 0; // stays 0 unless a large/huge frame mapping is requested
    paging_x86_64_flags_t flags_large = 0;
    switch (dest->type) {
    case ObjType_VNode_x86_64_pml4:
        if (src->type != ObjType_VNode_x86_64_pdpt) { // Right mapping
            printf("src type invalid\n");
            return SYS_ERR_WRONG_MAPPING;
        }
        if (slot >= X86_64_PML4_BASE(X86_64_MEMORY_OFFSET)) { // Kernel mapped here
            return SYS_ERR_VNODE_SLOT_RESERVED;
        }
        break;
    case ObjType_VNode_x86_64_pdpt:
        // huge page support: a source that is not a pdir VNode must be a
        // frame that is mapped as a 1GB page
        if (src->type != ObjType_VNode_x86_64_pdir) { // Right mapping
            // TODO: check if the system allows 1GB mappings
            page_size = X86_64_HUGE_PAGE_SIZE;

            // check offset within frame
            genpaddr_t off = offset;
            if (off + pte_count * X86_64_HUGE_PAGE_SIZE > get_size(src)) {
                return SYS_ERR_FRAME_OFFSET_INVALID;
            }

            /* Calculate page access protection flags */
            // Get frame cap rights
            flags_large = paging_x86_64_cap_to_page_flags(src->rights);
            // Mask with provided access rights mask
            flags_large = paging_x86_64_mask_attrs(flags_large, X86_64_PTABLE_ACCESS(flags));
            // Add additional arch-specific flags
            flags_large |= X86_64_PTABLE_FLAGS(flags);
            // Unconditionally mark the page present
            flags_large |= X86_64_PTABLE_PRESENT;
        }
        break;
    case ObjType_VNode_x86_64_pdir:
        // superpage support: a source that is not a ptable VNode must be a
        // frame that is mapped as a 2MB page
        if (src->type != ObjType_VNode_x86_64_ptable) { // Right mapping
            printf("2m page ------\n");
            page_size = X86_64_LARGE_PAGE_SIZE;

            // check offset within frame
            genpaddr_t off = offset;
            if (off + pte_count * X86_64_LARGE_PAGE_SIZE > get_size(src)) {
                return SYS_ERR_FRAME_OFFSET_INVALID;
            }

            /* Calculate page access protection flags */
            // Get frame cap rights
            flags_large = paging_x86_64_cap_to_page_flags(src->rights);
            // Mask with provided access rights mask
            flags_large = paging_x86_64_mask_attrs(flags_large, X86_64_PTABLE_ACCESS(flags));
            // Add additional arch-specific flags
            flags_large |= X86_64_PTABLE_FLAGS(flags);
            // Unconditionally mark the page present
            flags_large |= X86_64_PTABLE_PRESENT;
        }
        break;
112 printf("dest type invalid\n");
113 return SYS_ERR_DEST_TYPE_INVALID;
    // Convert destination base address
    genpaddr_t dest_gp = get_address(dest);
    lpaddr_t dest_lp   = gen_phys_to_local_phys(dest_gp);
    lvaddr_t dest_lv   = local_phys_to_mem(dest_lp);
    // Convert source base address
    genpaddr_t src_gp  = get_address(src);
    lpaddr_t src_lp    = gen_phys_to_local_phys(src_gp);
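
    // Record where this copy of the capability is mapped so that unmap and
    // modify_flags can later locate the affected page table entries.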
    struct cte *src_cte = cte_for_cap(src);
    src_cte->mapping_info.pte = dest_lp + slot * sizeof(union x86_64_ptable_entry);
    src_cte->mapping_info.pte_count = pte_count;
    src_cte->mapping_info.offset = offset;

    cslot_t last_slot = slot + pte_count;
    for (; slot < last_slot; slot++, offset += page_size) {
        union x86_64_pdir_entry *entry = (union x86_64_pdir_entry *)dest_lv + slot;

        if (X86_64_IS_PRESENT(entry)) {
            // cleanup mapping info
            // TODO: cleanup already mapped pages
            memset(&src_cte->mapping_info, 0, sizeof(struct mapping_info));
            printf("slot in use\n");
            return SYS_ERR_VNODE_SLOT_INUSE;
        }

        if (page_size == X86_64_LARGE_PAGE_SIZE) {
            // a large page is mapped
            paging_x86_64_map_large((union x86_64_ptable_entry *)entry,
                                    src_lp + offset, flags_large);
        } else if (page_size == X86_64_HUGE_PAGE_SIZE) {
            // a huge page is mapped
            paging_x86_64_map_huge((union x86_64_ptable_entry *)entry,
                                   src_lp + offset, flags_large);
        } else {
            // a normal paging structure entry is mapped
            paging_x86_64_map_table(entry, src_lp + offset);
        }
    }

    return SYS_ERR_OK;
}

/// Map within a x86_64 ptable
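///
/// Installs 4kB frame mappings (Frame or DevFrame capabilities) into a leaf
/// page table.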
static errval_t x86_64_ptable(struct capability *dest, cslot_t slot,
                              struct capability *src, uintptr_t mflags,
                              uintptr_t offset, size_t pte_count)
{
    //printf("page_mappings_arch:x86_64_ptable\n");
    if (slot >= X86_64_PTABLE_SIZE) { // Within pagetable
        printf(" vnode_invalid\n");
        return SYS_ERR_VNODE_SLOT_INVALID;
    }

    if (slot + pte_count > X86_64_PTABLE_SIZE) { // mapping size ok
        printf("mapping size invalid (%zd)\n", pte_count);
        return SYS_ERR_VM_MAP_SIZE;
    }

    if (src->type != ObjType_Frame &&
        src->type != ObjType_DevFrame) { // Right mapping
        printf("src type invalid\n");
        return SYS_ERR_WRONG_MAPPING;
    }

    // check offset within frame
    genpaddr_t off = offset;
    if (off + pte_count * X86_64_BASE_PAGE_SIZE > get_size(src)) {
        printf("frame offset invalid\n");
        return SYS_ERR_FRAME_OFFSET_INVALID;
    }

    /* Calculate page access protection flags */
    // Get frame cap rights
    paging_x86_64_flags_t flags =
        paging_x86_64_cap_to_page_flags(src->rights);
    // Mask with provided access rights mask
    flags = paging_x86_64_mask_attrs(flags, X86_64_PTABLE_ACCESS(mflags));
    // Add additional arch-specific flags
    flags |= X86_64_PTABLE_FLAGS(mflags);
    // Unconditionally mark the page present
    flags |= X86_64_PTABLE_PRESENT;

    // Convert destination base address
    genpaddr_t dest_gp = get_address(dest);
    lpaddr_t dest_lp   = gen_phys_to_local_phys(dest_gp);
    lvaddr_t dest_lv   = local_phys_to_mem(dest_lp);
    // Convert source base address
    genpaddr_t src_gp  = get_address(src);
    lpaddr_t src_lp    = gen_phys_to_local_phys(src_gp);

    struct cte *src_cte = cte_for_cap(src);
    src_cte->mapping_info.pte = dest_lp + slot * sizeof(union x86_64_ptable_entry);
    src_cte->mapping_info.pte_count = pte_count;
    src_cte->mapping_info.offset = offset;

    cslot_t last_slot = slot + pte_count;
    for (; slot < last_slot; slot++, offset += X86_64_BASE_PAGE_SIZE) {
        union x86_64_ptable_entry *entry =
            (union x86_64_ptable_entry *)dest_lv + slot;

        /* FIXME: Flush TLB if the page is already present
         * in the meantime, since we don't do this, we just fail to avoid
         * ever reusing a VA mapping */
        if (X86_64_IS_PRESENT(entry)) {
            // TODO: cleanup already mapped pages
            memset(&src_cte->mapping_info, 0, sizeof(struct mapping_info));
            debug(LOG_WARN, "Trying to remap an already-present page is NYI, but "
                  "this is most likely a user-space bug!\n");
            return SYS_ERR_VNODE_SLOT_INUSE;
        }

        // Carry out the page mapping
        paging_x86_64_map(entry, src_lp + offset, flags);
    }

    return SYS_ERR_OK;
}

typedef errval_t (*mapping_handler_t)(struct capability *dest_cap,
                                      cslot_t dest_slot,
                                      struct capability *src_cap,
                                      uintptr_t flags, uintptr_t offset,
                                      size_t pte_count);

/// Dispatcher table for the type of mapping to create
static mapping_handler_t handler[ObjType_Num] = {
    [ObjType_VNode_x86_64_pml4]   = x86_64_non_ptable,
    [ObjType_VNode_x86_64_pdpt]   = x86_64_non_ptable,
    [ObjType_VNode_x86_64_pdir]   = x86_64_non_ptable,
    [ObjType_VNode_x86_64_ptable] = x86_64_ptable,
};

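// Diagnostic behaviour of caps_copy_to_vnode(): print a message and/or return
// an error immediately when a mapping request is rejected.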
#define DIAGNOSTIC_ON_ERROR 1
#define RETURN_ON_ERROR 1

/// Create page mappings
errval_t caps_copy_to_vnode(struct cte *dest_vnode_cte, cslot_t dest_slot,
                            struct cte *src_cte, uintptr_t flags,
                            uintptr_t offset, uintptr_t pte_count)
{
    assert(type_is_vnode(dest_vnode_cte->cap.type));

    struct capability *src_cap = &src_cte->cap;
    struct capability *dest_cap = &dest_vnode_cte->cap;
    mapping_handler_t handler_func = handler[dest_cap->type];

    assert(handler_func != NULL);

    genpaddr_t paddr = get_address(&src_cte->cap) + offset;
    genvaddr_t vaddr;
    compile_vaddr(dest_vnode_cte, dest_slot, &vaddr);
    printf("mapping 0x%"PRIxGENPADDR" to 0x%"PRIxGENVADDR"\n", paddr, vaddr);

    if (src_cte->mapping_info.pte) {
        // this cap is already mapped
#if DIAGNOSTIC_ON_ERROR
        printf("caps_copy_to_vnode: this copy is already mapped @pte 0x%lx (paddr = 0x%"PRIxGENPADDR")\n",
               src_cte->mapping_info.pte, get_address(src_cap));
#endif
#if RETURN_ON_ERROR
        return SYS_ERR_VM_ALREADY_MAPPED;
#endif
    }

    cslot_t last_slot = dest_slot + pte_count;

    if (last_slot > X86_64_PTABLE_SIZE) {
        // requested map overlaps leaf page table
#if DIAGNOSTIC_ON_ERROR
        printf("caps_copy_to_vnode: requested mapping spans multiple leaf page tables\n");
#endif
#if RETURN_ON_ERROR
        return SYS_ERR_VM_RETRY_SINGLE;
#endif
    }

    errval_t r = handler_func(dest_cap, dest_slot, src_cap, flags, offset, pte_count);
    if (err_is_fail(r)) {
        printf("caps_copy_to_vnode: handler func returned %ld\n", r);
    }

    printf("mapping_info.pte = 0x%lx\n", src_cte->mapping_info.pte);
    printf("mapping_info.offset = 0x%lx\n", src_cte->mapping_info.offset);
    printf("mapping_info.pte_count = %zu\n", src_cte->mapping_info.pte_count);

    return r;
}

static inline void read_pt_entry(struct capability *pgtable, size_t slot,
                                 genpaddr_t *mapped_addr, lpaddr_t *pte,
                                 void **entry)
{
    assert(type_is_vnode(pgtable->type));

    genpaddr_t paddr;
    lpaddr_t pte_;
    void *entry_;

    genpaddr_t gp = get_address(pgtable);
    lpaddr_t lp = gen_phys_to_local_phys(gp);
    lvaddr_t lv = local_phys_to_mem(lp);

    switch (pgtable->type) {
    case ObjType_VNode_x86_64_pml4:
    case ObjType_VNode_x86_64_pdpt:
    case ObjType_VNode_x86_64_pdir: {
        union x86_64_pdir_entry *e =
            (union x86_64_pdir_entry *)lv + slot;
        paddr = (lpaddr_t)e->d.base_addr << BASE_PAGE_BITS;
        entry_ = e;
        pte_ = lp + slot * sizeof(union x86_64_pdir_entry);
        break;
    }
    case ObjType_VNode_x86_64_ptable: {
        union x86_64_ptable_entry *e =
            (union x86_64_ptable_entry *)lv + slot;
        paddr = (lpaddr_t)e->base.base_addr << BASE_PAGE_BITS;
        entry_ = e;
        pte_ = lp + slot * sizeof(union x86_64_ptable_entry);
        break;
    }
    default:
        assert(!"Should not get here");
    }

    if (mapped_addr) *mapped_addr = paddr;
    if (pte)         *pte = pte_;
    if (entry)       *entry = entry_;
}

static inline void clear_pt_entry(lvaddr_t pte) {
    ((union x86_64_pdir_entry *)pte)->raw = 0;
}

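/// Walk the current dispatcher's page tables and return the kernel-virtual
/// address of the leaf page table covering \p vaddr, or 0 if any level of
/// the walk is not present.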
static inline lvaddr_t get_leaf_ptable_for_vaddr(genvaddr_t vaddr)
{
    lvaddr_t root_pt = local_phys_to_mem(dcb_current->vspace);

    // get pdpt
    union x86_64_pdir_entry *pdpt = (union x86_64_pdir_entry *)root_pt + X86_64_PML4_BASE(vaddr);
    if (!pdpt->raw) { return 0; }
    genpaddr_t pdpt_gp = pdpt->d.base_addr << BASE_PAGE_BITS;
    lvaddr_t pdpt_lv = local_phys_to_mem(gen_phys_to_local_phys(pdpt_gp));

    // get pdir
    union x86_64_pdir_entry *pdir = (union x86_64_pdir_entry *)pdpt_lv + X86_64_PDPT_BASE(vaddr);
    if (!pdir->raw) { return 0; }
    genpaddr_t pdir_gp = pdir->d.base_addr << BASE_PAGE_BITS;
    lvaddr_t pdir_lv = local_phys_to_mem(gen_phys_to_local_phys(pdir_gp));

    // get ptable
    union x86_64_ptable_entry *ptable = (union x86_64_ptable_entry *)pdir_lv + X86_64_PDIR_BASE(vaddr);
    if (!ptable->raw) { return 0; }
    genpaddr_t ptable_gp = ptable->base.base_addr << BASE_PAGE_BITS;
    lvaddr_t ptable_lv = local_phys_to_mem(gen_phys_to_local_phys(ptable_gp));

    return ptable_lv;
}

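/// Clear \p num_pages entries starting at \p slot of the leaf page table
/// mapped at kernel-virtual address \p pt; returns the number of entries
/// cleared.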
size_t do_unmap(lvaddr_t pt, cslot_t slot, size_t num_pages)
{
    // iterate over affected leaf ptables
    size_t unmapped_pages = 0;
    union x86_64_ptable_entry *ptentry = (union x86_64_ptable_entry *)pt + slot;
    for (int i = 0; i < num_pages; i++) {
        ptentry++->raw = 0;
        unmapped_pages++;
    }
    return unmapped_pages;
}

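/**
 * \brief Remove the mapping described by \p mapping from \p pgtable and
 *        flush the affected TLB entries.
 */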
errval_t page_mappings_unmap(struct capability *pgtable, struct cte *mapping,
                             size_t slot, size_t num_pages)
{
    assert(type_is_vnode(pgtable->type));

    debug(SUBSYS_PAGING, "page_mappings_unmap(%zd pages)\n", num_pages);

    // get page table entry data
    genpaddr_t paddr;
    read_pt_entry(pgtable, slot, &paddr, NULL, NULL);
    lvaddr_t pt = local_phys_to_mem(gen_phys_to_local_phys(get_address(pgtable)));

    // get virtual address of first page
    // TODO: error checking
    genvaddr_t vaddr;
    bool tlb_flush_necessary = true;
    struct cte *leaf_pt = cte_for_cap(pgtable);
    errval_t err = compile_vaddr(leaf_pt, slot, &vaddr);
    if (err_is_fail(err)) {
        if (err_no(err) == SYS_ERR_VNODE_NOT_INSTALLED) {
            debug(SUBSYS_PAGING, "couldn't reconstruct virtual address\n");
        } else if (err_no(err) == SYS_ERR_VNODE_SLOT_INVALID
                   && leaf_pt->mapping_info.pte == 0) {
            debug(SUBSYS_PAGING, "unmapping in floating page table; not flushing TLB\n");
            tlb_flush_necessary = false;
        } else {
            return err;
        }
    }

    if (num_pages != mapping->mapping_info.pte_count) {
        // want to unmap a different amount of pages than was mapped
        return SYS_ERR_VM_MAP_SIZE;
    }

    do_unmap(pt, slot, num_pages);

    // flush TLB for unmapped pages if we got a valid virtual address
    // TODO: heuristic that decides if selective or full flush is more
    //       efficient
    if (tlb_flush_necessary) {
        if (num_pages > 1 || err_is_fail(err)) {
            do_full_tlb_flush();
        } else {
            do_one_tlb_flush(vaddr);
        }
    }

    // update mapping info
    memset(&mapping->mapping_info, 0, sizeof(struct mapping_info));

    return SYS_ERR_OK;
}

errval_t page_mappings_modify_flags(struct capability *frame, size_t offset,
                                    size_t pages, size_t mflags)
{
    struct cte *mapping = cte_for_cap(frame);
    struct mapping_info *info = &mapping->mapping_info;

    /* Calculate page access protection flags */
    // Get frame cap rights
    paging_x86_64_flags_t flags =
        paging_x86_64_cap_to_page_flags(frame->rights);
    // Mask with provided access rights mask
    flags = paging_x86_64_mask_attrs(flags, X86_64_PTABLE_ACCESS(mflags));
    // Add additional arch-specific flags
    flags |= X86_64_PTABLE_FLAGS(mflags);
    // Unconditionally mark the page present
    flags |= X86_64_PTABLE_PRESENT;

    /* Calculate location of page table entries we need to modify */
    lvaddr_t base = local_phys_to_mem(info->pte) + offset;

    for (int i = 0; i < pages; i++) {
        union x86_64_ptable_entry *entry =
            (union x86_64_ptable_entry *)base + i;
        paging_x86_64_modify_flags(entry, flags);
    }

    /* flush affected TLB entries and return */
    return paging_tlb_flush_range(mapping, pages);
}

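/// Dump every present 4kB mapping in \p dispatcher's address space, walking
/// the pml4, pdpt, pdir, and ptable levels.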
void paging_dump_tables(struct dcb *dispatcher)
{
    lvaddr_t root_pt = local_phys_to_mem(dispatcher->vspace);

    for (int pdpt_index = 0; pdpt_index < X86_64_PTABLE_SIZE-1; pdpt_index++) {
        union x86_64_pdir_entry *pdpt = (union x86_64_pdir_entry *)root_pt + pdpt_index;
        if (!pdpt->raw) { continue; }
        genpaddr_t pdpt_gp = pdpt->d.base_addr << BASE_PAGE_BITS;
        lvaddr_t pdpt_lv = local_phys_to_mem(gen_phys_to_local_phys(pdpt_gp));

        for (int pdir_index = 0; pdir_index < X86_64_PTABLE_SIZE; pdir_index++) {
            union x86_64_pdir_entry *pdir = (union x86_64_pdir_entry *)pdpt_lv + pdir_index;
            if (!pdir->raw) { continue; }
            genpaddr_t pdir_gp = pdir->d.base_addr << BASE_PAGE_BITS;
            lvaddr_t pdir_lv = local_phys_to_mem(gen_phys_to_local_phys(pdir_gp));

            for (int ptable_index = 0; ptable_index < X86_64_PTABLE_SIZE; ptable_index++) {
                union x86_64_ptable_entry *ptable = (union x86_64_ptable_entry *)pdir_lv + ptable_index;
                if (!ptable->raw) { continue; }
                genpaddr_t ptable_gp = ptable->base.base_addr << BASE_PAGE_BITS;
                lvaddr_t ptable_lv = local_phys_to_mem(gen_phys_to_local_phys(ptable_gp));

                for (int entry = 0; entry < X86_64_PTABLE_SIZE; entry++) {
                    union x86_64_ptable_entry *e =
                        (union x86_64_ptable_entry *)ptable_lv + entry;
                    genpaddr_t paddr = (genpaddr_t)e->base.base_addr << BASE_PAGE_BITS;
                    if (!paddr) {
                        continue;
                    }
                    printf("%d.%d.%d.%d: 0x%"PRIxGENPADDR"\n",
                           pdpt_index, pdir_index, ptable_index, entry, paddr);
                }
            }
        }
    }
}