3 * \brief System calls implementation.
7 * Copyright (c) 2007, 2008, 2009, 2010, 2012, ETH Zurich.
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
18 #include <barrelfish_kpi/syscalls.h>
20 #include <mdb/mdb_tree.h>
22 #include <paging_kernel_arch.h>
23 #include <paging_generic.h>
26 #include <arch/x86/x86.h>
27 #include <arch/x86/apic.h>
28 #include <arch/x86/global.h>
29 #include <arch/x86/perfmon.h>
30 #include <arch/x86/debugregs.h>
31 #include <arch/x86/syscall.h>
32 #include <arch/x86/timing.h>
33 #include <arch/x86/ipi_notify.h>
34 #include <barrelfish_kpi/sys_debug.h>
35 #include <barrelfish_kpi/lmp.h>
36 #include <barrelfish_kpi/dispatcher_shared_target.h>
37 #include <trace/trace.h>
40 #include <dev/amd_vmcb_dev.h>
43 #define MIN(a,b) ((a) < (b) ? (a) : (b))
45 extern uint64_t user_stack_save;
47 /* FIXME: lots of missing argument checks in this function */
/**
 * \brief Invocation handler: set up a dispatcher.
 *
 * Unpacks capability addresses from the user argument buffer and forwards
 * them to sys_dispatcher_setup(), bracketed by TRACE points.
 * NOTE(review): unpacking of 'depth' (args[1]) and 'run' (args[4]), the
 * return statement and closing brace are missing from this view of the file.
 */
48 static struct sysret handle_dispatcher_setup(struct capability *to,
49 int cmd, uintptr_t *args)
51 capaddr_t cptr = args[0];   // cspace root cap address for the new dispatcher
53 capaddr_t vptr = args[2];   // vspace root cap address
54 capaddr_t dptr = args[3];   // dispatcher frame cap address
56 capaddr_t odptr = args[5];  // presumably the "other" dispatcher frame — TODO confirm
58 TRACE(KERNEL, SC_DISP_SETUP, 0);
59 struct sysret sr = sys_dispatcher_setup(to, cptr, depth, vptr, dptr, run, odptr);
60 TRACE(KERNEL, SC_DISP_SETUP, 1);
/**
 * \brief Invocation handler: set scheduling properties of a dispatcher.
 *
 * Unpacks the scheduling parameters (task type, deadline, WCET, period,
 * release time, weight) and forwards them to sys_dispatcher_properties().
 * NOTE(review): the tail of the sys_dispatcher_properties() call (release,
 * weight arguments), the return and closing brace are missing from this view.
 */
64 static struct sysret handle_dispatcher_properties(struct capability *to,
65 int cmd, uintptr_t *args)
67 enum task_type type = args[0];
68 unsigned long deadline = args[1];
69 unsigned long wcet = args[2];      // worst-case execution time
70 unsigned long period = args[3];
71 unsigned long release = args[4];
72 unsigned short weight = args[5];
74 TRACE(KERNEL, SC_DISP_PROPS, 0);
75 struct sysret sr = sys_dispatcher_properties(to, type, deadline, wcet, period,
77 TRACE(KERNEL, SC_DISP_PROPS, 1);
/**
 * \brief Common retype path, shared by the user invocation (handle_retype)
 *        and the monitor invocation (monitor_handle_retype).
 *
 * Unpacks source cap, destination type/size and destination slot information
 * and forwards to sys_retype(); from_monitor distinguishes the two callers.
 * NOTE(review): the remaining parameters of the signature (args,
 * from_monitor) and the return/closing brace are missing from this view.
 */
81 static struct sysret handle_retype_common(struct capability *root,
85 uint64_t source_cptr = args[0];
86 uint64_t type = args[1];            // target object type
87 uint64_t objbits = args[2];         // size of created objects (log2)
88 uint64_t dest_cnode_cptr = args[3];
89 uint64_t dest_slot = args[4];
90 uint64_t dest_vbits = args[5];      // valid bits of dest cnode address
92 TRACE(KERNEL, SC_RETYPE, 0);
93 struct sysret sr = sys_retype(root, source_cptr, type, objbits, dest_cnode_cptr,
94 dest_slot, dest_vbits, from_monitor);
95 TRACE(KERNEL, SC_RETYPE, 1);
/// Invocation handler: user-initiated retype (from_monitor = false).
99 static struct sysret handle_retype(struct capability *root,
100 int cmd, uintptr_t *args)
102 return handle_retype_common(root, args, false);
/**
 * \brief Invocation handler: create a new capability from scratch.
 *
 * Unpacks object type/size and destination slot and forwards to sys_create().
 * NOTE(review): the final argument of the sys_create() call (dest_vbits),
 * the return and closing brace are missing from this view.
 */
105 static struct sysret handle_create(struct capability *root,
106 int cmd, uintptr_t *args)
108 /* Retrieve arguments */
109 enum objtype type = args[0];
110 uint8_t objbits = args[1];
111 capaddr_t dest_cnode_cptr = args[2];
112 cslot_t dest_slot = args[3];
113 uint8_t dest_vbits = args[4];
115 TRACE(KERNEL, SC_CREATE, 0);
116 struct sysret sr = sys_create(root, type, objbits, dest_cnode_cptr, dest_slot,
118 TRACE(KERNEL, SC_CREATE, 1);
124 * Common code for copying and minting except the mint flag and param passing
/**
 * \brief Common code for copy and mint; 'mint' selects the operation and
 *        whether the two mint parameters are read from the argument buffer.
 *
 * NOTE(review): the conditional unpacking of param1/param2 (only done when
 * mint is true, per the comment), the return and brace are missing from this
 * view of the file — param1/param2 are presumably left undefined for plain
 * copies and ignored by sys_copy_or_mint(); verify in the full source.
 */
126 static struct sysret copy_or_mint(struct capability *root,
127 uintptr_t *args, bool mint)
129 /* Retrive arguments */
130 capaddr_t destcn_cptr = args[0];
131 uint64_t dest_slot = args[1];
132 capaddr_t source_cptr = args[2];
133 int destcn_vbits = args[3];
134 int source_vbits = args[4];
135 uint64_t param1, param2;
136 // params only sent if mint operation
144 TRACE(KERNEL, SC_COPY_OR_MINT, 0);
145 struct sysret sr = sys_copy_or_mint(root, destcn_cptr, dest_slot, source_cptr,
146 destcn_vbits, source_vbits, param1, param2, mint);
147 TRACE(KERNEL, SC_COPY_OR_MINT, 1);
/**
 * \brief Invocation handler: map a frame/vnode cap into a page table.
 *
 * Unpacks destination slot, source cap, mapping flags, offset and page count
 * and forwards to sys_map().
 * NOTE(review): the final argument of the sys_map() call (pte_count), the
 * return and closing brace are missing from this view.
 */
151 static struct sysret handle_map(struct capability *ptable,
152 int cmd, uintptr_t *args)
154 /* Retrieve arguments */
155 uint64_t slot = args[0];
156 capaddr_t source_cptr = args[1];
157 int source_vbits = args[2];
158 uint64_t flags = args[3];       // paging flags for the new mapping
159 uint64_t offset = args[4];      // offset into the source frame
160 uint64_t pte_count = args[5];   // number of page-table entries to map
162 TRACE(KERNEL, SC_MAP, 0);
163 struct sysret sr = sys_map(ptable, slot, source_cptr, source_vbits, flags, offset,
165 TRACE(KERNEL, SC_MAP, 1);
/// Invocation handler: mint a capability (copy with modified parameters).
169 static struct sysret handle_mint(struct capability *root,
170 int cmd, uintptr_t *args)
172 return copy_or_mint(root, args, true);
/// Invocation handler: plain capability copy (no mint parameters).
175 static struct sysret handle_copy(struct capability *root,
176 int cmd, uintptr_t *args)
178 return copy_or_mint(root, args, false);
/**
 * \brief Invocation handler: delete the capability at cptr.
 * NOTE(review): the unpacking of 'bits' (presumably args[1]) is missing
 * from this view of the file.
 */
181 static struct sysret handle_delete(struct capability *root,
182 int cmd, uintptr_t *args)
184 capaddr_t cptr = args[0];
186 return sys_delete(root, cptr, bits);
/**
 * \brief Invocation handler: revoke all descendants/copies of the cap at cptr.
 * NOTE(review): the unpacking of 'bits' is missing from this view.
 */
189 static struct sysret handle_revoke(struct capability *root,
190 int cmd, uintptr_t *args)
192 capaddr_t cptr = args[0];
194 return sys_revoke(root, cptr, bits);
/**
 * \brief Invocation handler: query the distributed-capability state of cptr.
 * NOTE(review): the unpacking of 'bits' is missing from this view.
 */
197 static struct sysret handle_get_state(struct capability *root,
198 int cmd, uintptr_t *args)
200 capaddr_t cptr = args[0];
202 return sys_get_state(root, cptr, bits);
/**
 * \brief Invocation handler: unmap 'pages' entries starting at 'entry' from
 *        the given page table.
 *
 * Looks up the mapping capability in the current dispatcher's cspace with
 * read/write rights, then delegates to page_mappings_unmap().
 * NOTE(review): declarations of 'err'/'mapping', the 'bits' unpack, the
 * final return and closing brace are missing from this view of the file.
 */
205 static struct sysret handle_unmap(struct capability *pgtable,
206 int cmd, uintptr_t *args)
208 capaddr_t cptr = args[0];
210 size_t entry = args[2];   // first page-table entry to unmap
211 size_t pages = args[3];   // number of entries to unmap
215 err = caps_lookup_slot(&dcb_current->cspace.cap, cptr, bits,
216 &mapping, CAPRIGHTS_READ_WRITE);
217 if (err_is_fail(err)) {
218 return SYSRET(err_push(err, SYS_ERR_CAP_NOT_FOUND));
221 TRACE(KERNEL, SC_UNMAP, 0);
222 err = page_mappings_unmap(pgtable, mapping, entry, pages);
223 TRACE(KERNEL, SC_UNMAP, 1);
227 /// Different handler for cap operations performed by the monitor
/**
 * \brief Monitor variant of retype: operates on another domain's cspace.
 *
 * Resolves the target domain's root CNode from the current cspace, then
 * reuses handle_retype_common() with from_monitor = true.  The first two
 * argument words (root address/bits) are consumed here, hence &args[2].
 */
228 static struct sysret monitor_handle_retype(struct capability *kernel_cap,
229 int cmd, uintptr_t *args)
233 capaddr_t root_caddr = args[0];
234 capaddr_t root_vbits = args[1];
236 struct capability *root;
237 err = caps_lookup_cap(&dcb_current->cspace.cap, root_caddr, root_vbits,
238 &root, CAPRIGHTS_READ);
239 if (err_is_fail(err)) {
240 return SYSRET(err_push(err, SYS_ERR_ROOT_CAP_LOOKUP));
243 /* XXX: this hides the first two arguments */
244 return handle_retype_common(root, &args[2], true);
/**
 * \brief Monitor invocation: does the given capability have descendants?
 *
 * The argument buffer itself is reinterpreted as a raw struct capability.
 * Uses the MDB ordering: the next-greater entry is a descendant iff the
 * queried cap is its ancestor.
 * NOTE(review): the .error field of the returned sysret is not visible in
 * this view of the file.
 */
247 static struct sysret monitor_handle_has_descendants(struct capability *kernel_cap,
248 int cmd, uintptr_t *args)
250 struct capability *src = (struct capability *)args;
252 struct cte *next = mdb_find_greater(src, false);
254 return (struct sysret) {
256 .value = (next && is_ancestor(&next->cap, src)),
/**
 * \brief Monitor invocation: delete the last copy of a capability, returning
 *        any resulting cap into the slot (retcn_caddr, ret_slot).
 * NOTE(review): the final argument (ret_slot) of the call and the closing
 * brace are missing from this view.
 */
260 static struct sysret monitor_handle_delete_last(struct capability *kernel_cap,
261 int cmd, uintptr_t *args)
263 capaddr_t root_caddr = args[0];
264 uint8_t root_vbits = args[1];
265 capaddr_t target_caddr = args[2];
266 uint8_t target_vbits = args[3];
267 capaddr_t retcn_caddr = args[4];   // CNode receiving the returned cap
268 uint8_t retcn_vbits = args[5];
269 cslot_t ret_slot = args[6];
271 return sys_monitor_delete_last(root_caddr, root_vbits, target_caddr,
272 target_vbits, retcn_caddr, retcn_vbits,
/// Monitor invocation: delete all foreign (non-owned) copies under a cap.
276 static struct sysret monitor_handle_delete_foreigns(struct capability *kernel_cap,
277 int cmd, uintptr_t *args)
279 capaddr_t caddr = args[0];
280 uint8_t bits = args[1];
281 return sys_monitor_delete_foreigns(caddr, bits);
/**
 * \brief Monitor invocation: mark the target cap (in the given domain's
 *        cspace) for an in-progress distributed revoke.
 */
284 static struct sysret monitor_handle_revoke_mark_tgt(struct capability *kernel_cap,
285 int cmd, uintptr_t *args)
287 capaddr_t root_caddr = args[0];
288 uint8_t root_vbits = args[1];
289 capaddr_t target_caddr = args[2];
290 uint8_t target_vbits = args[3];
292 return sys_monitor_revoke_mark_tgt(root_caddr, root_vbits,
293 target_caddr, target_vbits);
/**
 * \brief Monitor invocation: mark all local relations of a capability for
 *        revoke.  The argument buffer is reinterpreted as the raw capability.
 */
296 static struct sysret monitor_handle_revoke_mark_rels(struct capability *kernel_cap,
297 int cmd, uintptr_t *args)
299 struct capability *base = (struct capability*)args;
301 return sys_monitor_revoke_mark_rels(base);
/**
 * \brief Monitor invocation: perform one step of a multi-step delete,
 *        returning any freed cap into (ret_cn_addr, ret_slot).
 */
304 static struct sysret monitor_handle_delete_step(struct capability *kernel_cap,
305 int cmd, uintptr_t *args)
307 capaddr_t ret_cn_addr = args[0];
308 capaddr_t ret_cn_bits = args[1];
309 capaddr_t ret_slot = args[2];
310 return sys_monitor_delete_step(ret_cn_addr, ret_cn_bits, ret_slot);
/**
 * \brief Monitor invocation: perform one step of clearing deleted caps,
 *        mirroring monitor_handle_delete_step's argument layout.
 */
313 static struct sysret monitor_handle_clear_step(struct capability *kernel_cap,
314 int cmd, uintptr_t *args)
316 capaddr_t ret_cn_addr = args[0];
317 capaddr_t ret_cn_bits = args[1];
318 capaddr_t ret_slot = args[2];
319 return sys_monitor_clear_step(ret_cn_addr, ret_cn_bits, ret_slot);
/**
 * \brief Monitor invocation: register the monitor's kernel notification
 *        endpoint (ep_caddr), bracketed by TRACE points.
 * NOTE(review): the return of 'sr' and the closing brace are missing from
 * this view of the file.
 */
322 static struct sysret monitor_handle_register(struct capability *kernel_cap,
323 int cmd, uintptr_t *args)
325 capaddr_t ep_caddr = args[0];
327 TRACE(KERNEL, SC_MONITOR_REGISTER, 0);
328 struct sysret sr = sys_monitor_register(ep_caddr);
329 TRACE(KERNEL, SC_MONITOR_REGISTER, 1);
/// Monitor invocation: return this kernel's logical core ID.
333 static struct sysret monitor_get_core_id(struct capability *kernel_cap,
334 int cmd, uintptr_t *args)
336 return (struct sysret){.error = SYS_ERR_OK, .value = my_core_id};
/// Monitor invocation: return this core's architectural (APIC) ID.
339 static struct sysret monitor_get_arch_id(struct capability *kernel_cap,
340 int cmd, uintptr_t *args)
342 return (struct sysret){.error = SYS_ERR_OK, .value = apic_id};
/**
 * \brief Common helper: identify (serialize) the cap at (cptr, bits) in the
 *        given root cspace into the user-supplied buffer retbuf.
 *
 * NOTE(review): args[2] is a raw user pointer used as the output buffer —
 * presumably validated inside sys_monitor_identify_cap(); verify there.
 * The final 'args' parameter of the signature is missing from this view.
 */
345 static struct sysret monitor_identify_cap_common(struct capability *kernel_cap,
346 struct capability *root,
349 capaddr_t cptr = args[0];
350 uint8_t bits = args[1];
352 struct capability *retbuf = (void *)args[2];
354 return sys_monitor_identify_cap(root, cptr, bits, retbuf);
/// Monitor invocation: identify a cap in the current dispatcher's own cspace.
357 static struct sysret monitor_identify_cap(struct capability *kernel_cap,
358 int cmd, uintptr_t *args)
360 return monitor_identify_cap_common(kernel_cap, &dcb_current->cspace.cap, args);
/**
 * \brief Monitor invocation: identify a cap in ANOTHER domain's cspace.
 *
 * First resolves the foreign root CNode from the current cspace, then reuses
 * monitor_identify_cap_common() with the remaining arguments (&args[2]).
 * NOTE(review): the declaration of 'err' is missing from this view.
 */
363 static struct sysret monitor_identify_domains_cap(struct capability *kernel_cap,
364 int cmd, uintptr_t *args)
368 capaddr_t root_caddr = args[0];
369 capaddr_t root_vbits = args[1];
371 struct capability *root;
372 err = caps_lookup_cap(&dcb_current->cspace.cap, root_caddr, root_vbits,
373 &root, CAPRIGHTS_READ);
375 if (err_is_fail(err)) {
376 return SYSRET(err_push(err, SYS_ERR_ROOT_CAP_LOOKUP));
379 /* XXX: this hides the first two arguments */
380 return monitor_identify_cap_common(kernel_cap, root, &args[2]);
/**
 * \brief Monitor invocation: test which relations (copies/descendants/
 *        ancestors, selected by 'mask') the cap at (caddr, vbits) has.
 */
383 static struct sysret monitor_cap_has_relations(struct capability *kernel_cap,
384 int cmd, uintptr_t *args)
386 capaddr_t caddr = args[0];
387 uint8_t vbits = args[1];
388 uint8_t mask = args[2];
390 return sys_cap_has_relations(caddr, vbits, mask);
/**
 * \brief Monitor invocation: get/set the remote-relations flags of a cap.
 *
 * args[4] packs two bytes: low byte = new relation flags, next byte = mask
 * of which flags to update.
 * NOTE(review): the unpacking of 'bits' (presumably args[3]) and the tail of
 * the sys_monitor_remote_relations() call are missing from this view.
 */
393 static struct sysret monitor_remote_relations(struct capability *kernel_cap,
394 int cmd, uintptr_t *args)
396 capaddr_t root_addr = args[0];
397 int root_bits = args[1];
398 capaddr_t cptr = args[2];
400 uint8_t relations = args[4] & 0xFF;
401 uint8_t mask = (args[4] >> 8) & 0xFF;
403 return sys_monitor_remote_relations(root_addr, root_bits, cptr, bits,
/**
 * \brief Monitor invocation: materialize a capability from raw metadata.
 *
 * The argument buffer starts with a raw struct capability; the destination
 * CNode address/bits/slot and the owning core follow at word offset 'pos'
 * (= sizeof(struct capability) in 64-bit words).  Refuses to create Null
 * caps, and refuses locally-owned copies of types whose identity is
 * per-core (EndPoint, Dispatcher, Kernel, IRQTable) — only foreign copies
 * of those may be created here.
 * NOTE(review): the tail of the caps_create_from_existing() call (slot,
 * owner, src) is missing from this view.
 */
408 static struct sysret monitor_create_cap(struct capability *kernel_cap,
409 int cmd, uintptr_t *args)
411 /* XXX: Get the raw metadata of the capability to create */
412 struct capability *src = (struct capability *)args;
413 int pos = sizeof(struct capability) / sizeof(uint64_t);
415 /* Cannot create null caps */
416 if (src->type == ObjType_Null) {
417 return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
420 coreid_t owner = args[pos + 3];
422 /* For certain types, only foreign copies can be created here */
423 if ((src->type == ObjType_EndPoint || src->type == ObjType_Dispatcher
424 || src->type == ObjType_Kernel || src->type == ObjType_IRQTable)
425 && owner == my_core_id)
427 return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
430 /* Create the cap in the destination */
431 capaddr_t cnode_cptr = args[pos];
432 int cnode_vbits = args[pos + 1];
433 size_t slot = args[pos + 2];
435 return SYSRET(caps_create_from_existing(&dcb_current->cspace.cap,
436 cnode_cptr, cnode_vbits,
/**
 * \brief Monitor invocation: copy an existing capability, identified by its
 *        raw metadata at the start of the argument buffer, into the
 *        destination slot (cnode_cptr, slot).
 *
 * Argument layout mirrors monitor_create_cap: raw cap first, destination
 * words at offset 'pos'.
 */
440 static struct sysret monitor_copy_existing(struct capability *kernel_cap,
441 int cmd, uintptr_t *args)
443 /* XXX: Get the raw metadata of the capability to create */
444 struct capability *src = (struct capability *)args;
445 int pos = sizeof(struct capability) / sizeof(uint64_t);
447 capaddr_t cnode_cptr = args[pos];
448 int cnode_vbits = args[pos + 1];
449 size_t slot = args[pos + 2];
451 return sys_monitor_copy_existing(src, cnode_cptr, cnode_vbits, slot);
/// Monitor invocation: overwrite the cap at (cptr, bits) with a Null cap.
454 static struct sysret monitor_nullify_cap(struct capability *kernel_cap,
455 int cmd, uintptr_t *args)
457 capaddr_t cptr = args[0];
458 uint8_t bits = args[1];
460 return sys_monitor_nullify_cap(cptr, bits);
/// Monitor invocation: synchronize this core's timer to 'synctime'.
463 static struct sysret monitor_handle_sync_timer(struct capability *kern_cap,
464 int cmd, uintptr_t *args)
466 uint64_t synctime = args[0];
467 return sys_monitor_handle_sync_timer(synctime);
/**
 * \brief Invocation handler: identify a (Dev)Frame capability.
 *
 * Returns the frame's physical base address with the size (in bits) packed
 * into the low, page-aligned bits of the address — the assert guarantees
 * those bits are free.
 * NOTE(review): the .error field of the returned sysret and the closing
 * brace are missing from this view.
 */
470 static struct sysret handle_frame_identify(struct capability *to,
471 int cmd, uintptr_t *args)
473 // Return with physical base address of frame
474 // XXX: pack size into bottom bits of base address
475 assert(to->type == ObjType_Frame || to->type == ObjType_DevFrame);
476 assert((to->u.frame.base & BASE_PAGE_MASK) == 0);
477 return (struct sysret) {
479 .value = to->u.frame.base | to->u.frame.bits,
/**
 * \brief Invocation handler: change paging flags on (part of) the mapped
 *        region of a frame.
 * NOTE(review): the .error/.value fields of the returned sysret and the
 * closing brace are missing from this view — presumably 'err' is returned.
 */
483 static struct sysret handle_frame_modify_flags(struct capability *to,
484 int cmd, uintptr_t *args)
486 // Modify flags of (part of) mapped region of frame
487 assert(to->type == ObjType_Frame || to->type == ObjType_DevFrame);
490 size_t offset = args[0]; // in pages; of first page to modify from first
491 // page in mapped region
492 size_t pages = args[1]; // #pages to modify
493 size_t flags = args[2]; // new flags
494 genvaddr_t va = args[3]; // virtual addr hint
496 errval_t err = page_mappings_modify_flags(to, offset, pages, flags, va);
498 return (struct sysret) {
/**
 * \brief Invocation handler for I/O-port caps; 'cmd' selects the operation
 *        (in/out, byte/word/dword — see the IOCmd_* table entries below).
 */
504 static struct sysret handle_io(struct capability *to, int cmd, uintptr_t *args)
506 uint64_t port = args[0];
507 uint64_t data = args[1]; // ignored for input
509 return sys_io(to, cmd, port, data);
/**
 * \brief Invocation handler: set up a VM-guest dispatcher (AMD SVM / VMKit).
 *
 * Validates and installs four capabilities on the target DCB:
 *  - epp:    endpoint to the monitor, notified on guest exits
 *  - vnodep: guest PML4 (vspace root)
 *  - vmcbp:  frame holding the hardware VMCB (>= one base page)
 *  - ctrlp:  frame for the guest control structure (>= one base page)
 * Enables virtualization extensions first, then copies the caps into
 * dcb->guest_desc and marks the DCB as a VM guest.
 * NOTE(review): several error-return lines inside the lookup failure
 * branches, the 'err'/'ep_cte' declarations, the final return of the
 * function tail and closing braces are missing from this view of the file.
 */
handle_dispatcher_setup_guest (struct capability *to, int cmd, uintptr_t *args)
517 struct dcb *dcb = to->u.dispatcher.dcb;
519 capaddr_t epp = args[0];
520 capaddr_t vnodep = args[1];
521 capaddr_t vmcbp = args[2];
522 capaddr_t ctrlp = args[3];
524 // 0. Enable VM extensions
525 err = vmkit_enable_virtualization();
526 if (err != SYS_ERR_OK) {
530 // 1. Check arguments
531 // Monitor endpoint for exits of this geust
534 err = caps_lookup_slot(&dcb_current->cspace.cap, epp, CPTR_BITS,
535 &ep_cte, CAPRIGHTS_READ_WRITE);
536 if (err_is_fail(err)) {
539 if (ep_cte->cap.type != ObjType_EndPoint) {
540 return SYSRET(SYS_ERR_VMKIT_ENDPOINT_INVALID);
542 err = caps_copy_to_cte(&dcb->guest_desc.monitor_ep, ep_cte, false, 0, 0);
543 if (err_is_fail(err)) {
544 return SYSRET(err_push(err, SYS_ERR_VMKIT_ENDPOINT));
548 struct capability *vnode_cap;
549 err = caps_lookup_cap(&dcb_current->cspace.cap, vnodep, CPTR_BITS,
550 &vnode_cap, CAPRIGHTS_WRITE);
551 if (err_is_fail(err)) {
554 if (vnode_cap->type != ObjType_VNode_x86_64_pml4) {
555 return SYSRET(SYS_ERR_DISP_VSPACE_INVALID);
558 assert(vnode_cap->type == ObjType_VNode_x86_64_pml4);
561 struct cte *vmcb_cte;
562 err = caps_lookup_slot(&dcb_current->cspace.cap, vmcbp, CPTR_BITS,
563 &vmcb_cte, CAPRIGHTS_READ_WRITE);
564 if (err_is_fail(err)) {
567 if (vmcb_cte->cap.type != ObjType_Frame ||
568 vmcb_cte->cap.u.frame.bits < BASE_PAGE_BITS) {
569 return SYSRET(SYS_ERR_VMKIT_VMCB_INVALID);
571 err = caps_copy_to_cte(&dcb->guest_desc.vmcb, vmcb_cte, false, 0, 0);
572 if (err_is_fail(err)) {
573 return SYSRET(err_push(err, SYS_ERR_VMKIT_VMCB));
577 struct cte *ctrl_cte;
578 err = caps_lookup_slot(&dcb_current->cspace.cap, ctrlp, CPTR_BITS,
579 &ctrl_cte, CAPRIGHTS_READ_WRITE);
580 if (err_is_fail(err)) {
583 if (ctrl_cte->cap.type != ObjType_Frame ||
584 ctrl_cte->cap.u.frame.bits < BASE_PAGE_BITS) {
585 return SYSRET(SYS_ERR_VMKIT_CTRL_INVALID);
587 err = caps_copy_to_cte(&dcb->guest_desc.ctrl, ctrl_cte, false, 0, 0);
588 if (err_is_fail(err)) {
589 return SYSRET(err_push(err, SYS_ERR_VMKIT_CTRL));
592 // 2. Set up the target DCB
593 /* dcb->guest_desc.monitor_ep = ep_cap; */
594 dcb->vspace = vnode_cap->u.vnode_x86_64_pml4.base;
595 dcb->is_vm_guest = true;
596 /* dcb->guest_desc.vmcb = vmcb_cap->u.frame.base; */
597 /* dcb->guest_desc.ctrl = (void *)x86_64_phys_to_mem(ctrl_cap->u.frame.base); */
599 return SYSRET(SYS_ERR_OK);
/// Monitor invocation: associate a domain ID with the dispatcher at cptr.
603 static struct sysret monitor_handle_domain_id(struct capability *monitor_cap,
604 int cmd, uintptr_t *args)
606 capaddr_t cptr = args[0];
607 domainid_t domain_id = args[1];
609 return sys_monitor_domain_id(cptr, domain_id);
/// Monitor invocation: query the owning core of the cap at (cptr, bits)
/// within the cspace rooted at (root_addr, root_bits).
612 static struct sysret monitor_get_cap_owner(struct capability *monitor_cap,
613 int cmd, uintptr_t *args)
615 capaddr_t root_addr = args[0];
616 uint8_t root_bits = args[1];
617 capaddr_t cptr = args[2];
618 uint8_t bits = args[3];
620 return sys_get_cap_owner(root_addr, root_bits, cptr, bits);
/// Monitor invocation: set the owning core of the cap at (cptr, bits)
/// within the cspace rooted at (root_addr, root_bits).
623 static struct sysret monitor_set_cap_owner(struct capability *monitor_cap,
624 int cmd, uintptr_t *args)
626 capaddr_t root_addr = args[0];
627 uint8_t root_bits = args[1];
628 capaddr_t cptr = args[2];
629 uint8_t bits = args[3];
630 coreid_t owner = args[4];
632 return sys_set_cap_owner(root_addr, root_bits, cptr, bits, owner);
/// Monitor invocation: lock a cap for a distributed cap operation.
635 static struct sysret monitor_lock_cap(struct capability *monitor_cap,
636 int cmd, uintptr_t *args)
638 capaddr_t root_addr = args[0];
639 uint8_t root_bits = args[1];
640 capaddr_t cptr = args[2];
641 uint8_t bits = args[3];
643 return sys_lock_cap(root_addr, root_bits, cptr, bits);
/// Monitor invocation: unlock a cap previously locked via monitor_lock_cap.
646 static struct sysret monitor_unlock_cap(struct capability *monitor_cap,
647 int cmd, uintptr_t *args)
649 capaddr_t root_addr = args[0];
650 uint8_t root_bits = args[1];
651 capaddr_t cptr = args[2];
652 uint8_t bits = args[3];
654 return sys_unlock_cap(root_addr, root_bits, cptr, bits);
658 * \brief Set up tracing in the kernel
/**
 * \brief Set up tracing in the kernel.
 *
 * Looks up the user-provided frame cap, maps its physical base into the
 * kernel window as the global trace buffer, and records the boot
 * applications into it.
 * NOTE(review): the 'err' declaration, the frame type check and the error
 * return inside the failure branch are missing from this view of the file.
 */
660 static struct sysret handle_trace_setup(struct capability *cap,
661 int cmd, uintptr_t *args)
663 struct capability *frame;
666 /* lookup passed cap */
667 capaddr_t cptr = args[0];
668 err = caps_lookup_cap(&dcb_current->cspace.cap, cptr, CPTR_BITS, &frame,
669 CAPRIGHTS_READ_WRITE);
670 if (err_is_fail(err)) {
674 lpaddr_t lpaddr = gen_phys_to_local_phys(frame->u.frame.base);
675 kernel_trace_buf = local_phys_to_mem(lpaddr);
676 //printf("kernel.%u: handle_trace_setup at %lx\n", apic_id, kernel_trace_buf);
678 // Copy boot applications.
679 trace_copy_boot_applications();
681 return SYSRET(SYS_ERR_OK);
/**
 * \brief Invocation handler: allocate a free IRQ vector.
 * NOTE(review): the 'ret'/'outvec' declarations, assignment of outvec into
 * ret.value and the return are missing from this view of the file.
 */
684 static struct sysret handle_irq_table_alloc(struct capability *to, int cmd,
689 ret.error = irq_table_alloc(&outvec);
/// Invocation handler: bind IRQ vector args[0] to the endpoint cap args[1].
/// NOTE(review): the 'args' parameter declaration is missing from this view.
695 static struct sysret handle_irq_table_set(struct capability *to, int cmd,
698 return SYSRET(irq_table_set(args[0], args[1]));
/// Invocation handler: remove the binding for IRQ vector args[0].
/// NOTE(review): the 'args' parameter declaration is missing from this view.
701 static struct sysret handle_irq_table_delete(struct capability *to, int cmd,
704 return SYSRET(irq_table_delete(args[0]));
/// Invocation handler: raise the IPI notification encoded in the Notify_IPI
/// cap (destination core + channel ID).
707 static struct sysret handle_ipi_notify_send(struct capability *cap,
708 int cmd, uintptr_t *args)
710 assert(cap->type == ObjType_Notify_IPI);
711 return ipi_raise_notify(cap->u.notify_ipi.coreid, cap->u.notify_ipi.chanid);
/// Kernel-cap invocation: register endpoint 'ep' to be notified on IPI
/// channel 'chanid'.
714 static struct sysret kernel_ipi_register(struct capability *cap,
715 int cmd, uintptr_t *args)
717 assert(cap->type == ObjType_Kernel);
718 capaddr_t ep = args[0];
719 int chanid = args[1];
720 return SYSRET(ipi_register_notification(ep, chanid));
/**
 * \brief Kernel-cap invocation: delete an IPI notification registration.
 * NOTE(review): the line that would actually perform the deletion (original
 * line 727) is missing from this view — as shown, this only returns OK.
 */
723 static struct sysret kernel_ipi_delete(struct capability *cap,
724 int cmd, uintptr_t *args)
726 assert(cap->type == ObjType_Kernel);
728 return SYSRET(SYS_ERR_OK);
/// Debug invocation: print the page tables of the given dispatcher's DCB
/// to the kernel console.
731 static struct sysret dispatcher_dump_ptables(struct capability *cap,
732 int cmd, uintptr_t *args)
734 assert(cap->type == ObjType_Dispatcher);
736 printf("kernel_dump_ptables\n");
738 struct dcb *dispatcher = cap->u.dispatcher.dcb;
740 paging_dump_tables(dispatcher);
742 return SYSRET(SYS_ERR_OK);
746 * \brief Activate performance monitoring
748 * Activates performance monitoring.
749 * \param xargs Expected parameters in args:
750 * - performance monitoring type
751 * - mask for given type
753 * - Also count in privileged mode
754 * - Number of counts before overflow. This parameter may be used to
755 * set tradeoff between accuracy and overhead. Set the counter to 0
756 * to deactivate the usage of APIC.
757 * - Endpoint capability to be invoked when the counter overflows.
758 * The buffer associated with the endpoint needs to be large enough
759 * to hold several overflow notifications depending on the overflow
/*
 * Unpacks event/umask/counter configuration and starts measurement via
 * perfmon_measure_start(); when an overflow endpoint is supplied
 * (ep_addr != 0) it is looked up and stored in the global
 * perfmon_callback_ep for overflow notification delivery.
 * NOTE(review): the conditional around the endpoint lookup, the 'err'
 * declaration and the error return inside the failure branch are missing
 * from this view of the file.
 */
762 static struct sysret performance_counter_activate(struct capability *cap,
763 int cmd, uintptr_t *args)
765 uint8_t event = args[0];
766 uint8_t umask = args[1];
767 uint8_t counter_id = args[2];
768 bool kernel = args[3];              // also count in privileged mode?
769 uint64_t counter_value = args[4];   // 0 disables APIC overflow usage
770 capaddr_t ep_addr = args[5];
773 struct capability *ep;
774 extern struct capability perfmon_callback_ep;
777 assert(ep_addr!=0 || counter_value==0);  // overflow needs an endpoint
780 perfmon_measure_start(event, umask, counter_id, kernel, counter_value);
784 err = caps_lookup_cap(&dcb_current->cspace.cap, ep_addr, CPTR_BITS, &ep,
786 if(err_is_fail(err)) {
790 perfmon_callback_ep = *ep;
793 return SYSRET(SYS_ERR_OK);
797 * \brief Write counter values.
/// PerfMon invocation: write a raw value into the given performance counter.
799 static struct sysret performance_counter_write(struct capability *cap,
800 int cmd, uintptr_t *args)
802 uint8_t counter_id = args[0];
803 uint64_t counter_value = args[1];
805 perfmon_measure_write(counter_id, counter_value);
806 return SYSRET(SYS_ERR_OK);
810 * \brief Deactivate performance counters again.
/// PerfMon invocation: stop all performance-counter measurement.
812 static struct sysret performance_counter_deactivate(struct capability *cap,
813 int cmd, uintptr_t *args)
815 perfmon_measure_stop();
816 return SYSRET(SYS_ERR_OK);
820 * \brief Return system-wide unique ID of this ID cap.
/**
 * \brief Invocation handler: return the system-wide unique ID of an ID cap.
 * NOTE(review): the 'args'/'id' declarations, the copy of 'id' to the user
 * buffer and the return of 'sysret' are missing from this view of the file.
 */
822 static struct sysret handle_idcap_identify(struct capability *cap, int cmd,
826 struct sysret sysret = sys_idcap_identify(cap, &id);
/**
 * \brief Kernel-cap invocation: send an INIT IPI (assert then de-assert)
 *        to the given destination core, used during core bring-up.
 * NOTE(review): the 'args' parameter declaration is missing from this view.
 */
832 static struct sysret kernel_send_init_ipi(struct capability *cap, int cmd,
835 coreid_t destination = args[0];
836 // printk(LOG_DEBUG, "%s:%s:%d: destination=%"PRIuCOREID"\n",
837 // __FILE__, __FUNCTION__, __LINE__, destination);
839 apic_send_init_assert(destination, xapic_none);
840 apic_send_init_deassert();
842 return SYSRET(SYS_ERR_OK);
/**
 * \brief Kernel-cap invocation: send a STARTUP IPI to the destination core,
 *        pointing it at the fixed real-mode bootstrap segment.
 * NOTE(review): the remaining parameters of the signature (cmd, args) are
 * missing from this view of the file.
 */
845 static struct sysret kernel_send_start_ipi(struct capability *cap,
849 coreid_t destination = args[0];
850 genvaddr_t start_vector = X86_64_REAL_MODE_SEGMENT_TO_REAL_MODE_PAGE(X86_64_REAL_MODE_SEGMENT);
851 // printk(LOG_DEBUG, "%s:%d: destination=%"PRIuCOREID" start_vector=%"PRIxGENVADDR"\n",
852 // __FILE__, __LINE__, destination, start_vector);
854 apic_send_start_up(destination, xapic_none, start_vector);
856 return SYSRET(SYS_ERR_OK);
/**
 * \brief Kernel-cap invocation: return the local physical address of the
 *        kernel's 'global' state structure.
 * NOTE(review): the remaining signature parameters, the return of 'sysret'
 * and the closing brace are missing from this view of the file.
 */
859 static struct sysret kernel_get_global_phys(struct capability *cap,
864 struct sysret sysret;
865 sysret.value = mem_to_local_phys((lvaddr_t)global);
866 sysret.error = SYS_ERR_OK;
/**
 * \brief Kernel-cap invocation: add a kernel control block to this core's
 *        KCB ring.
 * NOTE(review): args[0] is treated as a raw kernel-virtual pointer —
 * presumably validated by sys_kernel_add_kcb(); verify there.
 */
871 static struct sysret kernel_add_kcb(struct capability *kern_cap,
872 int cmd, uintptr_t *args)
874 uintptr_t kcb_addr = args[0];
875 struct kcb *new_kcb = (struct kcb *)kcb_addr;
877 return sys_kernel_add_kcb(new_kcb);
/// Kernel-cap invocation: remove the KCB at the given kernel-virtual
/// address from this core's KCB ring.
880 static struct sysret kernel_remove_kcb(struct capability *kern_cap,
881 int cmd, uintptr_t *args)
883 printk(LOG_NOTE, "in kernel_remove_kcb invocation!\n");
884 uintptr_t kcb_addr = args[0];
885 struct kcb *to_remove = (struct kcb *)kcb_addr;
887 return sys_kernel_remove_kcb(to_remove);
/// Kernel-cap invocation: suspend (args[0] true) or resume KCB scheduling.
890 static struct sysret kernel_suspend_kcb_sched(struct capability *kern_cap,
891 int cmd, uintptr_t *args)
893 printk(LOG_NOTE, "in kernel_suspend_kcb_sched invocation!\n");
894 return sys_kernel_suspend_kcb_sched((bool)args[0]);
/// Invocation handler: identify a KernelControlBlock capability.
897 static struct sysret handle_kcb_identify(struct capability *to,
898 int cmd, uintptr_t *args)
900 return sys_handle_kcb_identify(to);
/// Signature shared by every capability invocation handler in this file.
904 typedef struct sysret (*invocation_handler_t)(struct capability *to,
905 int cmd, uintptr_t *args);
/**
 * Dispatch table: invocations[object type][command] -> handler.
 * Unlisted (type, cmd) pairs are NULL and rejected by the caller
 * (sys_syscall) with SYS_ERR_ILLEGAL_INVOCATION.
 * NOTE(review): the opening lines of several sub-initializers (e.g. for
 * ObjType_Frame, ObjType_CNode, ObjType_Kernel, ObjType_IPI, ObjType_IO,
 * ObjType_ID) and some closing braces are missing from this view.
 */
907 static invocation_handler_t invocations[ObjType_Num][CAP_MAX_CMD] = {
908 [ObjType_Dispatcher] = {
909 [DispatcherCmd_Setup] = handle_dispatcher_setup,
910 [DispatcherCmd_Properties] = handle_dispatcher_properties,
912 [DispatcherCmd_SetupGuest] = handle_dispatcher_setup_guest,
914 [DispatcherCmd_DumpPTables] = dispatcher_dump_ptables,
916 [ObjType_KernelControlBlock] = {
917 [FrameCmd_Identify] = handle_kcb_identify,
920 [FrameCmd_Identify] = handle_frame_identify,
921 [FrameCmd_ModifyFlags] = handle_frame_modify_flags,
923 [ObjType_DevFrame] = {
924 [FrameCmd_Identify] = handle_frame_identify,
925 [FrameCmd_ModifyFlags] = handle_frame_modify_flags,
928 [CNodeCmd_Copy] = handle_copy,
929 [CNodeCmd_Mint] = handle_mint,
930 [CNodeCmd_Retype] = handle_retype,
931 [CNodeCmd_Create] = handle_create,
932 [CNodeCmd_Delete] = handle_delete,
933 [CNodeCmd_Revoke] = handle_revoke,
934 [CNodeCmd_GetState] = handle_get_state,
936 [ObjType_VNode_x86_64_pml4] = {
937 [VNodeCmd_Map] = handle_map,
938 [VNodeCmd_Unmap] = handle_unmap,
940 [ObjType_VNode_x86_64_pdpt] = {
941 [VNodeCmd_Map] = handle_map,
942 [VNodeCmd_Unmap] = handle_unmap,
944 [ObjType_VNode_x86_64_pdir] = {
945 [VNodeCmd_Map] = handle_map,
946 [VNodeCmd_Unmap] = handle_unmap,
948 [ObjType_VNode_x86_64_ptable] = {
949 [VNodeCmd_Map] = handle_map,
950 [VNodeCmd_Unmap] = handle_unmap,
953 [KernelCmd_Get_core_id] = monitor_get_core_id,
954 [KernelCmd_Get_arch_id] = monitor_get_arch_id,
955 [KernelCmd_Identify_cap] = monitor_identify_cap,
956 [KernelCmd_Identify_domains_cap] = monitor_identify_domains_cap,
957 [KernelCmd_Remote_relations] = monitor_remote_relations,
958 [KernelCmd_Cap_has_relations] = monitor_cap_has_relations,
959 [KernelCmd_Create_cap] = monitor_create_cap,
960 [KernelCmd_Copy_existing] = monitor_copy_existing,
961 [KernelCmd_Nullify_cap] = monitor_nullify_cap,
962 [KernelCmd_Setup_trace] = handle_trace_setup,
963 [KernelCmd_Register] = monitor_handle_register,
964 [KernelCmd_Domain_Id] = monitor_handle_domain_id,
965 [KernelCmd_Get_cap_owner] = monitor_get_cap_owner,
966 [KernelCmd_Set_cap_owner] = monitor_set_cap_owner,
967 [KernelCmd_Lock_cap] = monitor_lock_cap,
968 [KernelCmd_Unlock_cap] = monitor_unlock_cap,
969 [KernelCmd_Retype] = monitor_handle_retype,
970 [KernelCmd_Has_descendants] = monitor_handle_has_descendants,
971 [KernelCmd_Delete_last] = monitor_handle_delete_last,
972 [KernelCmd_Delete_foreigns] = monitor_handle_delete_foreigns,
973 [KernelCmd_Revoke_mark_target] = monitor_handle_revoke_mark_tgt,
974 [KernelCmd_Revoke_mark_relations] = monitor_handle_revoke_mark_rels,
975 [KernelCmd_Delete_step] = monitor_handle_delete_step,
976 [KernelCmd_Clear_step] = monitor_handle_clear_step,
977 [KernelCmd_Sync_timer] = monitor_handle_sync_timer,
978 [KernelCmd_IPI_Register] = kernel_ipi_register,
979 [KernelCmd_IPI_Delete] = kernel_ipi_delete,
980 [KernelCmd_GetGlobalPhys] = kernel_get_global_phys,
981 [KernelCmd_Add_kcb] = kernel_add_kcb,
982 [KernelCmd_Remove_kcb] = kernel_remove_kcb,
983 [KernelCmd_Suspend_kcb_sched] = kernel_suspend_kcb_sched,
986 [IPICmd_Send_Start] = kernel_send_start_ipi,
987 [IPICmd_Send_Init] = kernel_send_init_ipi,
989 [ObjType_IRQTable] = {
990 [IRQTableCmd_Alloc] = handle_irq_table_alloc,
991 [IRQTableCmd_Set] = handle_irq_table_set,
992 [IRQTableCmd_Delete] = handle_irq_table_delete
995 [IOCmd_Outb] = handle_io,
996 [IOCmd_Outw] = handle_io,
997 [IOCmd_Outd] = handle_io,
998 [IOCmd_Inb] = handle_io,
999 [IOCmd_Inw] = handle_io,
1000 [IOCmd_Ind] = handle_io
1002 [ObjType_Notify_IPI] = {
1003 [NotifyCmd_Send] = handle_ipi_notify_send
1005 [ObjType_PerfMon] = {
1006 [PerfmonCmd_Activate] = performance_counter_activate,
1007 [PerfmonCmd_Deactivate] = performance_counter_deactivate,
1008 [PerfmonCmd_Write] = performance_counter_write,
1011 [IDCmd_Identify] = handle_idcap_identify,
1015 /* syscall C entry point; called only from entry.S so no prototype in header */
1016 struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
1017 uint64_t *args, uint64_t rflags, uint64_t rip);
1018 struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
1019 uint64_t *args, uint64_t rflags, uint64_t rip)
1021 struct sysret retval = { .error = SYS_ERR_OK, .value = 0 };
1024 case SYSCALL_INVOKE: /* Handle capability invocation */
1026 // unpack "header" word
1027 capaddr_t invoke_cptr = arg0 >> 32;
1028 uint8_t send_bits = arg0 >> 24;
1029 uint8_t invoke_bits = arg0 >> 16;
1030 uint8_t length_words = arg0 >> 8;
1031 uint8_t flags = arg0;
1033 debug(SUBSYS_SYSCALL, "sys_invoke(0x%x(%d), 0x%lx)\n",
1034 invoke_cptr, invoke_bits, arg1);
1036 // Capability to invoke
1037 struct capability *to = NULL;
1038 retval.error = caps_lookup_cap(&dcb_current->cspace.cap, invoke_cptr,
1039 invoke_bits, &to, CAPRIGHTS_READ);
1040 if (err_is_fail(retval.error)) {
1045 assert(to->type < ObjType_Num);
1047 // Endpoint cap, do LMP
1048 if (to->type == ObjType_EndPoint) {
1049 struct dcb *listener = to->u.endpoint.listener;
1050 assert(listener != NULL);
1052 if (listener->disp == 0) {
1053 retval.error = SYS_ERR_LMP_NO_TARGET;
1057 /* limit length of message from buggy/malicious sender */
1058 length_words = MIN(length_words, LMP_MSG_LENGTH);
1060 // does the sender want to yield their timeslice on success?
1061 bool sync = flags & LMP_FLAG_SYNC;
1062 // does the sender want to yield to the target if undeliverable?
1063 bool yield = flags & LMP_FLAG_YIELD;
1064 // is the cap (if present) to be deleted on send?
1065 bool give_away = flags & LMP_FLAG_GIVEAWAY;
1067 // try to deliver message
1068 retval.error = lmp_deliver(to, dcb_current, args, length_words,
1069 arg1, send_bits, give_away);
1071 /* Switch to reciever upon successful delivery with sync flag,
1072 * or (some cases of) unsuccessful delivery with yield flag */
1073 enum err_code err_code = err_no(retval.error);
1074 if ((sync && err_is_ok(retval.error)) ||
1075 (yield && (err_code == SYS_ERR_LMP_BUF_OVERFLOW
1076 || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_LOOKUP
1077 || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_INVALID
1078 || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_SLOT_OCCUPIED))
1080 if (err_is_fail(retval.error)) {
1081 struct dispatcher_shared_generic *current_disp =
1082 get_dispatcher_shared_generic(dcb_current->disp);
1083 struct dispatcher_shared_generic *listener_disp =
1084 get_dispatcher_shared_generic(listener->disp);
1085 debug(SUBSYS_DISPATCH, "LMP failed; %.*s yields to %.*s: %u\n",
1086 DISP_NAME_LEN, current_disp->name,
1087 DISP_NAME_LEN, listener_disp->name, err_code);
1090 // special-case context switch: ensure correct state in current DCB
1091 dispatcher_handle_t handle = dcb_current->disp;
1092 struct dispatcher_shared_x86_64 *disp =
1093 get_dispatcher_shared_x86_64(handle);
1094 dcb_current->disabled = dispatcher_is_disabled_ip(handle, rip);
1095 struct registers_x86_64 *save_area;
1096 if (dcb_current->disabled) {
1097 save_area = &disp->disabled_save_area;
1099 save_area = &disp->enabled_save_area;
1102 // Should be enabled. Else, how do we do an invocation??
1103 if(dcb_current->disabled) {
1104 panic("Dispatcher needs to be enabled for this invocation");
1107 // save calling dispatcher's registers, so that when the dispatcher
1108 // next runs, it has a valid state in the relevant save area.
1109 // Save RIP, RFLAGS, RSP and set RAX (return value) for later resume
1110 save_area->rax = retval.error; // XXX: x86 1st return register
1111 save_area->rip = rip;
1112 save_area->eflags = rflags;
1113 save_area->rsp = user_stack_save;
1115 if(!dcb_current->is_vm_guest) {
1116 /* save and zero FS/GS selectors (they're unmodified by the syscall path) */
1117 __asm ("mov %%fs, %[fs] \n\t"
1118 "mov %%gs, %[gs] \n\t"
1119 "mov %[zero], %%fs \n\t"
1120 "mov %[zero], %%gs \n\t"
1123 [fs] "m" (save_area->fs),
1124 [gs] "m" (save_area->gs),
1129 lpaddr_t lpaddr = gen_phys_to_local_phys(dcb_current->guest_desc.vmcb.cap.u.frame.base);
1131 amd_vmcb_initialize(&vmcb, (void *)local_phys_to_mem(lpaddr));
1132 save_area->fs = amd_vmcb_fs_selector_rd(&vmcb);
1133 save_area->gs = amd_vmcb_gs_selector_rd(&vmcb);
1135 panic("VM Guests not supported on Xeon Phi");
1139 dispatch(to->u.endpoint.listener);
1140 panic("dispatch returned");
1142 } else { // not endpoint cap, call kernel handler through dispatch table
1143 uint64_t cmd = args[0];
1144 if (cmd >= CAP_MAX_CMD) {
1145 retval.error = SYS_ERR_ILLEGAL_INVOCATION;
1149 // Call the invocation
1150 invocation_handler_t invocation = invocations[to->type][cmd];
1151 if(invocation == NULL) {
1152 retval.error = SYS_ERR_ILLEGAL_INVOCATION;
1154 retval = invocation(to, cmd, &args[1]);
1160 // Yield the CPU to the next dispatcher
1162 TRACE(KERNEL, SC_YIELD, 0);
1163 retval = sys_yield((capaddr_t)arg0);
1164 TRACE(KERNEL, SC_YIELD, 1);
1167 // NOP system call for benchmarking purposes
1171 // Debug print system call
1173 TRACE(KERNEL, SC_PRINT, 0);
1174 retval.error = sys_print((char *)arg0, arg1);
1175 TRACE(KERNEL, SC_PRINT, 1);
1179 // FIXME: this should be a kernel cap invocation or similarly restricted
1180 case SYSCALL_REBOOT:
1184 case SYSCALL_X86_FPU_TRAP_ON:
1188 case SYSCALL_X86_RELOAD_LDT:
1189 maybe_reload_ldt(dcb_current, true);
1192 // Temporarily suspend the CPU
1193 case SYSCALL_SUSPEND:
1194 TRACE(KERNEL, SC_SUSPEND, 0);
1195 retval = sys_suspend((bool)arg0);
1196 TRACE(KERNEL, SC_SUSPEND, 1);
1199 case SYSCALL_GET_ABS_TIME:
1200 retval = sys_get_absolute_time();
1205 case DEBUG_CONTEXT_COUNTER_RESET:
1206 dispatch_csc_reset();
1209 case DEBUG_CONTEXT_COUNTER_READ:
1210 retval.value = dispatch_get_csc();
1213 case DEBUG_TIMESLICE_COUNTER_READ:
1214 retval.value = kernel_now;
1217 case DEBUG_FLUSH_CACHE:
1221 case DEBUG_SEND_IPI:
1222 apic_send_std_ipi(arg1, args[0], args[1]);
1225 case DEBUG_SET_BREAKPOINT:
1226 debugregs_set_breakpoint(arg1, args[0], args[1]);
1229 case DEBUG_GET_TSC_PER_MS:
1230 retval.value = timing_get_tsc_per_ms();
1233 case DEBUG_GET_APIC_TIMER:
1234 retval.value = apic_timer_get_count();
1237 case DEBUG_GET_APIC_TICKS_PER_SEC:
1238 retval.value = timing_get_apic_ticks_per_sec();
1241 case DEBUG_TRACE_PMEM_CTRL:
1242 #ifdef TRACE_PMEM_CAPS
1244 caps_trace_ctrl(arg1, args[0], args[1]);
1246 caps_trace_ctrl(arg1, 0, 0);
1250 retval.error = SYS_ERR_OK;
1254 case DEBUG_GET_APIC_ID:
1255 retval.value = apic_get_id();
1259 printk(LOG_ERR, "invalid sys_debug msg type\n");
1263 case SYSCALL_DEBUG_PRINT_CAPABILITIES: {
1264 retval = sys_debug_print_capabilities();
1269 printk(LOG_ERR, "sys_syscall: Illegal system call! "
1270 "(0x%lx, 0x%lx, 0x%lx)\n", syscall, arg0, arg1);
1271 retval.error = SYS_ERR_ILLEGAL_SYSCALL;
1275 // If dcb_current got removed, dispatch someone else
1276 if (dcb_current == NULL) {
1277 assert(err_is_ok(retval.error));
1278 dispatch(schedule());
1281 if (syscall == SYSCALL_INVOKE) {
1282 debug(SUBSYS_SYSCALL, "invoke returning 0x%lx 0x%lx\n",
1283 retval.error, retval.value);