T266: Resizing root cnode enabled for libmm slot allocator
[barrelfish] / kernel / arch / x86_64 / syscall.c
1 /**
2  * \file
3  * \brief System calls implementation.
4  */
5
6 /*
7  * Copyright (c) 2007, 2008, 2009, 2010, 2012, ETH Zurich.
8  * All rights reserved.
9  *
10  * This file is distributed under the terms in the attached LICENSE file.
11  * If you do not find this file, copies can be found by writing to:
12  * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
13  */
14
15 #include <kernel.h>
16 #include <kcb.h>
17 #include <sys_debug.h>
18 #include <syscall.h>
19 #include <barrelfish_kpi/syscalls.h>
20 #include <mdb/mdb.h>
21 #include <mdb/mdb_tree.h>
22 #include <dispatch.h>
23 #include <paging_kernel_arch.h>
24 #include <paging_generic.h>
25 #include <exec.h>
26 #include <fpu.h>
27 #include <arch/x86/x86.h>
28 #include <arch/x86/apic.h>
29 #include <arch/x86/global.h>
30 #include <arch/x86/perfmon.h>
31 #include <arch/x86/debugregs.h>
32 #include <arch/x86/syscall.h>
33 #include <arch/x86/timing.h>
34 #include <arch/x86/ipi_notify.h>
35 #include <barrelfish_kpi/sys_debug.h>
36 #include <barrelfish_kpi/lmp.h>
37 #include <barrelfish_kpi/dispatcher_shared_target.h>
38 #include <barrelfish_kpi/platform.h>
39 #include <trace/trace.h>
40 #include <useraccess.h>
41 #ifndef __k1om__
42 #include <vmkit.h>
43 #include <dev/amd_vmcb_dev.h>
44 #endif
45
/* Classic unsafe MIN: evaluates each argument twice -- do not pass
 * expressions with side effects. */
#define MIN(a,b)        ((a) < (b) ? (a) : (b))
47
48 extern uint64_t user_stack_save;
49
50 /* FIXME: lots of missing argument checks in this function */
51 static struct sysret handle_dispatcher_setup(struct capability *to,
52                                              int cmd, uintptr_t *args)
53 {
54     capaddr_t cptr      = args[0];
55     uint8_t   level     = args[1];
56     capaddr_t vptr      = args[2] & 0xffffffff;
57     capaddr_t dptr      = args[3] & 0xffffffff;
58     bool      run       = args[4];
59     capaddr_t odptr     = args[5] & 0xffffffff;
60
61     TRACE(KERNEL, SC_DISP_SETUP, 0);
62     struct sysret sr = sys_dispatcher_setup(to, cptr, level, vptr, dptr, run, odptr);
63     TRACE(KERNEL, SC_DISP_SETUP, 1);
64     return sr;
65 }
66
67 static struct sysret handle_dispatcher_properties(struct capability *to,
68                                                   int cmd, uintptr_t *args)
69 {
70     enum task_type type = args[0];
71     unsigned long deadline = args[1];
72     unsigned long wcet = args[2];
73     unsigned long period = args[3];
74     unsigned long release = args[4];
75     unsigned short weight = args[5];
76
77     TRACE(KERNEL, SC_DISP_PROPS, 0);
78     struct sysret sr = sys_dispatcher_properties(to, type, deadline, wcet, period,
79                                                  release, weight);
80     TRACE(KERNEL, SC_DISP_PROPS, 1);
81     return sr;
82 }
83
84 static struct sysret handle_retype_common(struct capability *root,
85                                           uintptr_t *args,
86                                           bool from_monitor)
87 {
88     capaddr_t source_croot    = args[0] >> 32;
89     capaddr_t source_cptr     = args[0] & 0xffffffff;
90     uint64_t offset           = args[1];
91     uint64_t type             = args[2];
92     uint64_t objsize          = args[3];
93     uint64_t objcount         = args[4];
94     capaddr_t dest_cspace_cptr= args[5] >> 32;
95     capaddr_t dest_cnode_cptr = args[5] & 0xffffffff;
96     uint64_t dest_slot        = args[6];
97     uint64_t dest_cnode_level = args[7];
98
99     TRACE(KERNEL, SC_RETYPE, 0);
100     struct sysret sr = sys_retype(root, source_croot, source_cptr, offset, type,
101                                   objsize, objcount, dest_cspace_cptr,
102                                   dest_cnode_cptr, dest_cnode_level,
103                                   dest_slot, from_monitor);
104     TRACE(KERNEL, SC_RETYPE, 1);
105     return sr;
106 }
107
108 static struct sysret handle_retype(struct capability *root,
109                                    int cmd, uintptr_t *args)
110 {
111         return handle_retype_common(root, args, false);
112 }
113
114 static struct sysret handle_create(struct capability *root,
115                                    int cmd, uintptr_t *args)
116 {
117     /* Retrieve arguments */
118     enum objtype type         = args[0];
119     size_t objsize            = args[1];
120     capaddr_t dest_cnode_cptr = args[2];
121     uint8_t dest_level        = args[3];
122     cslot_t dest_slot         = args[4];
123
124     TRACE(KERNEL, SC_CREATE, 0);
125     struct sysret sr = sys_create(root, type, objsize, dest_cnode_cptr,
126                                   dest_level, dest_slot);
127     TRACE(KERNEL, SC_CREATE, 1);
128     return sr;
129 }
130
131
132 static struct sysret handle_map(struct capability *ptable,
133                                 int cmd, uintptr_t *args)
134 {
135     /* Retrieve arguments */
136     uint64_t  slot            = args[0];
137     capaddr_t source_root_cptr= args[1] >> 32;
138     capaddr_t source_cptr     = args[1] & 0xffffffff;
139     uint8_t   source_level    = args[2];
140     uint64_t  flags           = args[3];
141     uint64_t  offset          = args[4];
142     uint64_t  pte_count       = args[5];
143     capaddr_t mapping_croot   = args[6] >> 32;
144     capaddr_t mapping_cnptr   = args[6] & 0xffffffff;
145     uint8_t   mapping_cn_level= args[7];
146     cslot_t   mapping_slot    = args[8];
147
148     TRACE(KERNEL, SC_MAP, 0);
149     struct sysret sr = sys_map(ptable, slot, source_root_cptr, source_cptr,
150                                source_level, flags, offset, pte_count,
151                                mapping_croot, mapping_cnptr,
152                                mapping_cn_level, mapping_slot);
153     TRACE(KERNEL, SC_MAP, 1);
154     return sr;
155 }
156
157 /**
158  * Common code for copying and minting except the mint flag and param passing
159  */
160 static struct sysret copy_or_mint(struct capability *root,
161                                   uintptr_t *args, bool mint)
162 {
163     /* Retrieve arguments */
164     capaddr_t dest_cspace_cptr = args[0];
165     capaddr_t destcn_cptr      = args[1];
166     uint64_t  dest_slot        = args[2];
167     capaddr_t source_croot_ptr = args[3];
168     capaddr_t source_cptr      = args[4];
169     uint8_t destcn_level       = args[5];
170     uint8_t source_level       = args[6];
171     uint64_t param1, param2;
172     // params only sent if mint operation
173     if (mint) {
174         param1 = args[7];
175         param2 = args[8];
176     } else {
177         param1 = param2 = 0;
178     }
179
180     TRACE(KERNEL, SC_COPY_OR_MINT, 0);
181     struct sysret sr = sys_copy_or_mint(root, dest_cspace_cptr, destcn_cptr, dest_slot,
182                                         source_croot_ptr, source_cptr,
183                                         destcn_level, source_level,
184                                         param1, param2, mint);
185     TRACE(KERNEL, SC_COPY_OR_MINT, 1);
186     return sr;
187 }
188
189 static struct sysret handle_mint(struct capability *root,
190                                  int cmd, uintptr_t *args)
191 {
192     return copy_or_mint(root, args, true);
193 }
194
195 static struct sysret handle_copy(struct capability *root,
196                                  int cmd, uintptr_t *args)
197 {
198     return copy_or_mint(root, args, false);
199 }
200
201 static struct sysret handle_delete(struct capability *root,
202                                    int cmd, uintptr_t *args)
203 {
204     capaddr_t cptr = args[0];
205     uint8_t level  = args[1];
206     return sys_delete(root, cptr, level);
207 }
208
209 static struct sysret handle_revoke(struct capability *root,
210                                    int cmd, uintptr_t *args)
211 {
212     capaddr_t cptr = args[0];
213     uint8_t level  = args[1];
214     return sys_revoke(root, cptr, level);
215 }
216
217 static struct sysret handle_get_state(struct capability *root,
218                                       int cmd, uintptr_t *args)
219 {
220     capaddr_t cptr = args[0];
221     uint8_t level  = args[1];
222     return sys_get_state(root, cptr, level);
223 }
224
225 static struct sysret handle_resize(struct capability *root,
226                                    int cmd, uintptr_t *args)
227 {
228     capaddr_t newroot_ptr = args[0];
229     capaddr_t retcn_ptr   = args[1];
230     cslot_t   retslot     = args[2];
231     return sys_resize_l1cnode(root, newroot_ptr, retcn_ptr, retslot);
232 }
233
234 #if 1
235 static struct sysret handle_cnode_cmd_obsolete(struct capability *root,
236                                                int cmd, uintptr_t *args)
237 {
238     panic("Trying to invoke GPT CNode: command %d", cmd);
239     return SYSRET(LIB_ERR_NOT_IMPLEMENTED);
240 }
241 #endif
242
243 static struct sysret handle_unmap(struct capability *pgtable,
244                                   int cmd, uintptr_t *args)
245 {
246     capaddr_t cptr = args[0];
247     uint8_t level  = args[1];
248
249     errval_t err;
250     struct cte *mapping;
251     err = caps_lookup_slot_2(&dcb_current->cspace.cap, cptr, level,
252                              &mapping, CAPRIGHTS_READ_WRITE);
253     if (err_is_fail(err)) {
254         return SYSRET(err_push(err, SYS_ERR_CAP_NOT_FOUND));
255     }
256
257     TRACE(KERNEL, SC_UNMAP, 0);
258     err = page_mappings_unmap(pgtable, mapping);
259     TRACE(KERNEL, SC_UNMAP, 1);
260     return SYSRET(err);
261 }
262
263 static struct sysret handle_mapping_destroy(struct capability *mapping,
264                                             int cmd, uintptr_t *args)
265 {
266     panic("NYI!");
267     return SYSRET(SYS_ERR_OK);
268 }
269
270 static struct sysret handle_mapping_modify(struct capability *mapping,
271                                            int cmd, uintptr_t *args)
272 {
273     // Modify flags of (part of) mapped region of frame
274     assert(type_is_mapping(mapping->type));
275
276     // unpack arguments
277     size_t offset = args[0]; // in pages; of first page to modify from first
278                              // page in mapped region
279     size_t pages  = args[1]; // #pages to modify
280     size_t flags  = args[2]; // new flags
281     genvaddr_t va = args[3]; // virtual addr hint
282
283     errval_t err = page_mappings_modify_flags(mapping, offset, pages, flags, va);
284
285     return (struct sysret) {
286         .error = err,
287         .value = 0,
288     };
289 }
290
291 /// Different handler for cap operations performed by the monitor
292 static struct sysret monitor_handle_retype(struct capability *kernel_cap,
293                                            int cmd, uintptr_t *args)
294 {
295     errval_t err;
296
297     capaddr_t root_caddr = args[0] & 0xFFFFFFFF;
298     capaddr_t root_level = (args[0] >> 32);
299
300     struct capability *root;
301     err = caps_lookup_cap_2(&dcb_current->cspace.cap, root_caddr, root_level,
302                             &root, CAPRIGHTS_READ);
303     if (err_is_fail(err)) {
304         return SYSRET(err_push(err, SYS_ERR_ROOT_CAP_LOOKUP));
305     }
306
307     /* This hides the first argument, which is resolved here and passed as 'root' */
308     return handle_retype_common(root, &args[1], true);
309 }
310
311 static struct sysret monitor_handle_has_descendants(struct capability *kernel_cap,
312                                                     int cmd, uintptr_t *args)
313 {
314     struct capability *src = (struct capability *)args;
315
316     struct cte *next = mdb_find_greater(src, false);
317
318     return (struct sysret) {
319         .error = SYS_ERR_OK,
320         .value = (next && is_ancestor(&next->cap, src)),
321     };
322 }
323
324 static struct sysret monitor_handle_delete_last(struct capability *kernel_cap,
325                                                 int cmd, uintptr_t *args)
326 {
327     capaddr_t root_caddr   = args[0];
328     uint8_t root_level     = args[1];
329     capaddr_t target_caddr = args[2];
330     uint8_t target_level   = args[3];
331     capaddr_t retcn_caddr  = args[4];
332     uint8_t retcn_level    = args[5];
333     cslot_t ret_slot       = args[6];
334
335     return sys_monitor_delete_last(root_caddr, root_level, target_caddr,
336                                    target_level, retcn_caddr, retcn_level,
337                                    ret_slot);
338 }
339
340 static struct sysret monitor_handle_delete_foreigns(struct capability *kernel_cap,
341                                                     int cmd, uintptr_t *args)
342 {
343     capaddr_t caddr = args[0];
344     uint8_t level   = args[1];
345     return sys_monitor_delete_foreigns(caddr, level);
346 }
347
348 static struct sysret monitor_handle_revoke_mark_tgt(struct capability *kernel_cap,
349                                                     int cmd, uintptr_t *args)
350 {
351     capaddr_t root_caddr   = args[0];
352     uint8_t root_level     = args[1];
353     capaddr_t target_caddr = args[2];
354     uint8_t target_level   = args[3];
355
356     return sys_monitor_revoke_mark_tgt(root_caddr, root_level,
357                                        target_caddr, target_level);
358 }
359
360 static struct sysret monitor_handle_revoke_mark_rels(struct capability *kernel_cap,
361                                                      int cmd, uintptr_t *args)
362 {
363     struct capability *base = (struct capability*)args;
364
365     return sys_monitor_revoke_mark_rels(base);
366 }
367
368 static struct sysret monitor_handle_delete_step(struct capability *kernel_cap,
369                                                 int cmd, uintptr_t *args)
370 {
371     capaddr_t ret_cn_addr  = args[0];
372     capaddr_t ret_cn_level = args[1];
373     capaddr_t ret_slot     = args[2];
374
375     return sys_monitor_delete_step(ret_cn_addr, ret_cn_level, ret_slot);
376 }
377
378 static struct sysret monitor_handle_clear_step(struct capability *kernel_cap,
379                                                int cmd, uintptr_t *args)
380 {
381     capaddr_t ret_cn_addr  = args[0];
382     capaddr_t ret_cn_level = args[1];
383     capaddr_t ret_slot     = args[2];
384
385     return sys_monitor_clear_step(ret_cn_addr, ret_cn_level, ret_slot);
386 }
387
388 static struct sysret monitor_handle_register(struct capability *kernel_cap,
389                                              int cmd, uintptr_t *args)
390 {
391     capaddr_t ep_caddr = args[0];
392
393     TRACE(KERNEL, SC_MONITOR_REGISTER, 0);
394     struct sysret sr = sys_monitor_register(ep_caddr);
395     TRACE(KERNEL, SC_MONITOR_REGISTER, 1);
396     return sr;
397 }
398
399 static struct sysret monitor_get_core_id(struct capability *kernel_cap,
400                                          int cmd, uintptr_t *args)
401 {
402     return (struct sysret){.error = SYS_ERR_OK, .value = my_core_id};
403 }
404
405 static struct sysret monitor_get_arch_id(struct capability *kernel_cap,
406                                          int cmd, uintptr_t *args)
407 {
408     return (struct sysret){.error = SYS_ERR_OK, .value = apic_id};
409 }
410
411 static struct sysret monitor_identify_cap_common(struct capability *kernel_cap,
412                                                  struct capability *root,
413                                                  uintptr_t *args)
414 {
415     capaddr_t cptr = args[0];
416     uint8_t level  = args[1];
417
418     struct capability *retbuf = (void *)args[2];
419
420     return sys_monitor_identify_cap(root, cptr, level, retbuf);
421 }
422
423 static struct sysret monitor_identify_cap(struct capability *kernel_cap,
424                                           int cmd, uintptr_t *args)
425 {
426     return monitor_identify_cap_common(kernel_cap, &dcb_current->cspace.cap, args);
427 }
428
429 static struct sysret monitor_identify_domains_cap(struct capability *kernel_cap,
430                                                   int cmd, uintptr_t *args)
431 {
432     errval_t err;
433
434     capaddr_t root_caddr = args[0];
435     capaddr_t root_level = args[1];
436
437     struct capability *root;
438     err = caps_lookup_cap_2(&dcb_current->cspace.cap, root_caddr, root_level,
439                             &root, CAPRIGHTS_READ);
440
441     if (err_is_fail(err)) {
442         return SYSRET(err_push(err, SYS_ERR_ROOT_CAP_LOOKUP));
443     }
444
445     /* XXX: this hides the first two arguments */
446     return monitor_identify_cap_common(kernel_cap, root, &args[2]);
447 }
448
449 static struct sysret monitor_cap_has_relations(struct capability *kernel_cap,
450                                                int cmd, uintptr_t *args)
451 {
452     capaddr_t caddr = args[0];
453     uint8_t level   = args[1];
454     uint8_t mask    = args[2];
455
456     return sys_cap_has_relations(caddr, level, mask);
457 }
458
459 static struct sysret monitor_remote_relations(struct capability *kernel_cap,
460                                               int cmd, uintptr_t *args)
461 {
462     capaddr_t root_addr = args[0];
463     int root_level      = args[1];
464     capaddr_t cptr      = args[2];
465     int level           = args[3];
466     uint8_t relations   = args[4]        & 0xFF;
467     uint8_t mask        = (args[4] >> 8) & 0xFF;
468
469     return sys_monitor_remote_relations(root_addr, root_level, cptr, level,
470                                         relations, mask);
471 }
472
473
/**
 * \brief Create a capability from raw metadata supplied by the monitor.
 *
 * The argument buffer begins with a serialized struct capability; the
 * scalar arguments (destination cnode, level, slot, owning core) follow at
 * the next 64-bit word offset 'pos'.
 */
static struct sysret monitor_create_cap(struct capability *kernel_cap,
                                        int cmd, uintptr_t *args)
{
    /* XXX: Get the raw metadata of the capability to create */
    struct capability *src = (struct capability *)args;
    // Word index of the first scalar argument after the in-band capability.
    int pos = ROUND_UP(sizeof(struct capability), sizeof(uint64_t)) / sizeof(uint64_t);

    /* Cannot create null caps */
    if (src->type == ObjType_Null) {
        return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
    }

    coreid_t owner = args[pos + 3];

    /* For certain types, only foreign copies can be created here */
    if ((src->type == ObjType_EndPoint || src->type == ObjType_Dispatcher
         || src->type == ObjType_Kernel || src->type == ObjType_IRQTable)
        && owner == my_core_id)
    {
        return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
    }

    /* Create the cap in the destination */
    capaddr_t cnode_cptr = args[pos];
    int cnode_level      = args[pos + 1];
    size_t slot          = args[pos + 2];
    // Destination cnode may only be addressed at level 0..2.
    assert(cnode_level <= 2);

    return SYSRET(caps_create_from_existing(&dcb_current->cspace.cap,
                                            cnode_cptr, cnode_level,
                                            slot, owner, src));
}
506
/**
 * \brief Copy an already-existing capability, from raw metadata supplied by
 *        the monitor.
 *
 * Argument layout matches monitor_create_cap: a serialized struct capability
 * followed by destination cnode address, level and slot.
 */
static struct sysret monitor_copy_existing(struct capability *kernel_cap,
                                        int cmd, uintptr_t *args)
{
    /* XXX: Get the raw metadata of the capability to create */
    struct capability *src = (struct capability *)args;
    // Word index of the first scalar argument after the in-band capability.
    int pos = ROUND_UP(sizeof(struct capability), sizeof(uint64_t)) / sizeof(uint64_t);

    capaddr_t cnode_cptr = args[pos];
    int cnode_level    = args[pos + 1];
    size_t slot        = args[pos + 2];

    return sys_monitor_copy_existing(src, cnode_cptr, cnode_level, slot);
}
520
521 static struct sysret monitor_nullify_cap(struct capability *kernel_cap,
522                                          int cmd, uintptr_t *args)
523 {
524     capaddr_t cptr = args[0];
525     uint8_t level  = args[1];
526
527     return sys_monitor_nullify_cap(cptr, level);
528 }
529
530 static struct sysret monitor_handle_sync_timer(struct capability *kern_cap,
531                                                int cmd, uintptr_t *args)
532 {
533     uint64_t synctime = args[0];
534     return sys_monitor_handle_sync_timer(synctime);
535 }
536
537 static struct sysret monitor_get_platform(struct capability *kern_cap,
538                                           int cmd, uintptr_t *args)
539 {
540     if (!access_ok(ACCESS_WRITE, args[0], sizeof(struct platform_info))) {
541         return SYSRET(SYS_ERR_INVALID_USER_BUFFER);
542     }
543     struct platform_info *pi = (struct platform_info *)args[0];
544     // x86: only have PC as platform
545     pi->arch = PI_ARCH_X86;
546     pi->platform = PI_PLATFORM_PC;
547     return SYSRET(SYS_ERR_OK);
548 }
549
550 static struct sysret handle_frame_identify(struct capability *to,
551                                            int cmd, uintptr_t *args)
552 {
553     // Return with physical base address of frame
554     assert(to->type == ObjType_Frame || to->type == ObjType_DevFrame ||
555            to->type == ObjType_RAM);
556     assert((get_address(to) & BASE_PAGE_MASK) == 0);
557
558     struct frame_identity *fi = (struct frame_identity *)args[0];
559
560     if (!access_ok(ACCESS_WRITE, (lvaddr_t)fi, sizeof(struct frame_identity))) {
561         return SYSRET(SYS_ERR_INVALID_USER_BUFFER);
562     }
563
564     fi->base = get_address(to);
565     fi->bytes = get_size(to);
566
567     return SYSRET(SYS_ERR_OK);
568 }
569
570 static struct sysret handle_vnode_identify(struct capability *to,
571                                            int cmd, uintptr_t *args)
572 {
573     // Return with physical base address of the VNode
574     assert(to->type == ObjType_VNode_x86_64_pml4 ||
575            to->type == ObjType_VNode_x86_64_pdpt ||
576            to->type == ObjType_VNode_x86_64_pdir ||
577            to->type == ObjType_VNode_x86_64_ptable);
578     
579     uint64_t base_addr = 0;
580     switch (to->type) {
581     case ObjType_VNode_x86_64_pml4:
582         base_addr = (uint64_t)(to->u.vnode_x86_64_pml4.base);
583         break;
584     case ObjType_VNode_x86_64_pdpt:
585         base_addr = (uint64_t)(to->u.vnode_x86_64_pdpt.base);
586         break;
587     case ObjType_VNode_x86_64_pdir:
588         base_addr = (uint64_t)(to->u.vnode_x86_64_pdir.base);
589         break;
590     case ObjType_VNode_x86_64_ptable:
591         base_addr = (uint64_t)(to->u.vnode_x86_64_ptable.base);
592         break;
593     default:
594         break;
595     }
596     assert((base_addr & BASE_PAGE_MASK) == 0);
597
598     return (struct sysret) {
599         .error = SYS_ERR_OK,
600         .value = (genpaddr_t)base_addr | ((uint8_t)to->type),
601     };
602 }
603
604
605 static struct sysret handle_io(struct capability *to, int cmd, uintptr_t *args)
606 {
607     uint64_t    port = args[0];
608     uint64_t    data = args[1]; // ignored for input
609
610     return sys_io(to, cmd, port, data);
611 }
612
613 static struct sysret handle_vmread(struct capability *to, 
614                                    int cmd, uintptr_t *args) 
615 {
616 #if defined(__k1om__) || defined(CONFIG_SVM)
617     return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
618 #else
619     errval_t err;
620     struct dcb *dcb = to->u.dispatcher.dcb;
621     lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
622     if (vmcs_base != vmptrst()) {
623         err = SYS_ERR_VMKIT_VMX_VMFAIL_INVALID;
624     } else {
625         err = vmread(args[0], (lvaddr_t *)args[1]);
626     }
627     return SYSRET(err);
628 #endif
629 }
630
631 static struct sysret handle_vmwrite(struct capability *to, 
632                                     int cmd, uintptr_t *args) 
633 {
634 #if defined(__k1om__) || defined(CONFIG_SVM)
635     return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
636 #else
637     errval_t err;
638     struct dcb *dcb = to->u.dispatcher.dcb;
639     lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
640     if (vmcs_base != vmptrst()) {
641         err = SYS_ERR_VMKIT_VMX_VMFAIL_INVALID;
642     } else {
643         err = vmwrite(args[0], args[1]);
644     }
645     return SYSRET(err);
646 #endif
647 }
648
649 static struct sysret handle_vmptrld(struct capability *to, 
650                                     int cmd, uintptr_t *args) 
651 {
652 #if defined(__k1om__) || defined(CONFIG_SVM)
653     return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
654 #else
655     errval_t err;
656     struct dcb *dcb = to->u.dispatcher.dcb;
657     lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
658     err = vmptrld(vmcs_base);
659     return SYSRET(err);
660 #endif
661 }
662
663 static struct sysret handle_vmclear(struct capability *to, 
664                                     int cmd, uintptr_t *args) 
665 {
666 #if defined(__k1om__) || defined(CONFIG_SVM)
667     return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
668 #else
669     errval_t err;
670     struct dcb *dcb = to->u.dispatcher.dcb;
671     lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
672     err = vmclear(vmcs_base);
673     return SYSRET(err);
674 #endif
675 }
676
#ifndef __k1om__
/**
 * \brief Configure a dispatcher as a VM guest.
 *
 * args: [0] endpoint cap for forwarding guest exits to the monitor,
 *       [1] guest root vnode (pml4), [2] VMCB/VMCS frame, [3] guest control
 *       frame. All four are looked up at level 2 in the caller's cspace.
 *
 * NOTE(review): the monitor endpoint is copied into the target DCB before
 * the remaining arguments are validated; a failure later in this function
 * leaves that copy in place.
 */
static struct sysret
handle_dispatcher_setup_guest (struct capability *to, int cmd, uintptr_t *args)
{
    errval_t err;
    struct dcb *dcb = to->u.dispatcher.dcb;

    capaddr_t epp = args[0];
    capaddr_t vnodep = args[1];
    capaddr_t vmcbp = args[2];
    capaddr_t ctrlp = args[3];

    // 0. Enable VM extensions
    err = vmkit_enable_virtualization();
    if (err != SYS_ERR_OK) {
        return SYSRET(err);
    }

    // 1. Check arguments
    // Monitor endpoint for exits of this guest
    struct cte *ep_cte;

    err = caps_lookup_slot_2(&dcb_current->cspace.cap, epp, 2,
                             &ep_cte, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    if (ep_cte->cap.type != ObjType_EndPoint) {
        return SYSRET(SYS_ERR_VMKIT_ENDPOINT_INVALID);
    }
    err = caps_copy_to_cte(&dcb->guest_desc.monitor_ep, ep_cte, false, 0, 0);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_VMKIT_ENDPOINT));
    }

    // Domain vspace: must be an x86-64 pml4 vnode
    struct capability *vnode_cap;
    err = caps_lookup_cap_2(&dcb_current->cspace.cap, vnodep, 2,
                            &vnode_cap, CAPRIGHTS_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    if (vnode_cap->type != ObjType_VNode_x86_64_pml4) {
        return SYSRET(SYS_ERR_DISP_VSPACE_INVALID);
    }

    assert(vnode_cap->type == ObjType_VNode_x86_64_pml4);

    // VMCB: must be a frame of at least one base page
    struct cte *vmcb_cte;
    err = caps_lookup_slot_2(&dcb_current->cspace.cap, vmcbp, 2,
                             &vmcb_cte, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    if (vmcb_cte->cap.type != ObjType_Frame ||
        vmcb_cte->cap.u.frame.bytes < BASE_PAGE_SIZE) {
        return SYSRET(SYS_ERR_VMKIT_VMCB_INVALID);
    }
    err = caps_copy_to_cte(&dcb->guest_desc.vmcb, vmcb_cte, false, 0, 0);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_VMKIT_VMCB));
    }

    // guest control frame: same constraints as the VMCB
    struct cte *ctrl_cte;
    err = caps_lookup_slot_2(&dcb_current->cspace.cap, ctrlp, 2,
                             &ctrl_cte, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    if (ctrl_cte->cap.type != ObjType_Frame ||
        ctrl_cte->cap.u.frame.bytes < BASE_PAGE_SIZE) {
        return SYSRET(SYS_ERR_VMKIT_CTRL_INVALID);
    }
    err = caps_copy_to_cte(&dcb->guest_desc.ctrl, ctrl_cte, false, 0, 0);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_VMKIT_CTRL));
    }

#ifndef CONFIG_SVM
    // Initialize VMCS for the single virtual-CPU here instead of in 
    // userspace, where the privilege level is not 0.
    err = initialize_vmcs(vmcb_cte->cap.u.frame.base);
    assert(err_is_ok(err));
#endif

    // 2. Set up the target DCB
/*     dcb->guest_desc.monitor_ep = ep_cap; */
    dcb->vspace = vnode_cap->u.vnode_x86_64_pml4.base;
    dcb->is_vm_guest = true;
/*     dcb->guest_desc.vmcb = vmcb_cap->u.frame.base; */
/*     dcb->guest_desc.ctrl = (void *)x86_64_phys_to_mem(ctrl_cap->u.frame.base); */

    return SYSRET(SYS_ERR_OK);
}
#endif
774
775 static struct sysret monitor_handle_domain_id(struct capability *monitor_cap,
776                                               int cmd, uintptr_t *args)
777 {
778     capaddr_t cptr = args[0];
779     domainid_t domain_id = args[1];
780
781     return sys_monitor_domain_id(cptr, domain_id);
782 }
783
784 static struct sysret monitor_get_cap_owner(struct capability *monitor_cap,
785                                            int cmd, uintptr_t *args)
786 {
787     capaddr_t root_addr = args[0];
788     uint8_t root_level = args[1];
789     capaddr_t cptr = args[2];
790     uint8_t level = args[3];
791
792     return sys_get_cap_owner(root_addr, root_level, cptr, level);
793 }
794
795 static struct sysret monitor_set_cap_owner(struct capability *monitor_cap,
796                                            int cmd, uintptr_t *args)
797 {
798     capaddr_t root_addr = args[0];
799     uint8_t root_level = args[1];
800     capaddr_t cptr = args[2];
801     uint8_t level = args[3];
802     coreid_t owner = args[4];
803
804     return sys_set_cap_owner(root_addr, root_level, cptr, level, owner);
805 }
806
807 static struct sysret monitor_lock_cap(struct capability *monitor_cap,
808                                       int cmd, uintptr_t *args)
809 {
810     capaddr_t root_addr = args[0];
811     uint8_t root_level = args[1];
812     capaddr_t cptr = args[2];
813     uint8_t level = args[3];
814
815     return sys_lock_cap(root_addr, root_level, cptr, level);
816 }
817
818 static struct sysret monitor_unlock_cap(struct capability *monitor_cap,
819                                         int cmd, uintptr_t *args)
820 {
821     capaddr_t root_addr = args[0];
822     uint8_t root_level = args[1];
823     capaddr_t cptr = args[2];
824     uint8_t level = args[3];
825
826     return sys_unlock_cap(root_addr, root_level, cptr, level);
827 }
828
829 /**
830  * \brief Set up tracing in the kernel
831  */
832 static struct sysret handle_trace_setup(struct capability *cap,
833                                         int cmd, uintptr_t *args)
834 {
835     struct capability *frame;
836     errval_t err;
837
838     /* lookup passed cap */
839     capaddr_t cptr = args[0];
840     err = caps_lookup_cap_2(&dcb_current->cspace.cap, cptr, 2, &frame,
841                             CAPRIGHTS_READ_WRITE);
842     if (err_is_fail(err)) {
843         return SYSRET(err);
844     }
845
846     lpaddr_t lpaddr = gen_phys_to_local_phys(frame->u.frame.base);
847     kernel_trace_buf = local_phys_to_mem(lpaddr);
848     //printf("kernel.%u: handle_trace_setup at %lx\n", apic_id, kernel_trace_buf);
849
850     // Copy boot applications.
851     trace_copy_boot_applications();
852
853     return SYSRET(SYS_ERR_OK);
854 }
855
856 static struct sysret handle_irqsrc_get_vector(struct capability * to, int cmd,
857         uintptr_t *args)
858 {
859     struct sysret ret;
860     ret.error = SYS_ERR_OK;
861     ret.value = to->u.irqsrc.vector;
862     return ret;
863
864 }
865
866
867 static struct sysret handle_irqdest_get_vector(struct capability *to, int cmd,
868                                             uintptr_t *args)
869 {
870     struct sysret ret;
871     ret.error = SYS_ERR_OK;
872     ret.value = to->u.irqdest.vector;
873     return ret;
874 }
875
876 static struct sysret handle_irqdest_connect(struct capability *to, int cmd,
877                                             uintptr_t *args)
878 {
879     return SYSRET(irq_connect(to, args[0]));
880 }
881
882 static struct sysret handle_irq_table_alloc(struct capability *to, int cmd,
883                                             uintptr_t *args)
884 {
885     struct sysret ret;
886     int outvec;
887     ret.error = irq_table_alloc(&outvec);
888     ret.value = outvec;
889     return ret;
890 }
891
892 static struct sysret handle_irq_table_alloc_dest_cap(struct capability *to, int cmd,
893                                             uintptr_t *args)
894 {
895     return SYSRET(irq_table_alloc_dest_cap(args[0],args[1],args[2]));
896 }
897
898
899 static struct sysret handle_irq_table_set(struct capability *to, int cmd,
900                                           uintptr_t *args)
901 {
902     return SYSRET(irq_table_set(args[0], args[1]));
903 }
904
905 static struct sysret handle_irq_table_delete(struct capability *to, int cmd,
906                                              uintptr_t *args)
907 {
908     return SYSRET(irq_table_delete(args[0]));
909 }
910
911 static struct sysret handle_ipi_notify_send(struct capability *cap,
912                                             int cmd, uintptr_t *args)
913 {
914     assert(cap->type == ObjType_Notify_IPI);
915     return ipi_raise_notify(cap->u.notify_ipi.coreid, cap->u.notify_ipi.chanid);
916 }
917
918 static struct sysret kernel_ipi_register(struct capability *cap,
919                                          int cmd, uintptr_t *args)
920 {
921     assert(cap->type == ObjType_Kernel);
922     capaddr_t ep = args[0];
923     int chanid = args[1];
924     return SYSRET(ipi_register_notification(ep, chanid));
925 }
926
// Delete an IPI notification registration. Stub: currently always
// asserts out with "NYI" (not yet implemented).
static struct sysret kernel_ipi_delete(struct capability *cap,
                                       int cmd, uintptr_t *args)
{
    assert(cap->type == ObjType_Kernel);
    assert(!"NYI");
    // Unreachable with asserts enabled; keeps the compiler happy otherwise.
    return SYSRET(SYS_ERR_OK);
}
934
935 static struct sysret dispatcher_dump_ptables(struct capability *cap,
936                                              int cmd, uintptr_t *args)
937 {
938     assert(cap->type == ObjType_Dispatcher);
939
940     printf("kernel_dump_ptables\n");
941
942     struct dcb *dispatcher = cap->u.dispatcher.dcb;
943
944     paging_dump_tables(dispatcher);
945
946     return SYSRET(SYS_ERR_OK);
947 }
948
949 static struct sysret dispatcher_dump_capabilities(struct capability *cap,
950                                              int cmd, uintptr_t *args)
951 {
952     assert(cap->type == ObjType_Dispatcher);
953
954     printf("dispatcher_dump_capabilities\n");
955
956     struct dcb *dispatcher = cap->u.dispatcher.dcb;
957
958     errval_t err = debug_print_cababilities(dispatcher);
959
960     return SYSRET(err);
961 }
962
963 /*
964  * \brief Activate performance monitoring
965  *
966  * Activates performance monitoring.
967  * \param xargs Expected parameters in args:
968  * - performance monitoring type
969  * - mask for given type
970  * - Counter id
971  * - Also count in privileged mode
972  * - Number of counts before overflow. This parameter may be used to
973  *   set tradeoff between accuracy and overhead. Set the counter to 0
974  *   to deactivate the usage of APIC.
975  * - Endpoint capability to be invoked when the counter overflows.
976  *   The buffer associated with the endpoint needs to be large enough
977  *   to hold several overflow notifications depending on the overflow
978  *   frequency.
979  */
980 static struct sysret performance_counter_activate(struct capability *cap,
981                                                   int cmd, uintptr_t *args)
982 {
983     uint8_t event = args[0];
984     uint8_t umask = args[1];
985     uint8_t counter_id = args[2];
986     bool kernel = args[3];
987     uint64_t counter_value = args[4];
988     capaddr_t ep_addr = args[5];
989
990     errval_t err;
991     struct capability *ep;
992     extern struct capability perfmon_callback_ep;
993
994     // Make sure that
995     assert(ep_addr!=0 || counter_value==0);
996
997     perfmon_init();
998     perfmon_measure_start(event, umask, counter_id, kernel, counter_value);
999
1000     if(ep_addr!=0) {
1001
1002         err = caps_lookup_cap_2(&dcb_current->cspace.cap, ep_addr, 2, &ep,
1003                                 CAPRIGHTS_READ);
1004         if(err_is_fail(err)) {
1005             return SYSRET(err);
1006         }
1007
1008         perfmon_callback_ep = *ep;
1009     }
1010
1011     return SYSRET(SYS_ERR_OK);
1012 }
1013
1014 /*
1015  * \brief Write counter values.
1016  */
1017 static struct sysret performance_counter_write(struct capability *cap,
1018                                                int cmd, uintptr_t *args)
1019 {
1020     uint8_t counter_id = args[0];
1021     uint64_t counter_value = args[1];
1022
1023     perfmon_measure_write(counter_id, counter_value);
1024     return SYSRET(SYS_ERR_OK);
1025 }
1026
1027 /*
1028  * \brief Deactivate performance counters again.
1029  */
1030 static struct sysret performance_counter_deactivate(struct capability *cap,
1031                                                   int cmd, uintptr_t *args)
1032 {
1033     perfmon_measure_stop();
1034     return SYSRET(SYS_ERR_OK);
1035 }
1036
1037 /*
1038  * \brief Return system-wide unique ID of this ID cap.
1039  */
1040 static struct sysret handle_idcap_identify(struct capability *cap, int cmd,
1041                                            uintptr_t *args)
1042 {
1043     idcap_id_t id;
1044     struct sysret sysret = sys_idcap_identify(cap, &id);
1045     sysret.value = id;
1046
1047     return sysret;
1048 }
1049
1050 static struct sysret kernel_send_init_ipi(struct capability *cap, int cmd,
1051                                           uintptr_t *args)
1052 {
1053     coreid_t destination = args[0];
1054 //    printk(LOG_DEBUG, "%s:%s:%d: destination=%"PRIuCOREID"\n",
1055 //           __FILE__, __FUNCTION__, __LINE__, destination);
1056
1057     apic_send_init_assert(destination, xapic_none);
1058     apic_send_init_deassert();
1059
1060     return SYSRET(SYS_ERR_OK);
1061 }
1062
1063 static struct sysret kernel_send_start_ipi(struct capability *cap,
1064                                            int cmd,
1065                                            uintptr_t *args)
1066 {
1067     coreid_t destination = args[0];
1068     genvaddr_t start_vector = X86_64_REAL_MODE_SEGMENT_TO_REAL_MODE_PAGE(X86_64_REAL_MODE_SEGMENT);
1069 //    printk(LOG_DEBUG, "%s:%d: destination=%"PRIuCOREID" start_vector=%"PRIxGENVADDR"\n",
1070 //           __FILE__, __LINE__, destination, start_vector);
1071
1072     apic_send_start_up(destination, xapic_none, start_vector);
1073
1074     return SYSRET(SYS_ERR_OK);
1075 }
1076
1077 static struct sysret kernel_get_global_phys(struct capability *cap,
1078                                            int cmd,
1079                                            uintptr_t *args)
1080 {
1081
1082     struct sysret sysret;
1083     sysret.value = mem_to_local_phys((lvaddr_t)global);
1084     sysret.error = SYS_ERR_OK;
1085
1086     return sysret;
1087 }
1088
1089 static struct sysret kernel_add_kcb(struct capability *kern_cap,
1090                                     int cmd, uintptr_t *args)
1091 {
1092     uintptr_t kcb_addr = args[0];
1093     struct kcb *new_kcb = (struct kcb *)kcb_addr;
1094
1095     return sys_kernel_add_kcb(new_kcb);
1096 }
1097
1098 static struct sysret kernel_remove_kcb(struct capability *kern_cap,
1099                                        int cmd, uintptr_t *args)
1100 {
1101     printk(LOG_NOTE, "in kernel_remove_kcb invocation!\n");
1102     uintptr_t kcb_addr = args[0];
1103     struct kcb *to_remove = (struct kcb *)kcb_addr;
1104
1105     return sys_kernel_remove_kcb(to_remove);
1106 }
1107
1108 static struct sysret kernel_suspend_kcb_sched(struct capability *kern_cap,
1109                                               int cmd, uintptr_t *args)
1110 {
1111     printk(LOG_NOTE, "in kernel_suspend_kcb_sched invocation!\n");
1112     return sys_kernel_suspend_kcb_sched((bool)args[0]);
1113 }
1114
1115 static struct sysret handle_kcb_identify(struct capability *to,
1116                                          int cmd, uintptr_t *args)
1117 {
1118     return sys_handle_kcb_identify(to, (struct frame_identity *)args[0]);
1119 }
1120
1121
// Type of an invocation handler: receives the invoked capability, the
// command number (args[0] of the invocation) and the remaining arguments.
typedef struct sysret (*invocation_handler_t)(struct capability *to,
                                              int cmd, uintptr_t *args);

// Dispatch table mapping (capability type, command) -> handler.
// Entries left NULL are rejected with SYS_ERR_ILLEGAL_INVOCATION by the
// SYSCALL_INVOKE path in sys_syscall below.
static invocation_handler_t invocations[ObjType_Num][CAP_MAX_CMD] = {
    [ObjType_Dispatcher] = {
        [DispatcherCmd_Setup] = handle_dispatcher_setup,
        [DispatcherCmd_Properties] = handle_dispatcher_properties,
#ifndef __k1om__
        [DispatcherCmd_SetupGuest] = handle_dispatcher_setup_guest,
#endif
        [DispatcherCmd_DumpPTables]  = dispatcher_dump_ptables,
        [DispatcherCmd_DumpCapabilities] = dispatcher_dump_capabilities,
        [DispatcherCmd_Vmread] = handle_vmread,
        [DispatcherCmd_Vmwrite] = handle_vmwrite,
        [DispatcherCmd_Vmptrld] = handle_vmptrld,
        [DispatcherCmd_Vmclear] = handle_vmclear,
    },
    [ObjType_KernelControlBlock] = {
        [FrameCmd_Identify] = handle_kcb_identify,
    },
    [ObjType_RAM] = {
        [RAMCmd_Identify] = handle_frame_identify,
    },
    [ObjType_Frame] = {
        [FrameCmd_Identify] = handle_frame_identify,
    },
    [ObjType_DevFrame] = {
        [FrameCmd_Identify] = handle_frame_identify,
    },
    // Old single-level CNodes: every command now reports "obsolete".
    [ObjType_CNode] = {
        [CNodeCmd_Copy]   = handle_cnode_cmd_obsolete,
        [CNodeCmd_Mint]   = handle_cnode_cmd_obsolete,
        [CNodeCmd_Retype] = handle_cnode_cmd_obsolete,
        [CNodeCmd_Create] = handle_cnode_cmd_obsolete,
        [CNodeCmd_Delete] = handle_cnode_cmd_obsolete,
        [CNodeCmd_Revoke] = handle_cnode_cmd_obsolete,
        [CNodeCmd_GetState] = handle_cnode_cmd_obsolete,
        [CNodeCmd_Resize] = handle_cnode_cmd_obsolete,

    },
    [ObjType_L1CNode] = {
        [CNodeCmd_Copy]   = handle_copy,
        [CNodeCmd_Mint]   = handle_mint,
        [CNodeCmd_Retype] = handle_retype,
        [CNodeCmd_Create] = handle_create,
        [CNodeCmd_Delete] = handle_delete,
        [CNodeCmd_Revoke] = handle_revoke,
        [CNodeCmd_GetState] = handle_get_state,
        [CNodeCmd_Resize] = handle_resize,
    },
    [ObjType_L2CNode] = {
        [CNodeCmd_Copy]   = handle_copy,
        [CNodeCmd_Mint]   = handle_mint,
        [CNodeCmd_Retype] = handle_retype,
        [CNodeCmd_Create] = handle_create,
        [CNodeCmd_Delete] = handle_delete,
        [CNodeCmd_Revoke] = handle_revoke,
        [CNodeCmd_GetState] = handle_get_state,
        [CNodeCmd_Resize] = handle_resize,
    },
    [ObjType_VNode_x86_64_pml4] = {
        [VNodeCmd_Identify] = handle_vnode_identify,
        [VNodeCmd_Map]   = handle_map,
        [VNodeCmd_Unmap] = handle_unmap,
    },
    [ObjType_VNode_x86_64_pdpt] = {
        [VNodeCmd_Identify] = handle_vnode_identify,
        [VNodeCmd_Map]   = handle_map,
        [VNodeCmd_Unmap] = handle_unmap,
    },
    [ObjType_VNode_x86_64_pdir] = {
        [VNodeCmd_Identify] = handle_vnode_identify,
        [VNodeCmd_Map]   = handle_map,
        [VNodeCmd_Unmap] = handle_unmap,
    },
    [ObjType_VNode_x86_64_ptable] = {
        [VNodeCmd_Identify] = handle_vnode_identify,
        [VNodeCmd_Map]   = handle_map,
        [VNodeCmd_Unmap] = handle_unmap,
    },
    [ObjType_Frame_Mapping] = {
        [MappingCmd_Destroy] = handle_mapping_destroy,
        [MappingCmd_Modify] = handle_mapping_modify,
    },
    [ObjType_DevFrame_Mapping] = {
        [MappingCmd_Destroy] = handle_mapping_destroy,
        [MappingCmd_Modify] = handle_mapping_modify,
    },
    [ObjType_VNode_x86_64_pml4_Mapping] = {
        [MappingCmd_Destroy] = handle_mapping_destroy,
        [MappingCmd_Modify] = handle_mapping_modify,
    },
    [ObjType_VNode_x86_64_pdpt_Mapping] = {
        [MappingCmd_Destroy] = handle_mapping_destroy,
        [MappingCmd_Modify] = handle_mapping_modify,
    },
    [ObjType_VNode_x86_64_pdir_Mapping] = {
        [MappingCmd_Destroy] = handle_mapping_destroy,
        [MappingCmd_Modify] = handle_mapping_modify,
    },
    [ObjType_VNode_x86_64_ptable_Mapping] = {
        [MappingCmd_Destroy] = handle_mapping_destroy,
        [MappingCmd_Modify] = handle_mapping_modify,
    },
    // Privileged commands available through the kernel capability
    // (mostly used by the monitor).
    [ObjType_Kernel] = {
        [KernelCmd_Get_core_id]  = monitor_get_core_id,
        [KernelCmd_Get_arch_id]  = monitor_get_arch_id,
        [KernelCmd_Identify_cap] = monitor_identify_cap,
        [KernelCmd_Identify_domains_cap] = monitor_identify_domains_cap,
        [KernelCmd_Remote_relations] = monitor_remote_relations,
        [KernelCmd_Cap_has_relations] = monitor_cap_has_relations,
        [KernelCmd_Create_cap]   = monitor_create_cap,
        [KernelCmd_Copy_existing] = monitor_copy_existing,
        [KernelCmd_Nullify_cap]  = monitor_nullify_cap,
        [KernelCmd_Setup_trace]  = handle_trace_setup,
        [KernelCmd_Register]     = monitor_handle_register,
        [KernelCmd_Domain_Id]    = monitor_handle_domain_id,
        [KernelCmd_Get_cap_owner] = monitor_get_cap_owner,
        [KernelCmd_Set_cap_owner] = monitor_set_cap_owner,
        [KernelCmd_Lock_cap]     = monitor_lock_cap,
        [KernelCmd_Unlock_cap]   = monitor_unlock_cap,
        [KernelCmd_Retype]       = monitor_handle_retype,
        [KernelCmd_Has_descendants] = monitor_handle_has_descendants,
        [KernelCmd_Delete_last]  = monitor_handle_delete_last,
        [KernelCmd_Delete_foreigns] = monitor_handle_delete_foreigns,
        [KernelCmd_Revoke_mark_target] = monitor_handle_revoke_mark_tgt,
        [KernelCmd_Revoke_mark_relations] = monitor_handle_revoke_mark_rels,
        [KernelCmd_Delete_step] = monitor_handle_delete_step,
        [KernelCmd_Clear_step] = monitor_handle_clear_step,
        [KernelCmd_Sync_timer]   = monitor_handle_sync_timer,
        [KernelCmd_IPI_Register] = kernel_ipi_register,
        [KernelCmd_IPI_Delete]   = kernel_ipi_delete,
        [KernelCmd_GetGlobalPhys] = kernel_get_global_phys,
        [KernelCmd_Add_kcb]      = kernel_add_kcb,
        [KernelCmd_Remove_kcb]   = kernel_remove_kcb,
        [KernelCmd_Suspend_kcb_sched]   = kernel_suspend_kcb_sched,
        [KernelCmd_Get_platform] = monitor_get_platform,
    },
    [ObjType_IPI] = {
        [IPICmd_Send_Start] = kernel_send_start_ipi,
        [IPICmd_Send_Init] = kernel_send_init_ipi,
    },
        [ObjType_IRQDest] = {
        [IRQDestCmd_Connect] = handle_irqdest_connect,
        [IRQDestCmd_GetVector] = handle_irqdest_get_vector
        },
        [ObjType_IRQSrc] = {
        [IRQSrcCmd_GetVector] = handle_irqsrc_get_vector,
        },
    [ObjType_IRQTable] = {
        [IRQTableCmd_Alloc] = handle_irq_table_alloc,
        [IRQTableCmd_AllocDestCap] = handle_irq_table_alloc_dest_cap,
        [IRQTableCmd_Set] = handle_irq_table_set,
        [IRQTableCmd_Delete] = handle_irq_table_delete
    },
    [ObjType_IO] = {
        [IOCmd_Outb] = handle_io,
        [IOCmd_Outw] = handle_io,
        [IOCmd_Outd] = handle_io,
        [IOCmd_Inb] = handle_io,
        [IOCmd_Inw] = handle_io,
        [IOCmd_Ind] = handle_io
    },
    [ObjType_Notify_IPI] = {
        [NotifyCmd_Send] = handle_ipi_notify_send
    },
    [ObjType_PerfMon] = {
        [PerfmonCmd_Activate] = performance_counter_activate,
        [PerfmonCmd_Deactivate] = performance_counter_deactivate,
        [PerfmonCmd_Write] = performance_counter_write,
    },
    [ObjType_ID] = {
        [IDCmd_Identify] = handle_idcap_identify,
    }
};
1297
/* syscall C entry point; called only from entry.S so no prototype in header */
struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
                          uint64_t *args, uint64_t rflags, uint64_t rip);
// Top-level system-call dispatcher. `syscall` selects the operation;
// arg0/arg1/args carry the per-syscall payload, rflags/rip are the
// caller's saved user state (used when we context-switch away).
struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
                          uint64_t *args, uint64_t rflags, uint64_t rip)
{
    struct sysret retval = { .error = SYS_ERR_OK, .value = 0 };

    // XXX
    // Set dcb_current->disabled correctly.  This should really be
    // done in entry.S
    // XXX
    assert(dcb_current != NULL);
    if (dispatcher_is_disabled_ip(dcb_current->disp, rip)) {
        dcb_current->disabled = true;
    } else {
        dcb_current->disabled = false;
    }
    assert(get_dispatcher_shared_generic(dcb_current->disp)->disabled ==
            dcb_current->disabled);

    switch(syscall) {
    case SYSCALL_INVOKE: /* Handle capability invocation */
    {
        // unpack "header" word
        // arg0 layout (high to low bits): cptr[63:32], send level[31:24],
        // invoke level[23:16], message length in words[15:8], flags[7:0]
        capaddr_t invoke_cptr = arg0 >> 32;
        uint8_t send_level = arg0 >> 24;
        uint8_t invoke_level = arg0 >> 16;
        uint8_t length_words = arg0 >> 8;
        uint8_t flags = arg0;

        debug(SUBSYS_SYSCALL, "sys_invoke(0x%x(%d), 0x%lx)\n",
              invoke_cptr, invoke_level, arg1);
        //printk(LOG_NOTE, "sys_invoke(0x%x(%d), 0x%lx)\n",
        //      invoke_cptr, invoke_level, arg1);

        // Capability to invoke
        struct capability *to = NULL;
        retval.error = caps_lookup_cap_2(&dcb_current->cspace.cap, invoke_cptr,
                                         invoke_level, &to, CAPRIGHTS_READ);
        if (err_is_fail(retval.error)) {
            break;
        }

        assert(to != NULL);
        assert(to->type < ObjType_Num);

        // Endpoint cap, do LMP
        if (to->type == ObjType_EndPoint) {
            struct dcb *listener = to->u.endpoint.listener;
            assert(listener != NULL);

            if (listener->disp == 0) {
                retval.error = SYS_ERR_LMP_NO_TARGET;
                break;
            }

            /* limit length of message from buggy/malicious sender */
            length_words = MIN(length_words, LMP_MSG_LENGTH);

            // does the sender want to yield their timeslice on success?
            bool sync = flags & LMP_FLAG_SYNC;
            // does the sender want to yield to the target if undeliverable?
            bool yield = flags & LMP_FLAG_YIELD;
            // is the cap (if present) to be deleted on send?
            bool give_away = flags & LMP_FLAG_GIVEAWAY;

            // try to deliver message
            retval.error = lmp_deliver(to, dcb_current, args, length_words,
                                       arg1, send_level, give_away);

            /* Switch to reciever upon successful delivery with sync flag,
             * or (some cases of) unsuccessful delivery with yield flag */
            enum err_code err_code = err_no(retval.error);
            if ((sync && err_is_ok(retval.error)) ||
                (yield && (err_code == SYS_ERR_LMP_BUF_OVERFLOW
                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_LOOKUP
                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_INVALID
                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_SLOT_OCCUPIED))
                    ) {
                if (err_is_fail(retval.error)) {
                    struct dispatcher_shared_generic *current_disp =
                        get_dispatcher_shared_generic(dcb_current->disp);
                    struct dispatcher_shared_generic *listener_disp =
                        get_dispatcher_shared_generic(listener->disp);
                    debug(SUBSYS_DISPATCH, "LMP failed; %.*s yields to %.*s: %u\n",
                          DISP_NAME_LEN, current_disp->name,
                          DISP_NAME_LEN, listener_disp->name, err_code);
                }

                // special-case context switch: ensure correct state in current DCB
                dispatcher_handle_t handle = dcb_current->disp;
                struct dispatcher_shared_x86_64 *disp =
                    get_dispatcher_shared_x86_64(handle);
                dcb_current->disabled = dispatcher_is_disabled_ip(handle, rip);
                struct registers_x86_64 *save_area;
                if (dcb_current->disabled) {
                    save_area = &disp->disabled_save_area;
                } else {
                    save_area = &disp->enabled_save_area;
                }

                // NOTE(review): the disabled case panics immediately below,
                // so only the enabled_save_area branch is reachable in
                // practice.
                // Should be enabled. Else, how do we do an invocation??
                if(dcb_current->disabled) {
                  panic("Dispatcher needs to be enabled for this invocation");
                }

                // save calling dispatcher's registers, so that when the dispatcher
                // next runs, it has a valid state in the relevant save area.
                // Save RIP, RFLAGS, RSP and set RAX (return value) for later resume
                save_area->rax = retval.error; // XXX: x86 1st return register
                save_area->rip = rip;
                save_area->eflags = rflags;
                save_area->rsp = user_stack_save;

                if(!dcb_current->is_vm_guest) {
                  /* save and zero FS/GS selectors (they're unmodified by the syscall path) */
                  __asm ("mov     %%fs, %[fs]     \n\t"
                         "mov     %%gs, %[gs]     \n\t"
                         "mov     %[zero], %%fs   \n\t"
                         "mov     %[zero], %%gs   \n\t"
                         : /* No output */
                         :
                         [fs] "m" (save_area->fs),
                         [gs] "m" (save_area->gs),
                         [zero] "r" (0)
                         );
                } else {
#ifndef __k1om__
#ifdef CONFIG_SVM
                  // VM guest: FS/GS live in the VMCB, not the CPU registers.
                  lpaddr_t lpaddr = gen_phys_to_local_phys(dcb_current->guest_desc.vmcb.cap.u.frame.base);
                  amd_vmcb_t vmcb;
                  amd_vmcb_initialize(&vmcb, (void *)local_phys_to_mem(lpaddr));
                  save_area->fs = amd_vmcb_fs_selector_rd(&vmcb);
                  save_area->gs = amd_vmcb_gs_selector_rd(&vmcb);
#else
                  errval_t err;
                  err = vmread(VMX_GUEST_FS_SEL, (uint64_t *)&save_area->fs);
                  err += vmread(VMX_GUEST_GS_SEL, (uint64_t *)&save_area->gs);
                  assert(err_is_ok(err));
#endif
#else
          panic("VM Guests not supported on Xeon Phi");
#endif
                }

                // Hand the CPU to the LMP receiver; does not return.
                dispatch(to->u.endpoint.listener);
                panic("dispatch returned");
            }
        } else { // not endpoint cap, call kernel handler through dispatch table
            // printk(LOG_NOTE, "sys_invoke: to->type = %d, cmd = %"PRIu64"\n",
            //         to->type, args[0]);

            uint64_t cmd = args[0];
            if (cmd >= CAP_MAX_CMD) {
                retval.error = SYS_ERR_ILLEGAL_INVOCATION;
                break;
            }

            // Call the invocation
            invocation_handler_t invocation = invocations[to->type][cmd];
            if(invocation == NULL) {
                printk(LOG_WARN, "invocation not found. type: %"PRIu32", cmd: %"PRIu64"\n",
                              to->type, cmd);
                retval.error = SYS_ERR_ILLEGAL_INVOCATION;
            } else {
                retval = invocation(to, cmd, &args[1]);
            }
        }
        break;
    }

        // Yield the CPU to the next dispatcher
    case SYSCALL_YIELD:
        TRACE(KERNEL, SC_YIELD, 0);
        retval = sys_yield((capaddr_t)arg0);
        TRACE(KERNEL, SC_YIELD, 1);
        break;

        // NOP system call for benchmarking purposes
    case SYSCALL_NOP:
        break;

        // Debug print system call
    case SYSCALL_PRINT:
        TRACE(KERNEL, SC_PRINT, 0);
        retval.error = sys_print((char *)arg0, arg1);
        TRACE(KERNEL, SC_PRINT, 1);
        break;

        // Reboot!
        // FIXME: this should be a kernel cap invocation or similarly restricted
    case SYSCALL_REBOOT:
        reboot();
        break;

    case SYSCALL_X86_FPU_TRAP_ON:
        fpu_trap_on();
        break;

    case SYSCALL_X86_RELOAD_LDT:
        maybe_reload_ldt(dcb_current, true);
        break;

        // Temporarily suspend the CPU
    case SYSCALL_SUSPEND:
        TRACE(KERNEL, SC_SUSPEND, 0);
        retval = sys_suspend((bool)arg0);
        TRACE(KERNEL, SC_SUSPEND, 1);
        break;

    case SYSCALL_GET_ABS_TIME:
        retval = sys_get_absolute_time();
        break;

    case SYSCALL_DEBUG:
        // Debug sub-commands, multiplexed on arg0.
        switch(arg0) {
        case DEBUG_CONTEXT_COUNTER_RESET:
            dispatch_csc_reset();
            break;

        case DEBUG_CONTEXT_COUNTER_READ:
            retval.value = dispatch_get_csc();
            break;

        case DEBUG_TIMESLICE_COUNTER_READ:
            retval.value = kernel_now;
            break;

        case DEBUG_FLUSH_CACHE:
            wbinvd();
            break;

        case DEBUG_SEND_IPI:
            apic_send_std_ipi(arg1, args[0], args[1]);
            break;

        case DEBUG_SET_BREAKPOINT:
            debugregs_set_breakpoint(arg1, args[0], args[1]);
            break;

        case DEBUG_GET_TSC_PER_MS:
            retval.value = timing_get_tsc_per_ms();
            break;

        case DEBUG_GET_APIC_TIMER:
            retval.value = apic_timer_get_count();
            break;

        case DEBUG_GET_APIC_TICKS_PER_SEC:
            retval.value = timing_get_apic_ticks_per_sec();
            break;

        case DEBUG_TRACE_PMEM_CTRL:
#ifdef TRACE_PMEM_CAPS
            if (arg1) {
                caps_trace_ctrl(arg1, args[0], args[1]);
            } else {
                caps_trace_ctrl(arg1, 0, 0);
            }
#endif
            retval.value = 0;
            retval.error = SYS_ERR_OK;
            break;


        case DEBUG_GET_APIC_ID:
            retval.value = apic_get_id();
            break;

        case DEBUG_CREATE_IRQ_SRC_CAP:
            retval.error = irq_debug_create_src_cap(arg1, args[0], args[1], args[2]);
            break;

        default:
            printk(LOG_ERR, "invalid sys_debug msg type\n");
        }
        break;

    default:
        printk(LOG_ERR, "sys_syscall: Illegal system call! "
               "(0x%lx, 0x%lx, 0x%lx)\n", syscall, arg0, arg1);
        retval.error = SYS_ERR_ILLEGAL_SYSCALL;
        break;
    }

    // If dcb_current got removed, dispatch someone else
    if (dcb_current == NULL) {
        assert(err_is_ok(retval.error));
        dispatch(schedule());
    }

    if (syscall == SYSCALL_INVOKE) {
        debug(SUBSYS_SYSCALL, "invoke returning 0x%lx 0x%lx\n",
              retval.error, retval.value);
    }

    return retval;
}