Fixup of some headers.
[barrelfish] / kernel / arch / x86_64 / syscall.c
1 /**
2  * \file
3  * \brief System calls implementation.
4  */
5
6 /*
7  * Copyright (c) 2007, 2008, 2009, 2010, 2012, ETH Zurich.
8  * All rights reserved.
9  *
10  * This file is distributed under the terms in the attached LICENSE file.
11  * If you do not find this file, copies can be found by writing to:
12  * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
13  */
14
15 #include <kernel.h>
16 #include <kcb.h>
17 #include <sys_debug.h>
18 #include <syscall.h>
19 #include <barrelfish_kpi/syscalls.h>
20 #include <mdb/mdb.h>
21 #include <mdb/mdb_tree.h>
22 #include <dispatch.h>
23 #include <paging_kernel_arch.h>
24 #include <paging_generic.h>
25 #include <exec.h>
26 #include <fpu.h>
27 #include <systime.h>
28 #include <arch/x86/x86.h>
29 #include <arch/x86/apic.h>
30 #include <arch/x86/global.h>
31 #include <arch/x86/perfmon.h>
32 #include <arch/x86/debugregs.h>
33 #include <arch/x86/syscall.h>
34 #include <arch/x86/timing.h>
35 #include <arch/x86/ipi_notify.h>
36 #include <barrelfish_kpi/sys_debug.h>
37 #include <barrelfish_kpi/lmp.h>
38 #include <barrelfish_kpi/dispatcher_shared_target.h>
39 #include <barrelfish_kpi/platform.h>
40 #include <trace/trace.h>
41 #include <useraccess.h>
42 #ifndef __k1om__
43 #include <vmkit.h>
44 #include <dev/amd_vmcb_dev.h>
45 #endif
46
47 extern uint64_t user_stack_save;
48
49 /* FIXME: lots of missing argument checks in this function */
50 static struct sysret handle_dispatcher_setup(struct capability *to,
51                                              int cmd, uintptr_t *args)
52 {
53     capaddr_t cptr      = args[0];
54     uint8_t   level     = args[1];
55     capaddr_t vptr      = args[2] & 0xffffffff;
56     capaddr_t dptr      = args[3] & 0xffffffff;
57     bool      run       = args[4];
58     capaddr_t odptr     = args[5] & 0xffffffff;
59
60     TRACE(KERNEL, SC_DISP_SETUP, 0);
61     struct sysret sr = sys_dispatcher_setup(to, cptr, level, vptr, dptr, run, odptr);
62     TRACE(KERNEL, SC_DISP_SETUP, 1);
63     return sr;
64 }
65
66 static struct sysret handle_dispatcher_properties(struct capability *to,
67                                                   int cmd, uintptr_t *args)
68 {
69     enum task_type type = args[0];
70     unsigned long deadline = args[1];
71     unsigned long wcet = args[2];
72     unsigned long period = args[3];
73     unsigned long release = args[4];
74     unsigned short weight = args[5];
75
76     TRACE(KERNEL, SC_DISP_PROPS, 0);
77     struct sysret sr = sys_dispatcher_properties(to, type, deadline, wcet, period,
78                                                  release, weight);
79     TRACE(KERNEL, SC_DISP_PROPS, 1);
80     return sr;
81 }
82
83 static struct sysret handle_retype_common(struct capability *root,
84                                           uintptr_t *args,
85                                           bool from_monitor)
86 {
87     capaddr_t source_croot    = args[0] >> 32;
88     capaddr_t source_cptr     = args[0] & 0xffffffff;
89     uint64_t offset           = args[1];
90     uint64_t type             = args[2];
91     uint64_t objsize          = args[3];
92     uint64_t objcount         = args[4];
93     capaddr_t dest_cspace_cptr= args[5] >> 32;
94     capaddr_t dest_cnode_cptr = args[5] & 0xffffffff;
95     uint64_t dest_slot        = args[6];
96     uint64_t dest_cnode_level = args[7];
97
98     TRACE(KERNEL, SC_RETYPE, 0);
99     struct sysret sr = sys_retype(root, source_croot, source_cptr, offset, type,
100                                   objsize, objcount, dest_cspace_cptr,
101                                   dest_cnode_cptr, dest_cnode_level,
102                                   dest_slot, from_monitor);
103     TRACE(KERNEL, SC_RETYPE, 1);
104     return sr;
105 }
106
107 static struct sysret handle_retype(struct capability *root,
108                                    int cmd, uintptr_t *args)
109 {
110         return handle_retype_common(root, args, false);
111 }
112
113 static struct sysret handle_create(struct capability *root,
114                                    int cmd, uintptr_t *args)
115 {
116     /* Retrieve arguments */
117     enum objtype type         = args[0];
118     size_t objsize            = args[1];
119     capaddr_t dest_cnode_cptr = args[2];
120     uint8_t dest_level        = args[3];
121     cslot_t dest_slot         = args[4];
122
123     TRACE(KERNEL, SC_CREATE, 0);
124     struct sysret sr = sys_create(root, type, objsize, dest_cnode_cptr,
125                                   dest_level, dest_slot);
126     TRACE(KERNEL, SC_CREATE, 1);
127     return sr;
128 }
129
130
131 static struct sysret handle_map(struct capability *ptable,
132                                 int cmd, uintptr_t *args)
133 {
134     /* Retrieve arguments */
135     uint64_t  slot            = args[0];
136     capaddr_t source_root_cptr= args[1] >> 32;
137     capaddr_t source_cptr     = args[1] & 0xffffffff;
138     uint8_t   source_level    = args[2];
139     uint64_t  flags           = args[3];
140     uint64_t  offset          = args[4];
141     uint64_t  pte_count       = args[5];
142     capaddr_t mapping_croot   = args[6] >> 32;
143     capaddr_t mapping_cnptr   = args[6] & 0xffffffff;
144     uint8_t   mapping_cn_level= args[7];
145     cslot_t   mapping_slot    = args[8];
146
147     TRACE(KERNEL, SC_MAP, 0);
148     struct sysret sr = sys_map(ptable, slot, source_root_cptr, source_cptr,
149                                source_level, flags, offset, pte_count,
150                                mapping_croot, mapping_cnptr,
151                                mapping_cn_level, mapping_slot);
152     TRACE(KERNEL, SC_MAP, 1);
153     return sr;
154 }
155
156 /**
157  * Common code for copying and minting except the mint flag and param passing
158  */
159 static struct sysret copy_or_mint(struct capability *root,
160                                   uintptr_t *args, bool mint)
161 {
162     /* Retrieve arguments */
163     capaddr_t dest_cspace_cptr = args[0];
164     capaddr_t destcn_cptr      = args[1];
165     uint64_t  dest_slot        = args[2];
166     capaddr_t source_croot_ptr = args[3];
167     capaddr_t source_cptr      = args[4];
168     uint8_t destcn_level       = args[5];
169     uint8_t source_level       = args[6];
170     uint64_t param1, param2;
171     // params only sent if mint operation
172     if (mint) {
173         param1 = args[7];
174         param2 = args[8];
175     } else {
176         param1 = param2 = 0;
177     }
178
179     TRACE(KERNEL, SC_COPY_OR_MINT, 0);
180     struct sysret sr = sys_copy_or_mint(root, dest_cspace_cptr, destcn_cptr, dest_slot,
181                                         source_croot_ptr, source_cptr,
182                                         destcn_level, source_level,
183                                         param1, param2, mint);
184     TRACE(KERNEL, SC_COPY_OR_MINT, 1);
185     return sr;
186 }
187
188 static struct sysret handle_mint(struct capability *root,
189                                  int cmd, uintptr_t *args)
190 {
191     return copy_or_mint(root, args, true);
192 }
193
194 static struct sysret handle_copy(struct capability *root,
195                                  int cmd, uintptr_t *args)
196 {
197     return copy_or_mint(root, args, false);
198 }
199
200 static struct sysret handle_delete(struct capability *root,
201                                    int cmd, uintptr_t *args)
202 {
203     capaddr_t cptr = args[0];
204     uint8_t level  = args[1];
205     return sys_delete(root, cptr, level);
206 }
207
208 static struct sysret handle_revoke(struct capability *root,
209                                    int cmd, uintptr_t *args)
210 {
211     capaddr_t cptr = args[0];
212     uint8_t level  = args[1];
213     return sys_revoke(root, cptr, level);
214 }
215
216 static struct sysret handle_get_state(struct capability *root,
217                                       int cmd, uintptr_t *args)
218 {
219     capaddr_t cptr = args[0];
220     uint8_t level  = args[1];
221     return sys_get_state(root, cptr, level);
222 }
223
224 static struct sysret handle_get_size(struct capability *root,
225                                       int cmd, uintptr_t *args)
226 {
227     return sys_get_size_l1cnode(root);
228 }
229
230 static struct sysret handle_resize(struct capability *root,
231                                    int cmd, uintptr_t *args)
232 {
233     capaddr_t newroot_ptr = args[0];
234     capaddr_t retcn_ptr   = args[1];
235     cslot_t   retslot     = args[2];
236     return sys_resize_l1cnode(root, newroot_ptr, retcn_ptr, retslot);
237 }
238
239 static struct sysret handle_unmap(struct capability *pgtable,
240                                   int cmd, uintptr_t *args)
241 {
242     capaddr_t cptr = args[0];
243     uint8_t level  = args[1];
244
245     errval_t err;
246     struct cte *mapping;
247     err = caps_lookup_slot(&dcb_current->cspace.cap, cptr, level,
248                            &mapping, CAPRIGHTS_READ_WRITE);
249     if (err_is_fail(err)) {
250         return SYSRET(err_push(err, SYS_ERR_CAP_NOT_FOUND));
251     }
252
253     TRACE(KERNEL, SC_UNMAP, 0);
254     err = page_mappings_unmap(pgtable, mapping);
255     TRACE(KERNEL, SC_UNMAP, 1);
256     return SYSRET(err);
257 }
258
259 static struct sysret handle_mapping_destroy(struct capability *mapping,
260                                             int cmd, uintptr_t *args)
261 {
262     panic("NYI!");
263     return SYSRET(SYS_ERR_OK);
264 }
265
266 static struct sysret handle_mapping_modify(struct capability *mapping,
267                                            int cmd, uintptr_t *args)
268 {
269     // Modify flags of (part of) mapped region of frame
270     assert(type_is_mapping(mapping->type));
271
272     // unpack arguments
273     size_t offset = args[0]; // in pages; of first page to modify from first
274                              // page in mapped region
275     size_t pages  = args[1]; // #pages to modify
276     size_t flags  = args[2]; // new flags
277     genvaddr_t va = args[3]; // virtual addr hint
278
279     errval_t err = page_mappings_modify_flags(mapping, offset, pages, flags, va);
280
281     return (struct sysret) {
282         .error = err,
283         .value = 0,
284     };
285 }
286
287 /// Different handler for cap operations performed by the monitor
288 static struct sysret monitor_handle_retype(struct capability *kernel_cap,
289                                            int cmd, uintptr_t *args)
290 {
291     return handle_retype_common(&dcb_current->cspace.cap, args, true);
292 }
293
294 static struct sysret monitor_handle_has_descendants(struct capability *kernel_cap,
295                                                     int cmd, uintptr_t *args)
296 {
297     struct capability *src = (struct capability *)args;
298
299     struct cte *next = mdb_find_greater(src, false);
300
301     return (struct sysret) {
302         .error = SYS_ERR_OK,
303         .value = (next && is_ancestor(&next->cap, src)),
304     };
305 }
306
307 static struct sysret monitor_handle_is_retypeable(struct capability *kernel_cap,
308                                                   int cmd, uintptr_t *args)
309 {
310     struct capability *src = (struct capability*)args;
311     int pos = ROUND_UP(sizeof(struct capability), sizeof(uint64_t)) / sizeof(uint64_t);
312
313     uintptr_t offset  = args[pos];
314     uintptr_t objsize = args[pos + 1];
315     uintptr_t count   = args[pos + 2];
316
317     return sys_monitor_is_retypeable(src, offset, objsize, count);
318 }
319
320 static struct sysret monitor_handle_delete_last(struct capability *kernel_cap,
321                                                 int cmd, uintptr_t *args)
322 {
323     capaddr_t root_caddr   = args[0];
324     uint8_t root_level     = args[1];
325     capaddr_t target_caddr = args[2];
326     uint8_t target_level   = args[3];
327     capaddr_t retcn_caddr  = args[4];
328     uint8_t retcn_level    = args[5];
329     cslot_t ret_slot       = args[6];
330
331     return sys_monitor_delete_last(root_caddr, root_level, target_caddr,
332                                    target_level, retcn_caddr, retcn_level,
333                                    ret_slot);
334 }
335
336 static struct sysret monitor_handle_delete_foreigns(struct capability *kernel_cap,
337                                                     int cmd, uintptr_t *args)
338 {
339     capaddr_t caddr = args[0];
340     uint8_t level   = args[1];
341     return sys_monitor_delete_foreigns(caddr, level);
342 }
343
344 static struct sysret monitor_handle_revoke_mark_tgt(struct capability *kernel_cap,
345                                                     int cmd, uintptr_t *args)
346 {
347     capaddr_t root_caddr   = args[0];
348     uint8_t root_level     = args[1];
349     capaddr_t target_caddr = args[2];
350     uint8_t target_level   = args[3];
351
352     return sys_monitor_revoke_mark_tgt(root_caddr, root_level,
353                                        target_caddr, target_level);
354 }
355
356 static struct sysret monitor_handle_revoke_mark_rels(struct capability *kernel_cap,
357                                                      int cmd, uintptr_t *args)
358 {
359     struct capability *base = (struct capability*)args;
360
361     return sys_monitor_revoke_mark_rels(base);
362 }
363
364 static struct sysret monitor_handle_delete_step(struct capability *kernel_cap,
365                                                 int cmd, uintptr_t *args)
366 {
367     capaddr_t ret_cn_addr  = args[0];
368     capaddr_t ret_cn_level = args[1];
369     capaddr_t ret_slot     = args[2];
370
371     return sys_monitor_delete_step(ret_cn_addr, ret_cn_level, ret_slot);
372 }
373
374 static struct sysret monitor_handle_clear_step(struct capability *kernel_cap,
375                                                int cmd, uintptr_t *args)
376 {
377     capaddr_t ret_cn_addr  = args[0];
378     capaddr_t ret_cn_level = args[1];
379     capaddr_t ret_slot     = args[2];
380
381     return sys_monitor_clear_step(ret_cn_addr, ret_cn_level, ret_slot);
382 }
383
384 static struct sysret monitor_handle_register(struct capability *kernel_cap,
385                                              int cmd, uintptr_t *args)
386 {
387     capaddr_t ep_caddr = args[0];
388
389     TRACE(KERNEL, SC_MONITOR_REGISTER, 0);
390     struct sysret sr = sys_monitor_register(ep_caddr);
391     TRACE(KERNEL, SC_MONITOR_REGISTER, 1);
392     return sr;
393 }
394
395 static struct sysret monitor_get_core_id(struct capability *kernel_cap,
396                                          int cmd, uintptr_t *args)
397 {
398     return (struct sysret){.error = SYS_ERR_OK, .value = my_core_id};
399 }
400
401 static struct sysret monitor_get_arch_id(struct capability *kernel_cap,
402                                          int cmd, uintptr_t *args)
403 {
404     return (struct sysret){.error = SYS_ERR_OK, .value = apic_id};
405 }
406
407 static struct sysret monitor_identify_cap_common(struct capability *kernel_cap,
408                                                  struct capability *root,
409                                                  uintptr_t *args)
410 {
411     capaddr_t cptr = args[0];
412     uint8_t level  = args[1];
413
414     struct capability *retbuf = (void *)args[2];
415
416     return sys_monitor_identify_cap(root, cptr, level, retbuf);
417 }
418
419 static struct sysret monitor_identify_cap(struct capability *kernel_cap,
420                                           int cmd, uintptr_t *args)
421 {
422     return monitor_identify_cap_common(kernel_cap, &dcb_current->cspace.cap, args);
423 }
424
425 static struct sysret monitor_identify_domains_cap(struct capability *kernel_cap,
426                                                   int cmd, uintptr_t *args)
427 {
428     errval_t err;
429
430     capaddr_t root_caddr = args[0];
431     capaddr_t root_level = args[1];
432
433     struct capability *root;
434     err = caps_lookup_cap(&dcb_current->cspace.cap, root_caddr, root_level,
435                           &root, CAPRIGHTS_READ);
436
437     if (err_is_fail(err)) {
438         return SYSRET(err_push(err, SYS_ERR_ROOT_CAP_LOOKUP));
439     }
440
441     /* XXX: this hides the first two arguments */
442     return monitor_identify_cap_common(kernel_cap, root, &args[2]);
443 }
444
445 static struct sysret monitor_cap_has_relations(struct capability *kernel_cap,
446                                                int cmd, uintptr_t *args)
447 {
448     capaddr_t caddr = args[0];
449     uint8_t level   = args[1];
450     uint8_t mask    = args[2];
451
452     return sys_cap_has_relations(caddr, level, mask);
453 }
454
455 static struct sysret monitor_remote_relations(struct capability *kernel_cap,
456                                               int cmd, uintptr_t *args)
457 {
458     capaddr_t root_addr = args[0];
459     int root_level      = args[1];
460     capaddr_t cptr      = args[2];
461     int level           = args[3];
462     uint8_t relations   = args[4]        & 0xFF;
463     uint8_t mask        = (args[4] >> 8) & 0xFF;
464
465     return sys_monitor_remote_relations(root_addr, root_level, cptr, level,
466                                         relations, mask);
467 }
468
469
470 static struct sysret monitor_create_cap(struct capability *kernel_cap,
471                                         int cmd, uintptr_t *args)
472 {
473     /* XXX: Get the raw metadata of the capability to create */
474     struct capability *src = (struct capability *)args;
475     int pos = ROUND_UP(sizeof(struct capability), sizeof(uint64_t)) / sizeof(uint64_t);
476
477     /* Cannot create null caps */
478     if (src->type == ObjType_Null) {
479         return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
480     }
481
482     coreid_t owner = args[pos + 3];
483
484     /* For certain types, only foreign copies can be created here */
485     if ((src->type == ObjType_EndPoint || src->type == ObjType_Dispatcher
486          || src->type == ObjType_Kernel || src->type == ObjType_IRQTable)
487         && owner == my_core_id)
488     {
489         return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
490     }
491
492     /* Create the cap in the destination */
493     capaddr_t cnode_cptr = args[pos];
494     int cnode_level      = args[pos + 1];
495     size_t slot          = args[pos + 2];
496     assert(cnode_level <= 2);
497
498     return SYSRET(caps_create_from_existing(&dcb_current->cspace.cap,
499                                             cnode_cptr, cnode_level,
500                                             slot, owner, src));
501 }
502
503 static struct sysret monitor_copy_existing(struct capability *kernel_cap,
504                                         int cmd, uintptr_t *args)
505 {
506     /* XXX: Get the raw metadata of the capability to create */
507     struct capability *src = (struct capability *)args;
508     int pos = ROUND_UP(sizeof(struct capability), sizeof(uint64_t)) / sizeof(uint64_t);
509
510     capaddr_t croot_cptr = args[pos];
511     capaddr_t cnode_cptr = args[pos + 1];
512     int cnode_level      = args[pos + 2];
513     size_t slot          = args[pos + 3];
514
515     return sys_monitor_copy_existing(src, croot_cptr, cnode_cptr, cnode_level, slot);
516 }
517
518 static struct sysret monitor_nullify_cap(struct capability *kernel_cap,
519                                          int cmd, uintptr_t *args)
520 {
521     capaddr_t cptr = args[0];
522     uint8_t level  = args[1];
523
524     return sys_monitor_nullify_cap(cptr, level);
525 }
526
527 static struct sysret monitor_handle_sync_timer(struct capability *kern_cap,
528                                                int cmd, uintptr_t *args)
529 {
530     uint64_t synctime = args[0];
531     return sys_monitor_handle_sync_timer(synctime);
532 }
533
534 static struct sysret monitor_get_platform(struct capability *kern_cap,
535                                           int cmd, uintptr_t *args)
536 {
537     if (!access_ok(ACCESS_WRITE, args[0], sizeof(struct platform_info))) {
538         return SYSRET(SYS_ERR_INVALID_USER_BUFFER);
539     }
540     struct platform_info *pi = (struct platform_info *)args[0];
541     // x86: only have PC as platform
542     pi->arch = PI_ARCH_X86;
543     pi->platform = PI_PLATFORM_PC;
544     return SYSRET(SYS_ERR_OK);
545 }
546
547 static struct sysret handle_frame_identify(struct capability *to,
548                                            int cmd, uintptr_t *args)
549 {
550     // Return with physical base address of frame
551     assert(to->type == ObjType_Frame || to->type == ObjType_DevFrame ||
552            to->type == ObjType_RAM);
553     assert((get_address(to) & BASE_PAGE_MASK) == 0);
554
555     struct frame_identity *fi = (struct frame_identity *)args[0];
556
557     if (!access_ok(ACCESS_WRITE, (lvaddr_t)fi, sizeof(struct frame_identity))) {
558         return SYSRET(SYS_ERR_INVALID_USER_BUFFER);
559     }
560
561     fi->base = get_address(to);
562     fi->bytes = get_size(to);
563
564     return SYSRET(SYS_ERR_OK);
565 }
566
567 static struct sysret handle_vnode_identify(struct capability *to,
568                                            int cmd, uintptr_t *args)
569 {
570     // Return with physical base address of the VNode
571     assert(to->type == ObjType_VNode_x86_64_pml4 ||
572            to->type == ObjType_VNode_x86_64_pdpt ||
573            to->type == ObjType_VNode_x86_64_pdir ||
574            to->type == ObjType_VNode_x86_64_ptable);
575
576     genpaddr_t base_addr = get_address(to);
577     assert((base_addr & BASE_PAGE_MASK) == 0);
578
579     return (struct sysret) {
580         .error = SYS_ERR_OK,
581         .value = (genpaddr_t)base_addr | ((uint8_t)to->type),
582     };
583 }
584
585
586 static struct sysret handle_io(struct capability *to, int cmd, uintptr_t *args)
587 {
588     uint64_t    port = args[0];
589     uint64_t    data = args[1]; // ignored for input
590
591     return sys_io(to, cmd, port, data);
592 }
593
594 static struct sysret handle_vmread(struct capability *to,
595                                    int cmd, uintptr_t *args)
596 {
597 #if defined(__k1om__) || defined(CONFIG_SVM)
598     return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
599 #else
600     errval_t err;
601     struct dcb *dcb = to->u.dispatcher.dcb;
602     lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
603     if (vmcs_base != vmptrst()) {
604         err = SYS_ERR_VMKIT_VMX_VMFAIL_INVALID;
605     } else {
606         err = vmread(args[0], (lvaddr_t *)args[1]);
607     }
608     return SYSRET(err);
609 #endif
610 }
611
612 static struct sysret handle_vmwrite(struct capability *to,
613                                     int cmd, uintptr_t *args)
614 {
615 #if defined(__k1om__) || defined(CONFIG_SVM)
616     return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
617 #else
618     errval_t err;
619     struct dcb *dcb = to->u.dispatcher.dcb;
620     lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
621     if (vmcs_base != vmptrst()) {
622         err = SYS_ERR_VMKIT_VMX_VMFAIL_INVALID;
623     } else {
624         err = vmwrite(args[0], args[1]);
625     }
626     return SYSRET(err);
627 #endif
628 }
629
630 static struct sysret handle_vmptrld(struct capability *to,
631                                     int cmd, uintptr_t *args)
632 {
633 #if defined(__k1om__) || defined(CONFIG_SVM)
634     return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
635 #else
636     errval_t err;
637     struct dcb *dcb = to->u.dispatcher.dcb;
638     lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
639     err = vmptrld(vmcs_base);
640     return SYSRET(err);
641 #endif
642 }
643
644 static struct sysret handle_vmclear(struct capability *to,
645                                     int cmd, uintptr_t *args)
646 {
647 #if defined(__k1om__) || defined(CONFIG_SVM)
648     return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
649 #else
650     errval_t err;
651     struct dcb *dcb = to->u.dispatcher.dcb;
652     lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
653     err = vmclear(vmcs_base);
654     return SYSRET(err);
655 #endif
656 }
657
#ifndef __k1om__
/**
 * \brief Configure a dispatcher as a VM guest.
 *
 * Looks up and validates four caps in the current dispatcher's cspace —
 * monitor endpoint, guest vspace root, VMCB frame, and control frame —
 * copies them into the target DCB's guest descriptor, and marks the DCB
 * as a VM guest.
 *
 * \param to   Dispatcher capability of the guest being set up.
 * \param cmd  Invocation command (unused).
 * \param args args[0..3]: capaddrs of endpoint, vspace pml4, VMCB frame,
 *             and control frame, each looked up at cspace level 2.
 */
static struct sysret
handle_dispatcher_setup_guest (struct capability *to, int cmd, uintptr_t *args)
{
    errval_t err;
    struct dcb *dcb = to->u.dispatcher.dcb;

    capaddr_t epp = args[0];
    capaddr_t vnodep = args[1];
    capaddr_t vmcbp = args[2];
    capaddr_t ctrlp = args[3];

    // 0. Enable VM extensions
    err = vmkit_enable_virtualization();
    if (err != SYS_ERR_OK) {
        return SYSRET(err);
    }

    // 1. Check arguments
    // Monitor endpoint for exits of this guest
    struct cte *ep_cte;

    err = caps_lookup_slot(&dcb_current->cspace.cap, epp, 2,
                           &ep_cte, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    if (ep_cte->cap.type != ObjType_EndPoint) {
        return SYSRET(SYS_ERR_VMKIT_ENDPOINT_INVALID);
    }
    // Copy the endpoint into the guest descriptor so it outlives the caller.
    err = caps_copy_to_cte(&dcb->guest_desc.monitor_ep, ep_cte, false, 0, 0);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_VMKIT_ENDPOINT));
    }

    // Domain vspace: must be a pml4 root page table
    struct capability *vnode_cap;
    err = caps_lookup_cap(&dcb_current->cspace.cap, vnodep, 2,
                          &vnode_cap, CAPRIGHTS_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    if (vnode_cap->type != ObjType_VNode_x86_64_pml4) {
        return SYSRET(SYS_ERR_DISP_VSPACE_INVALID);
    }

    assert(vnode_cap->type == ObjType_VNode_x86_64_pml4);

    // VMCB: a frame of at least one base page
    struct cte *vmcb_cte;
    err = caps_lookup_slot(&dcb_current->cspace.cap, vmcbp, 2,
                           &vmcb_cte, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    if (vmcb_cte->cap.type != ObjType_Frame ||
        vmcb_cte->cap.u.frame.bytes < BASE_PAGE_SIZE) {
        return SYSRET(SYS_ERR_VMKIT_VMCB_INVALID);
    }
    err = caps_copy_to_cte(&dcb->guest_desc.vmcb, vmcb_cte, false, 0, 0);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_VMKIT_VMCB));
    }

    // guest control: a frame of at least one base page
    struct cte *ctrl_cte;
    err = caps_lookup_slot(&dcb_current->cspace.cap, ctrlp, 2,
                           &ctrl_cte, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    if (ctrl_cte->cap.type != ObjType_Frame ||
        ctrl_cte->cap.u.frame.bytes < BASE_PAGE_SIZE) {
        return SYSRET(SYS_ERR_VMKIT_CTRL_INVALID);
    }
    err = caps_copy_to_cte(&dcb->guest_desc.ctrl, ctrl_cte, false, 0, 0);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_VMKIT_CTRL));
    }

#ifndef CONFIG_SVM
    // Initialize VMCS for the single virtual-CPU here instead of in
    // userspace, where the privilege level is not 0.
    err = initialize_vmcs(vmcb_cte->cap.u.frame.base);
    assert(err_is_ok(err));
#endif

    // 2. Set up the target DCB
/*     dcb->guest_desc.monitor_ep = ep_cap; */
    dcb->vspace = vnode_cap->u.vnode_x86_64_pml4.base;
    dcb->is_vm_guest = true;
/*     dcb->guest_desc.vmcb = vmcb_cap->u.frame.base; */
/*     dcb->guest_desc.ctrl = (void *)x86_64_phys_to_mem(ctrl_cap->u.frame.base); */

    return SYSRET(SYS_ERR_OK);
}
#endif
755
756 static struct sysret monitor_handle_domain_id(struct capability *monitor_cap,
757                                               int cmd, uintptr_t *args)
758 {
759     capaddr_t cptr = args[0];
760     domainid_t domain_id = args[1];
761
762     return sys_monitor_domain_id(cptr, domain_id);
763 }
764
765 static struct sysret monitor_get_cap_owner(struct capability *monitor_cap,
766                                            int cmd, uintptr_t *args)
767 {
768     capaddr_t root_addr = args[0];
769     uint8_t root_level = args[1];
770     capaddr_t cptr = args[2];
771     uint8_t level = args[3];
772
773     return sys_get_cap_owner(root_addr, root_level, cptr, level);
774 }
775
776 static struct sysret monitor_set_cap_owner(struct capability *monitor_cap,
777                                            int cmd, uintptr_t *args)
778 {
779     capaddr_t root_addr = args[0];
780     uint8_t root_level = args[1];
781     capaddr_t cptr = args[2];
782     uint8_t level = args[3];
783     coreid_t owner = args[4];
784
785     return sys_set_cap_owner(root_addr, root_level, cptr, level, owner);
786 }
787
788 static struct sysret monitor_lock_cap(struct capability *monitor_cap,
789                                       int cmd, uintptr_t *args)
790 {
791     capaddr_t root_addr = args[0];
792     uint8_t root_level = args[1];
793     capaddr_t cptr = args[2];
794     uint8_t level = args[3];
795
796     return sys_lock_cap(root_addr, root_level, cptr, level);
797 }
798
799 static struct sysret monitor_unlock_cap(struct capability *monitor_cap,
800                                         int cmd, uintptr_t *args)
801 {
802     capaddr_t root_addr = args[0];
803     uint8_t root_level = args[1];
804     capaddr_t cptr = args[2];
805     uint8_t level = args[3];
806
807     return sys_unlock_cap(root_addr, root_level, cptr, level);
808 }
809
810 /**
811  * \brief Set up tracing in the kernel
812  */
813 static struct sysret handle_trace_setup(struct capability *cap,
814                                         int cmd, uintptr_t *args)
815 {
816     struct capability *frame;
817     errval_t err;
818
819     /* lookup passed cap */
820     capaddr_t cptr = args[0];
821     err = caps_lookup_cap(&dcb_current->cspace.cap, cptr, 2, &frame,
822                           CAPRIGHTS_READ_WRITE);
823     if (err_is_fail(err)) {
824         return SYSRET(err);
825     }
826
827     lpaddr_t lpaddr = gen_phys_to_local_phys(frame->u.frame.base);
828     kernel_trace_buf = local_phys_to_mem(lpaddr);
829     //printf("kernel.%u: handle_trace_setup at %lx\n", apic_id, kernel_trace_buf);
830
831     // Copy boot applications.
832     trace_copy_boot_applications();
833
834     return SYSRET(SYS_ERR_OK);
835 }
836
837 static struct sysret handle_irqsrc_get_vec_start(struct capability * to, int cmd,
838         uintptr_t *args)
839 {
840     struct sysret ret;
841     ret.error = SYS_ERR_OK;
842     ret.value = to->u.irqsrc.vec_start;
843     return ret;
844
845 }
846
847 static struct sysret handle_irqsrc_get_vec_end(struct capability * to, int cmd,
848         uintptr_t *args)
849 {
850     struct sysret ret;
851     ret.error = SYS_ERR_OK;
852     ret.value = to->u.irqsrc.vec_end;
853     return ret;
854
855 }
856
857
858 static struct sysret handle_irqdest_get_vector(struct capability *to, int cmd,
859                                             uintptr_t *args)
860 {
861     struct sysret ret;
862     ret.error = SYS_ERR_OK;
863     ret.value = to->u.irqdest.vector;
864     return ret;
865 }
866
867 static struct sysret handle_irqdest_get_cpu(struct capability *to, int cmd,
868                                             uintptr_t *args)
869 {
870     struct sysret ret;
871     ret.error = SYS_ERR_OK;
872     ret.value = to->u.irqdest.cpu;
873     return ret;
874 }
875
876 static struct sysret handle_irqdest_connect(struct capability *to, int cmd,
877                                             uintptr_t *args)
878 {
879     return SYSRET(irq_connect(to, args[0]));
880 }
881
882 static struct sysret handle_irq_table_alloc(struct capability *to, int cmd,
883                                             uintptr_t *args)
884 {
885     struct sysret ret;
886     int outvec;
887     ret.error = irq_table_alloc(&outvec);
888     ret.value = outvec;
889     return ret;
890 }
891
892 static struct sysret handle_irq_table_alloc_dest_cap(struct capability *to, int cmd,
893                                             uintptr_t *args)
894 {
895     return SYSRET(irq_table_alloc_dest_cap(args[0],args[1],args[2]));
896 }
897
898
899 static struct sysret handle_irq_table_set(struct capability *to, int cmd,
900                                           uintptr_t *args)
901 {
902     return SYSRET(irq_table_set(args[0], args[1]));
903 }
904
905 static struct sysret handle_irq_table_delete(struct capability *to, int cmd,
906                                              uintptr_t *args)
907 {
908     return SYSRET(irq_table_delete(args[0]));
909 }
910
/**
 * \brief Raise a notification IPI on the core/channel named by the capability.
 */
static struct sysret handle_ipi_notify_send(struct capability *cap,
                                            int cmd, uintptr_t *args)
{
    assert(cap->type == ObjType_Notify_IPI);
    // Target core and channel are baked into the Notify_IPI capability itself
    return ipi_raise_notify(cap->u.notify_ipi.coreid, cap->u.notify_ipi.chanid);
}
917
918 static struct sysret kernel_ipi_register(struct capability *cap,
919                                          int cmd, uintptr_t *args)
920 {
921     assert(cap->type == ObjType_Kernel);
922     capaddr_t ep = args[0];
923     int chanid = args[1];
924     return SYSRET(ipi_register_notification(ep, chanid));
925 }
926
/**
 * \brief Delete an IPI notification registration — not yet implemented.
 */
static struct sysret kernel_ipi_delete(struct capability *cap,
                                       int cmd, uintptr_t *args)
{
    assert(cap->type == ObjType_Kernel);
    assert(!"NYI");
    // NOTE(review): with NDEBUG the assert above disappears and this stub
    // silently reports success without deleting anything.
    return SYSRET(SYS_ERR_OK);
}
934
935 static struct sysret dispatcher_dump_ptables(struct capability *cap,
936                                              int cmd, uintptr_t *args)
937 {
938     assert(cap->type == ObjType_Dispatcher);
939
940     printf("kernel_dump_ptables\n");
941
942     struct dcb *dispatcher = cap->u.dispatcher.dcb;
943
944     paging_dump_tables(dispatcher);
945
946     return SYSRET(SYS_ERR_OK);
947 }
948
949 static struct sysret dispatcher_dump_capabilities(struct capability *cap,
950                                              int cmd, uintptr_t *args)
951 {
952     assert(cap->type == ObjType_Dispatcher);
953
954     printf("dispatcher_dump_capabilities\n");
955
956     struct dcb *dispatcher = cap->u.dispatcher.dcb;
957
958     errval_t err = debug_print_cababilities(dispatcher);
959
960     return SYSRET(err);
961 }
962
963 /*
964  * \brief Activate performance monitoring
965  *
966  * Activates performance monitoring.
967  * \param xargs Expected parameters in args:
968  * - performance monitoring type
969  * - mask for given type
970  * - Counter id
971  * - Also count in privileged mode
972  * - Number of counts before overflow. This parameter may be used to
973  *   set tradeoff between accuracy and overhead. Set the counter to 0
974  *   to deactivate the usage of APIC.
975  * - Endpoint capability to be invoked when the counter overflows.
976  *   The buffer associated with the endpoint needs to be large enough
977  *   to hold several overflow notifications depending on the overflow
978  *   frequency.
979  */
980 static struct sysret performance_counter_activate(struct capability *cap,
981                                                   int cmd, uintptr_t *args)
982 {
983     uint8_t event = args[0];
984     uint8_t umask = args[1];
985     uint8_t counter_id = args[2];
986     bool kernel = args[3];
987     uint64_t counter_value = args[4];
988     capaddr_t ep_addr = args[5];
989
990     errval_t err;
991     struct capability *ep;
992     extern struct capability perfmon_callback_ep;
993
994     // Make sure that
995     assert(ep_addr!=0 || counter_value==0);
996
997     perfmon_init();
998     perfmon_measure_start(event, umask, counter_id, kernel, counter_value);
999
1000     if(ep_addr!=0) {
1001
1002         err = caps_lookup_cap(&dcb_current->cspace.cap, ep_addr, 2, &ep,
1003                               CAPRIGHTS_READ);
1004         if(err_is_fail(err)) {
1005             return SYSRET(err);
1006         }
1007
1008         perfmon_callback_ep = *ep;
1009     }
1010
1011     return SYSRET(SYS_ERR_OK);
1012 }
1013
1014 /*
1015  * \brief Write counter values.
1016  */
1017 static struct sysret performance_counter_write(struct capability *cap,
1018                                                int cmd, uintptr_t *args)
1019 {
1020     uint8_t counter_id = args[0];
1021     uint64_t counter_value = args[1];
1022
1023     perfmon_measure_write(counter_id, counter_value);
1024     return SYSRET(SYS_ERR_OK);
1025 }
1026
/**
 * \brief Deactivate performance counters again.
 */
static struct sysret performance_counter_deactivate(struct capability *cap,
                                                  int cmd, uintptr_t *args)
{
    perfmon_measure_stop();
    return SYSRET(SYS_ERR_OK);
}
1036
1037 /*
1038  * \brief Return system-wide unique ID of this ID cap.
1039  */
1040 static struct sysret handle_idcap_identify(struct capability *cap, int cmd,
1041                                            uintptr_t *args)
1042 {
1043     idcap_id_t id;
1044     struct sysret sysret = sys_idcap_identify(cap, &id);
1045     sysret.value = id;
1046
1047     return sysret;
1048 }
1049
1050 static struct sysret kernel_send_init_ipi(struct capability *cap, int cmd,
1051                                           uintptr_t *args)
1052 {
1053     coreid_t destination = args[0];
1054 //    printk(LOG_DEBUG, "%s:%s:%d: destination=%"PRIuCOREID"\n",
1055 //           __FILE__, __FUNCTION__, __LINE__, destination);
1056
1057     apic_send_init_assert(destination, xapic_none);
1058     apic_send_init_deassert();
1059
1060     return SYSRET(SYS_ERR_OK);
1061 }
1062
1063 static struct sysret kernel_send_start_ipi(struct capability *cap,
1064                                            int cmd,
1065                                            uintptr_t *args)
1066 {
1067     coreid_t destination = args[0];
1068     genvaddr_t start_vector = X86_64_REAL_MODE_SEGMENT_TO_REAL_MODE_PAGE(X86_64_REAL_MODE_SEGMENT);
1069 //    printk(LOG_DEBUG, "%s:%d: destination=%"PRIuCOREID" start_vector=%"PRIxGENVADDR"\n",
1070 //           __FILE__, __LINE__, destination, start_vector);
1071
1072     apic_send_start_up(destination, xapic_none, start_vector);
1073
1074     return SYSRET(SYS_ERR_OK);
1075 }
1076
1077 static struct sysret kernel_get_global_phys(struct capability *cap,
1078                                            int cmd,
1079                                            uintptr_t *args)
1080 {
1081
1082     struct sysret sysret;
1083     sysret.value = mem_to_local_phys((lvaddr_t)global);
1084     sysret.error = SYS_ERR_OK;
1085
1086     return sysret;
1087 }
1088
1089 static struct sysret kernel_add_kcb(struct capability *kern_cap,
1090                                     int cmd, uintptr_t *args)
1091 {
1092     uintptr_t kcb_addr = args[0];
1093     struct kcb *new_kcb = (struct kcb *)kcb_addr;
1094
1095     return sys_kernel_add_kcb(new_kcb);
1096 }
1097
1098 static struct sysret kernel_remove_kcb(struct capability *kern_cap,
1099                                        int cmd, uintptr_t *args)
1100 {
1101     printk(LOG_NOTE, "in kernel_remove_kcb invocation!\n");
1102     uintptr_t kcb_addr = args[0];
1103     struct kcb *to_remove = (struct kcb *)kcb_addr;
1104
1105     return sys_kernel_remove_kcb(to_remove);
1106 }
1107
1108 static struct sysret kernel_suspend_kcb_sched(struct capability *kern_cap,
1109                                               int cmd, uintptr_t *args)
1110 {
1111     printk(LOG_NOTE, "in kernel_suspend_kcb_sched invocation!\n");
1112     return sys_kernel_suspend_kcb_sched((bool)args[0]);
1113 }
1114
1115 static struct sysret handle_kcb_identify(struct capability *to,
1116                                          int cmd, uintptr_t *args)
1117 {
1118     return sys_handle_kcb_identify(to, (struct frame_identity *)args[0]);
1119 }
1120
1121
1122 typedef struct sysret (*invocation_handler_t)(struct capability *to,
1123                                               int cmd, uintptr_t *args);
1124
1125 static invocation_handler_t invocations[ObjType_Num][CAP_MAX_CMD] = {
1126     [ObjType_Dispatcher] = {
1127         [DispatcherCmd_Setup] = handle_dispatcher_setup,
1128         [DispatcherCmd_Properties] = handle_dispatcher_properties,
1129 #ifndef __k1om__
1130         [DispatcherCmd_SetupGuest] = handle_dispatcher_setup_guest,
1131 #endif
1132         [DispatcherCmd_DumpPTables]  = dispatcher_dump_ptables,
1133         [DispatcherCmd_DumpCapabilities] = dispatcher_dump_capabilities,
1134         [DispatcherCmd_Vmread] = handle_vmread,
1135         [DispatcherCmd_Vmwrite] = handle_vmwrite,
1136         [DispatcherCmd_Vmptrld] = handle_vmptrld,
1137         [DispatcherCmd_Vmclear] = handle_vmclear,
1138     },
1139     [ObjType_KernelControlBlock] = {
1140         [FrameCmd_Identify] = handle_kcb_identify,
1141     },
1142     [ObjType_RAM] = {
1143         [RAMCmd_Identify] = handle_frame_identify,
1144     },
1145     [ObjType_Frame] = {
1146         [FrameCmd_Identify] = handle_frame_identify,
1147     },
1148     [ObjType_DevFrame] = {
1149         [FrameCmd_Identify] = handle_frame_identify,
1150     },
1151     [ObjType_L1CNode] = {
1152         [CNodeCmd_Copy]   = handle_copy,
1153         [CNodeCmd_Mint]   = handle_mint,
1154         [CNodeCmd_Retype] = handle_retype,
1155         [CNodeCmd_Create] = handle_create,
1156         [CNodeCmd_Delete] = handle_delete,
1157         [CNodeCmd_Revoke] = handle_revoke,
1158         [CNodeCmd_GetState] = handle_get_state,
1159         [CNodeCmd_GetSize] = handle_get_size,
1160         [CNodeCmd_Resize] = handle_resize,
1161     },
1162     [ObjType_L2CNode] = {
1163         [CNodeCmd_Copy]   = handle_copy,
1164         [CNodeCmd_Mint]   = handle_mint,
1165         [CNodeCmd_Retype] = handle_retype,
1166         [CNodeCmd_Create] = handle_create,
1167         [CNodeCmd_Delete] = handle_delete,
1168         [CNodeCmd_Revoke] = handle_revoke,
1169         [CNodeCmd_GetState] = handle_get_state,
1170         [CNodeCmd_Resize] = handle_resize,
1171     },
1172     [ObjType_VNode_x86_64_pml4] = {
1173         [VNodeCmd_Identify] = handle_vnode_identify,
1174         [VNodeCmd_Map]   = handle_map,
1175         [VNodeCmd_Unmap] = handle_unmap,
1176     },
1177     [ObjType_VNode_x86_64_pdpt] = {
1178         [VNodeCmd_Identify] = handle_vnode_identify,
1179         [VNodeCmd_Map]   = handle_map,
1180         [VNodeCmd_Unmap] = handle_unmap,
1181     },
1182     [ObjType_VNode_x86_64_pdir] = {
1183         [VNodeCmd_Identify] = handle_vnode_identify,
1184         [VNodeCmd_Map]   = handle_map,
1185         [VNodeCmd_Unmap] = handle_unmap,
1186     },
1187     [ObjType_VNode_x86_64_ptable] = {
1188         [VNodeCmd_Identify] = handle_vnode_identify,
1189         [VNodeCmd_Map]   = handle_map,
1190         [VNodeCmd_Unmap] = handle_unmap,
1191     },
1192     [ObjType_Frame_Mapping] = {
1193         [MappingCmd_Destroy] = handle_mapping_destroy,
1194         [MappingCmd_Modify] = handle_mapping_modify,
1195     },
1196     [ObjType_DevFrame_Mapping] = {
1197         [MappingCmd_Destroy] = handle_mapping_destroy,
1198         [MappingCmd_Modify] = handle_mapping_modify,
1199     },
1200     [ObjType_VNode_x86_64_pml4_Mapping] = {
1201         [MappingCmd_Destroy] = handle_mapping_destroy,
1202         [MappingCmd_Modify] = handle_mapping_modify,
1203     },
1204     [ObjType_VNode_x86_64_pdpt_Mapping] = {
1205         [MappingCmd_Destroy] = handle_mapping_destroy,
1206         [MappingCmd_Modify] = handle_mapping_modify,
1207     },
1208     [ObjType_VNode_x86_64_pdir_Mapping] = {
1209         [MappingCmd_Destroy] = handle_mapping_destroy,
1210         [MappingCmd_Modify] = handle_mapping_modify,
1211     },
1212     [ObjType_VNode_x86_64_ptable_Mapping] = {
1213         [MappingCmd_Destroy] = handle_mapping_destroy,
1214         [MappingCmd_Modify] = handle_mapping_modify,
1215     },
1216     [ObjType_Kernel] = {
1217         [KernelCmd_Get_core_id]  = monitor_get_core_id,
1218         [KernelCmd_Get_arch_id]  = monitor_get_arch_id,
1219         [KernelCmd_Identify_cap] = monitor_identify_cap,
1220         [KernelCmd_Identify_domains_cap] = monitor_identify_domains_cap,
1221         [KernelCmd_Remote_relations] = monitor_remote_relations,
1222         [KernelCmd_Cap_has_relations] = monitor_cap_has_relations,
1223         [KernelCmd_Create_cap]   = monitor_create_cap,
1224         [KernelCmd_Copy_existing] = monitor_copy_existing,
1225         [KernelCmd_Nullify_cap]  = monitor_nullify_cap,
1226         [KernelCmd_Setup_trace]  = handle_trace_setup,
1227         [KernelCmd_Register]     = monitor_handle_register,
1228         [KernelCmd_Domain_Id]    = monitor_handle_domain_id,
1229         [KernelCmd_Get_cap_owner] = monitor_get_cap_owner,
1230         [KernelCmd_Set_cap_owner] = monitor_set_cap_owner,
1231         [KernelCmd_Lock_cap]     = monitor_lock_cap,
1232         [KernelCmd_Unlock_cap]   = monitor_unlock_cap,
1233         [KernelCmd_Retype]       = monitor_handle_retype,
1234         [KernelCmd_Has_descendants] = monitor_handle_has_descendants,
1235         [KernelCmd_Is_retypeable] = monitor_handle_is_retypeable,
1236         [KernelCmd_Delete_last]  = monitor_handle_delete_last,
1237         [KernelCmd_Delete_foreigns] = monitor_handle_delete_foreigns,
1238         [KernelCmd_Revoke_mark_target] = monitor_handle_revoke_mark_tgt,
1239         [KernelCmd_Revoke_mark_relations] = monitor_handle_revoke_mark_rels,
1240         [KernelCmd_Delete_step] = monitor_handle_delete_step,
1241         [KernelCmd_Clear_step] = monitor_handle_clear_step,
1242         [KernelCmd_Sync_timer]   = monitor_handle_sync_timer,
1243         [KernelCmd_IPI_Register] = kernel_ipi_register,
1244         [KernelCmd_IPI_Delete]   = kernel_ipi_delete,
1245         [KernelCmd_GetGlobalPhys] = kernel_get_global_phys,
1246         [KernelCmd_Add_kcb]      = kernel_add_kcb,
1247         [KernelCmd_Remove_kcb]   = kernel_remove_kcb,
1248         [KernelCmd_Suspend_kcb_sched]   = kernel_suspend_kcb_sched,
1249         [KernelCmd_Get_platform] = monitor_get_platform,
1250     },
1251     [ObjType_IPI] = {
1252         [IPICmd_Send_Start] = kernel_send_start_ipi,
1253         [IPICmd_Send_Init] = kernel_send_init_ipi,
1254     },
1255         [ObjType_IRQDest] = {
1256         [IRQDestCmd_Connect] = handle_irqdest_connect,
1257         [IRQDestCmd_GetVector] = handle_irqdest_get_vector,
1258         [IRQDestCmd_GetCpu] = handle_irqdest_get_cpu
1259         },
1260         [ObjType_IRQSrc] = {
1261         [IRQSrcCmd_GetVecStart] = handle_irqsrc_get_vec_start,
1262         [IRQSrcCmd_GetVecEnd] = handle_irqsrc_get_vec_end
1263         },
1264     [ObjType_IRQTable] = {
1265         [IRQTableCmd_Alloc] = handle_irq_table_alloc,
1266         [IRQTableCmd_AllocDestCap] = handle_irq_table_alloc_dest_cap,
1267         [IRQTableCmd_Set] = handle_irq_table_set,
1268         [IRQTableCmd_Delete] = handle_irq_table_delete
1269     },
1270     [ObjType_IO] = {
1271         [IOCmd_Outb] = handle_io,
1272         [IOCmd_Outw] = handle_io,
1273         [IOCmd_Outd] = handle_io,
1274         [IOCmd_Inb] = handle_io,
1275         [IOCmd_Inw] = handle_io,
1276         [IOCmd_Ind] = handle_io
1277     },
1278     [ObjType_Notify_IPI] = {
1279         [NotifyCmd_Send] = handle_ipi_notify_send
1280     },
1281     [ObjType_PerfMon] = {
1282         [PerfmonCmd_Activate] = performance_counter_activate,
1283         [PerfmonCmd_Deactivate] = performance_counter_deactivate,
1284         [PerfmonCmd_Write] = performance_counter_write,
1285     },
1286     [ObjType_ID] = {
1287         [IDCmd_Identify] = handle_idcap_identify,
1288     }
1289 };
1290
/* syscall C entry point; called only from entry.S so no prototype in header */
struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
                          uint64_t *args, uint64_t rflags, uint64_t rip);

/**
 * \brief Top-level system call demultiplexer.
 *
 * \param syscall  System call number (SYSCALL_INVOKE, SYSCALL_YIELD, ...).
 * \param arg0     First syscall argument; for SYSCALL_INVOKE this is the
 *                 packed invocation header word (layout documented below).
 * \param arg1     Second syscall argument (meaning depends on syscall).
 * \param args     Pointer to the remaining user-supplied argument words.
 * \param rflags   Caller's saved RFLAGS, restored on resume.
 * \param rip      Caller's saved instruction pointer.
 *
 * \return Error/value pair handed back to the calling dispatcher.
 */
struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
                          uint64_t *args, uint64_t rflags, uint64_t rip)
{
    struct sysret retval = { .error = SYS_ERR_OK, .value = 0 };

    // XXX
    // Set dcb_current->disabled correctly.  This should really be
    // done in entry.S
    // XXX
    assert(dcb_current != NULL);
    if (dispatcher_is_disabled_ip(dcb_current->disp, rip)) {
        dcb_current->disabled = true;
    } else {
        dcb_current->disabled = false;
    }
    assert(get_dispatcher_shared_generic(dcb_current->disp)->disabled ==
            dcb_current->disabled);

    switch(syscall) {
    case SYSCALL_INVOKE: /* Handle capability invocation */
    {
        // unpack "header" word: bits 63-32 cptr, 31-24 send level,
        // 23-16 invoke level, 15-8 message length in words, 7-0 flags
        capaddr_t invoke_cptr = arg0 >> 32;
        uint8_t send_level = arg0 >> 24;
        uint8_t invoke_level = arg0 >> 16;
        uint8_t length_words = arg0 >> 8;
        uint8_t flags = arg0;

        debug(SUBSYS_SYSCALL, "sys_invoke(0x%x(%d), 0x%lx)\n",
              invoke_cptr, invoke_level, arg1);
        //printk(LOG_NOTE, "sys_invoke(0x%x(%d), 0x%lx)\n",
        //      invoke_cptr, invoke_level, arg1);

        // Capability to invoke
        struct capability *to = NULL;
        retval.error = caps_lookup_cap(&dcb_current->cspace.cap, invoke_cptr,
                                       invoke_level, &to, CAPRIGHTS_READ);
        if (err_is_fail(retval.error)) {
            break;
        }

        assert(to != NULL);
        assert(to->type < ObjType_Num);

        // Endpoint cap, do LMP
        if (to->type == ObjType_EndPoint) {
            struct dcb *listener = to->u.endpoint.listener;
            assert(listener != NULL);

            if (listener->disp == 0) {
                retval.error = SYS_ERR_LMP_NO_TARGET;
                break;
            }

            /* limit length of message from buggy/malicious sender */
            length_words = MIN(length_words, LMP_MSG_LENGTH);

            // does the sender want to yield their timeslice on success?
            bool sync = flags & LMP_FLAG_SYNC;
            // does the sender want to yield to the target if undeliverable?
            bool yield = flags & LMP_FLAG_YIELD;
            // is the cap (if present) to be deleted on send?
            bool give_away = flags & LMP_FLAG_GIVEAWAY;

            // try to deliver message
            retval.error = lmp_deliver(to, dcb_current, args, length_words,
                                       arg1, send_level, give_away);

            /* Switch to receiver upon successful delivery with sync flag,
             * or (some cases of) unsuccessful delivery with yield flag */
            enum err_code err_code = err_no(retval.error);
            if ((sync && err_is_ok(retval.error)) ||
                (yield && (err_code == SYS_ERR_LMP_BUF_OVERFLOW
                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_LOOKUP
                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_INVALID
                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_SLOT_OCCUPIED))
                    ) {
                if (err_is_fail(retval.error)) {
                    struct dispatcher_shared_generic *current_disp =
                        get_dispatcher_shared_generic(dcb_current->disp);
                    struct dispatcher_shared_generic *listener_disp =
                        get_dispatcher_shared_generic(listener->disp);
                    debug(SUBSYS_DISPATCH, "LMP failed; %.*s yields to %.*s: %u\n",
                          DISP_NAME_LEN, current_disp->name,
                          DISP_NAME_LEN, listener_disp->name, err_code);
                }

                // special-case context switch: ensure correct state in current DCB
                dispatcher_handle_t handle = dcb_current->disp;
                struct dispatcher_shared_x86_64 *disp =
                    get_dispatcher_shared_x86_64(handle);
                dcb_current->disabled = dispatcher_is_disabled_ip(handle, rip);
                struct registers_x86_64 *save_area;
                if (dcb_current->disabled) {
                    save_area = &disp->disabled_save_area;
                } else {
                    save_area = &disp->enabled_save_area;
                }

                // Should be enabled. Else, how do we do an invocation??
                if (dcb_current->disabled) {
                    panic("Dispatcher needs to be enabled for this invocation");
                }

                // save calling dispatcher's registers, so that when the dispatcher
                // next runs, it has a valid state in the relevant save area.
                // Save RIP, RFLAGS, RSP and set RAX (return value) for later resume
                save_area->rax = retval.error; // XXX: x86 1st return register
                save_area->rip = rip;
                save_area->eflags = rflags;
                save_area->rsp = user_stack_save;

                if (!dcb_current->is_vm_guest) {
                    /* save and zero FS/GS selectors (they're unmodified by the syscall path) */
                    __asm ("mov     %%fs, %[fs]     \n\t"
                    "mov     %%gs, %[gs]     \n\t"
                    "mov     %[zero], %%fs   \n\t"
                    "mov     %[zero], %%gs   \n\t"
                    : /* No output */
                    :
                    [fs] "m" (save_area->fs),
                    [gs] "m" (save_area->gs),
                    [zero] "r" (0)
                    );
                } else {
                    // VM guest: FS/GS live in the VMCB/VMCS, not in the CPU
                    // selector registers, so fetch them from there instead
#ifndef __k1om__
#ifdef CONFIG_SVM
                    lpaddr_t lpaddr = gen_phys_to_local_phys(dcb_current->guest_desc.vmcb.cap.u.frame.base);
                    amd_vmcb_t vmcb;
                    amd_vmcb_initialize(&vmcb, (void *)local_phys_to_mem(lpaddr));
                    save_area->fs = amd_vmcb_fs_selector_rd(&vmcb);
                    save_area->gs = amd_vmcb_gs_selector_rd(&vmcb);
#else
                    errval_t err;
                    err = vmread(VMX_GUEST_FS_SEL, (uint64_t *)&save_area->fs);
                    err += vmread(VMX_GUEST_GS_SEL, (uint64_t *)&save_area->gs);
                    assert(err_is_ok(err));
#endif
#else
                    panic("VM Guests not supported on Xeon Phi");
#endif
                        }
                // Context-switch straight to the receiver; never returns
                dispatch(to->u.endpoint.listener);
                panic("dispatch returned");
            } else {
                // No immediate switch: just make the receiver runnable next
                struct dcb *dcb = to->u.endpoint.listener;

                schedule_now(dcb);
            }
        } else { // not endpoint cap, call kernel handler through dispatch table
            // printk(LOG_NOTE, "sys_invoke: to->type = %d, cmd = %"PRIu64"\n",
            //         to->type, args[0]);

            uint64_t cmd = args[0];
            if (cmd >= CAP_MAX_CMD) {
                retval.error = SYS_ERR_ILLEGAL_INVOCATION;
                break;
            }

            // Call the invocation
            invocation_handler_t invocation = invocations[to->type][cmd];
            if(invocation == NULL) {
                printk(LOG_WARN, "invocation not found. type: %"PRIu32", cmd: %"PRIu64"\n",
                              to->type, cmd);
                retval.error = SYS_ERR_ILLEGAL_INVOCATION;
            } else {
                retval = invocation(to, cmd, &args[1]);
            }
        }
        break;
    }

        // Yield the CPU to the next dispatcher
    case SYSCALL_YIELD:
        TRACE(KERNEL, SC_YIELD, 0);
        retval = sys_yield((capaddr_t)arg0);
        TRACE(KERNEL, SC_YIELD, 1);
        break;

        // NOP system call for benchmarking purposes
    case SYSCALL_NOP:
        break;

        // Debug print system call
    case SYSCALL_PRINT:
        TRACE(KERNEL, SC_PRINT, 0);
        retval.error = sys_print((char *)arg0, arg1);
        TRACE(KERNEL, SC_PRINT, 1);
        break;

        // Reboot!
        // FIXME: this should be a kernel cap invocation or similarly restricted
    case SYSCALL_REBOOT:
        reboot();
        break;

    case SYSCALL_X86_FPU_TRAP_ON:
        fpu_trap_on();
        break;

    case SYSCALL_X86_RELOAD_LDT:
        maybe_reload_ldt(dcb_current, true);
        break;

        // Temporarily suspend the CPU
    case SYSCALL_SUSPEND:
        TRACE(KERNEL, SC_SUSPEND, 0);
        retval = sys_suspend((bool)arg0);
        TRACE(KERNEL, SC_SUSPEND, 1);
        break;

    case SYSCALL_GET_ABS_TIME:
        retval = sys_get_absolute_time();
        break;

        // Debug sub-syscalls, demultiplexed on arg0
    case SYSCALL_DEBUG:
        switch(arg0) {
        case DEBUG_CONTEXT_COUNTER_RESET:
            dispatch_csc_reset();
            break;

        case DEBUG_CONTEXT_COUNTER_READ:
            retval.value = dispatch_get_csc();
            break;

        case DEBUG_TIMESLICE_COUNTER_READ:
            retval.value = systime_now();
            break;

        case DEBUG_FLUSH_CACHE:
            wbinvd();
            break;

        case DEBUG_SEND_IPI:
            apic_send_std_ipi(arg1, args[0], args[1]);
            break;

        case DEBUG_SET_BREAKPOINT:
            debugregs_set_breakpoint(arg1, args[0], args[1]);
            break;

        case DEBUG_GET_TSC_PER_MS:
            retval.value = timing_get_tsc_per_ms();
            break;

        case DEBUG_GET_APIC_TIMER:
            retval.value = apic_timer_get_count();
            break;

        case DEBUG_GET_APIC_TICKS_PER_SEC:
            retval.value = timing_get_apic_ticks_per_sec();
            break;

        case DEBUG_TRACE_PMEM_CTRL:
#ifdef TRACE_PMEM_CAPS
            if (arg1) {
                caps_trace_ctrl(arg1, args[0], args[1]);
            } else {
                caps_trace_ctrl(arg1, 0, 0);
            }
#endif
            retval.value = 0;
            retval.error = SYS_ERR_OK;
            break;


        case DEBUG_GET_APIC_ID:
            retval.value = apic_get_id();
            break;

        case DEBUG_CREATE_IRQ_SRC_CAP:
            retval.error = irq_debug_create_src_cap(arg1, args[0], args[1],
                    args[2], args[3]);
            break;

        default:
            printk(LOG_ERR, "invalid sys_debug msg type\n");
        }
        break;

    default:
        printk(LOG_ERR, "sys_syscall: Illegal system call! "
               "(0x%lx, 0x%lx, 0x%lx)\n", syscall, arg0, arg1);
        retval.error = SYS_ERR_ILLEGAL_SYSCALL;
        break;
    }

    // If dcb_current got removed, dispatch someone else
    if (dcb_current == NULL) {
        assert(err_is_ok(retval.error));
        dispatch(schedule());
    }

    if (syscall == SYSCALL_INVOKE) {
        debug(SUBSYS_SYSCALL, "invoke returning 0x%lx 0x%lx\n",
              retval.error, retval.value);
    }

    return retval;
}