/**
 * \brief Arch-generic system calls implementation.
 */

/*
 * Copyright (c) 2007-2010,2012, ETH Zurich.
 * Copyright (c) 2015, Hewlett Packard Enterprise Development LP.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
 */
#include <barrelfish_kpi/syscalls.h>
#include <capabilities.h>
#include <cap_predicates.h>
#include <mdb/mdb_tree.h>
#include <paging_kernel_helper.h>
#include <paging_kernel_arch.h>
#include <trace/trace.h>
#include <trace_definitions/trace_defs.h>
#include <useraccess.h>
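/* Additional kernel headers assumed to be needed by the handlers below
 * (scheduler/dispatch, wakeup timers, IRQ routing, coreboot spawn handlers,
 * KCBs and distributed capability state); adjust to the actual tree if the
 * names differ. */
#include <kernel.h>
#include <dispatch.h>
#include <distcaps.h>
#include <wakeup.h>
#include <exec.h>
#include <irq.h>
#include <coreboot.h>
#include <kcb.h>
#include <syscall.h>
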
errval_t sys_print(const char *str, size_t length)
{
    /* FIXME: check that string is mapped and accessible to caller! */
    printf("%.*s", (int)length, str);
    return SYS_ERR_OK;
}
/* FIXME: lots of missing argument checks in this function */
struct sysret
sys_dispatcher_setup(struct capability *to, capaddr_t cptr, int depth,
                     capaddr_t vptr, capaddr_t dptr, bool run, capaddr_t odptr)
{
    errval_t err = SYS_ERR_OK;
    assert(to->type == ObjType_Dispatcher);
    struct dcb *dcb = to->u.dispatcher.dcb;

    lpaddr_t lpaddr;

    /* 1. set cspace root */
    if (cptr != CPTR_NULL) {
        struct cte *root;
        err = caps_lookup_slot(&dcb_current->cspace.cap, cptr, depth,
                               &root, CAPRIGHTS_READ);
        if (err_is_fail(err)) {
            return SYSRET(err_push(err, SYS_ERR_DISP_CSPACE_ROOT));
        }
        if (root->cap.type != ObjType_CNode) {
            return SYSRET(err_push(err, SYS_ERR_DISP_CSPACE_INVALID));
        }
        err = caps_copy_to_cte(&dcb->cspace, root, false, 0, 0);
        if (err_is_fail(err)) {
            return SYSRET(err_push(err, SYS_ERR_DISP_CSPACE_ROOT));
        }
    }
    /* 2. set vspace root */
    if (vptr != CPTR_NULL) {
        struct capability *vroot;
        err = caps_lookup_cap(&dcb_current->cspace.cap, vptr, CPTR_BITS,
                              &vroot, CAPRIGHTS_WRITE);
        if (err_is_fail(err)) {
            return SYSRET(err_push(err, SYS_ERR_DISP_VSPACE_ROOT));
        }

        // Insert as dispatcher's VSpace root
        switch (vroot->type) {
        case ObjType_VNode_x86_64_pml4:
            dcb->vspace =
                (lvaddr_t)gen_phys_to_local_phys(vroot->u.vnode_x86_64_pml4.base);
            break;
        case ObjType_VNode_x86_32_pdpt:
            dcb->vspace =
                (lvaddr_t)gen_phys_to_local_phys(vroot->u.vnode_x86_32_pdpt.base);
            break;
        case ObjType_VNode_x86_32_pdir:
            dcb->vspace =
                (lvaddr_t)gen_phys_to_local_phys(vroot->u.vnode_x86_32_pdir.base);
            break;
        case ObjType_VNode_ARM_l1:
            dcb->vspace =
                (lvaddr_t)gen_phys_to_local_phys(vroot->u.vnode_arm_l1.base);
            break;
        case ObjType_VNode_AARCH64_l1:
            dcb->vspace =
                (lvaddr_t)gen_phys_to_local_phys(vroot->u.vnode_aarch64_l1.base);
            break;
        case ObjType_VNode_AARCH64_l2:
            dcb->vspace =
                (lvaddr_t)gen_phys_to_local_phys(vroot->u.vnode_aarch64_l2.base);
            break;
        default:
            return SYSRET(err_push(err, SYS_ERR_DISP_VSPACE_INVALID));
        }
    }
    /* 3. set dispatcher frame pointer */
    if (dptr != CPTR_NULL) {
        struct cte *dispcte;
        err = caps_lookup_slot(&dcb_current->cspace.cap, dptr, CPTR_BITS,
                               &dispcte, CAPRIGHTS_WRITE);
        if (err_is_fail(err)) {
            return SYSRET(err_push(err, SYS_ERR_DISP_FRAME));
        }
        struct capability *dispcap = &dispcte->cap;
        if (dispcap->type != ObjType_Frame) {
            return SYSRET(err_push(err, SYS_ERR_DISP_FRAME_INVALID));
        }

        /* FIXME: check rights, check size */

        lpaddr = gen_phys_to_local_phys(dispcap->u.frame.base);
        dcb->disp = local_phys_to_mem(lpaddr);
        // Copy the cap to dcb also
        err = caps_copy_to_cte(&dcb->disp_cte, dispcte, false, 0, 0);
        // If copy fails, something wrong in kernel
        assert(err_is_ok(err));
    }
    /* 5. Make runnable if desired -- Set pointer to ipi_data */
    if (run) {
        if (dcb->vspace == 0 ||
            (!dcb->is_vm_guest &&
             (dcb->disp == 0 || dcb->cspace.cap.type != ObjType_CNode))) {
            return SYSRET(err_push(err, SYS_ERR_DISP_NOT_RUNNABLE));
        }

        // XXX: dispatchers run disabled the first time they start
        dcb->disabled = true;
        //printf("DCB: %p %.*s\n", dcb, DISP_NAME_LEN, dcb->disp->name);
        make_runnable(dcb);
    }
    /* 6. Copy domain ID off given dispatcher */
    if (odptr != CPTR_NULL) {
        struct capability *odisp;
        err = caps_lookup_cap(&dcb_current->cspace.cap, odptr, CPTR_BITS,
                              &odisp, CAPRIGHTS_READ_WRITE);
        if (err_is_fail(err)) {
            return SYSRET(err_push(err, SYS_ERR_DISP_OCAP_LOOKUP));
        }
        dcb->domain_id = odisp->u.dispatcher.dcb->domain_id;
    }
    /* 7. (HACK) Set current core id */
    {
        struct dispatcher_shared_generic *disp =
            get_dispatcher_shared_generic(dcb->disp);
        disp->curr_core_id = my_core_id;
    }

    if (!dcb->is_vm_guest) {
        struct dispatcher_shared_generic *disp =
            get_dispatcher_shared_generic(dcb->disp);
        err = trace_new_application(disp->name, (uintptr_t) dcb);

        if (err == TRACE_ERR_NO_BUFFER) {
            // Try to use the boot buffer.
            trace_new_boot_application(disp->name, (uintptr_t) dcb);
        }
    }

    return SYSRET(SYS_ERR_OK);
}
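
/**
 * \brief Set the scheduling properties (RBED task type, deadline, WCET,
 *        period, release time and weight) of a dispatcher.
 */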
struct sysret
sys_dispatcher_properties(struct capability *to,
                          enum task_type type, unsigned long deadline,
                          unsigned long wcet, unsigned long period,
                          unsigned long release, unsigned short weight)
{
    assert(to->type == ObjType_Dispatcher);

#ifdef CONFIG_SCHEDULER_RBED
    struct dcb *dcb = to->u.dispatcher.dcb;

    assert(type >= TASK_TYPE_BEST_EFFORT && type <= TASK_TYPE_HARD_REALTIME);
    assert(wcet <= deadline);
    assert(wcet <= period);
    assert(type != TASK_TYPE_BEST_EFFORT || weight > 0);

    trace_event(TRACE_SUBSYS_KERNEL, TRACE_EVENT_KERNEL_SCHED_REMOVE,
                (uint32_t)(lvaddr_t)dcb & 0xFFFFFFFF); // trace payload assumed: dcb address
    scheduler_remove(dcb);

    /* Set task properties */
    dcb->type = type;
    dcb->deadline = deadline;
    dcb->wcet = wcet;
    dcb->period = period;
    dcb->release_time = (release == 0) ? kernel_now : release;
    dcb->weight = weight;

    make_runnable(dcb);
#endif

    return SYSRET(SYS_ERR_OK);
}
/**
 * \brief Retype a capability into one or more new capabilities.
 *
 * \param root            Root CNode to invoke
 * \param source_cptr     Source capability cptr
 * \param type            Type to retype to
 * \param objbits         Object bits for variable-sized types
 * \param dest_cnode_cptr Destination cnode cptr
 * \param dest_slot       Destination slot number
 * \param dest_vbits      Valid bits in destination cnode cptr
 */
struct sysret
sys_retype(struct capability *root, capaddr_t source_cptr, enum objtype type,
           uint8_t objbits, capaddr_t dest_cnode_cptr, cslot_t dest_slot,
           uint8_t dest_vbits, bool from_monitor)
{
    errval_t err;

    /* Parameter checking */
    if (type == ObjType_Null || type >= ObjType_Num) {
        return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
    }

    /* Source capability */
    struct cte *source_cap;
    err = caps_lookup_slot(root, source_cptr, CPTR_BITS, &source_cap,
                           CAPRIGHTS_READ); // read rights assumed sufficient for the source lookup
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_SOURCE_CAP_LOOKUP));
    }
    assert(source_cap != NULL);

    /* Destination cnode */
    struct capability *dest_cnode_cap;
    err = caps_lookup_cap(root, dest_cnode_cptr, dest_vbits,
                          &dest_cnode_cap, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }
    if (dest_cnode_cap->type != ObjType_CNode) {
        return SYSRET(SYS_ERR_DEST_CNODE_INVALID);
    }

    return SYSRET(caps_retype(type, objbits, dest_cnode_cap, dest_slot,
                              source_cap, from_monitor));
}
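
/*
 * sys_retype2 below is a variant of sys_retype that addresses part of the
 * source capability: it takes a byte offset into the source, an object size
 * in bytes and an object count, instead of a single "objbits" size exponent.
 */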
/**
 * \brief Retype (part of) a capability into multiple new capabilities.
 *
 * \param root            Root CNode to invoke
 * \param source_cptr     Source capability cptr
 * \param offset          Offset into source capability from which to retype
 * \param type            Type to retype to
 * \param objsize         Object size for variable-sized types
 * \param count           Number of objects to create
 * \param dest_cnode_cptr Destination cnode cptr
 * \param dest_slot       Destination slot number
 * \param dest_vbits      Valid bits in destination cnode cptr
 */
struct sysret
sys_retype2(struct capability *root, capaddr_t source_cptr, gensize_t offset,
            enum objtype type, gensize_t objsize, size_t count,
            capaddr_t dest_cnode_cptr, cslot_t dest_slot,
            uint8_t dest_vbits, bool from_monitor)
{
    errval_t err;

    /* Parameter checking */
    if (type == ObjType_Null || type >= ObjType_Num) {
        return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
    }

    /* Source capability */
    struct cte *source_cte;
    err = caps_lookup_slot(root, source_cptr, CPTR_BITS, &source_cte,
                           CAPRIGHTS_READ); // read rights assumed sufficient for the source lookup
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_SOURCE_CAP_LOOKUP));
    }
    assert(source_cte != NULL);

    /* Destination cnode */
    struct capability *dest_cnode_cap;
    err = caps_lookup_cap(root, dest_cnode_cptr, dest_vbits,
                          &dest_cnode_cap, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }
    if (dest_cnode_cap->type != ObjType_CNode) {
        return SYSRET(SYS_ERR_DEST_CNODE_INVALID);
    }

    return SYSRET(caps_retype2(type, objsize, count, dest_cnode_cap, dest_slot,
                               source_cte, offset, from_monitor));
}
struct sysret sys_create(struct capability *root, enum objtype type,
                         uint8_t objbits, capaddr_t dest_cnode_cptr,
                         cslot_t dest_slot, int dest_vbits)
{
    errval_t err;
    uint8_t bits = 0;    // run-time creatable types carry no physical range
    genpaddr_t base = 0;

    /* Parameter checking */
    if (type == ObjType_Null || type >= ObjType_Num) {
        return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
    }

    /* Destination CNode */
    struct capability *dest_cnode_cap;
    err = caps_lookup_cap(root, dest_cnode_cptr, dest_vbits,
                          &dest_cnode_cap, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }

    /* Destination slot */
    struct cte *dest_cte;
    dest_cte = caps_locate_slot(dest_cnode_cap->u.cnode.cnode, dest_slot);
    if (dest_cte->cap.type != ObjType_Null) {
        return SYSRET(SYS_ERR_SLOTS_IN_USE);
    }

    /* List capabilities allowed to be created at runtime. */
    switch (type) {

    case ObjType_Dispatcher: // assumed: dispatchers are the creatable type here
        break;

    // only certain types of capabilities can be created at runtime
    default:
        return SYSRET(SYS_ERR_TYPE_NOT_CREATABLE);
    }

    return SYSRET(caps_create_new(type, base, bits, objbits, my_core_id, dest_cte));
}
/**
 * Common code for copying and minting except the mint flag and param passing
 */
struct sysret
sys_copy_or_mint(struct capability *root, capaddr_t destcn_cptr, cslot_t dest_slot,
                 capaddr_t source_cptr, int destcn_vbits, int source_vbits,
                 uintptr_t param1, uintptr_t param2, bool mint)
{
    errval_t err;

    if (!mint) {
        param1 = param2 = 0; // plain copy: the type-specific parameters are unused
    }

    /* Lookup source cap */
    struct cte *src_cap;
    err = caps_lookup_slot(root, source_cptr, source_vbits,
                           &src_cap, CAPRIGHTS_READ);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_SOURCE_CAP_LOOKUP));
    }

    /* Lookup destination cnode cap */
    struct cte *dest_cnode_cap;
    err = caps_lookup_slot(root, destcn_cptr, destcn_vbits,
                           &dest_cnode_cap, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }

    if (dest_cnode_cap->cap.type == ObjType_CNode) {
        return SYSRET(caps_copy_to_cnode(dest_cnode_cap, dest_slot, src_cap,
                                         mint, param1, param2));
    } else {
        return SYSRET(SYS_ERR_DEST_TYPE_INVALID);
    }
}
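
/**
 * \brief Map the capability at source_cptr into the given page table (VNode)
 *        capability at the given slot, recording the resulting mapping
 *        capability in the supplied mapping CNode slot.
 */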
struct sysret
sys_map(struct capability *ptable, cslot_t slot, capaddr_t source_cptr,
        int source_vbits, uintptr_t flags, uintptr_t offset,
        uintptr_t pte_count, capaddr_t mapping_cnptr, int mapping_cnvbits,
        cslot_t mapping_slot)
{
    assert(type_is_vnode(ptable->type));

    errval_t err;

    /* Lookup source cap */
    struct capability *root = &dcb_current->cspace.cap;
    struct cte *src_cte;
    err = caps_lookup_slot(root, source_cptr, source_vbits, &src_cte,
                           CAPRIGHTS_READ); // read rights assumed sufficient for the source lookup
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_SOURCE_CAP_LOOKUP));
    }

    /* Lookup mapping slot */
    struct cte *mapping_cnode_cte;
    err = caps_lookup_slot(root, mapping_cnptr, mapping_cnvbits,
                           &mapping_cnode_cte, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }

    if (mapping_cnode_cte->cap.type != ObjType_CNode) {
        return SYSRET(SYS_ERR_DEST_TYPE_INVALID);
    }

    struct cte *mapping_cte = caps_locate_slot(get_address(&mapping_cnode_cte->cap),
                                               mapping_slot);
    if (mapping_cte->cap.type != ObjType_Null) {
        return SYSRET(SYS_ERR_SLOT_IN_USE);
    }

    // XXX: this does not check if we do have CAPRIGHTS_READ_WRITE on
    // the destination cap (the page table we're inserting into)
    return SYSRET(caps_copy_to_vnode(cte_for_cap(ptable), slot, src_cte, flags,
                                     offset, pte_count, mapping_cte));
}
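
/** \brief Delete the capability at (cptr, bits) in the given root CNode. */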
struct sysret sys_delete(struct capability *root, capaddr_t cptr, uint8_t bits)
{
    struct cte *slot;
    errval_t err = caps_lookup_slot(root, cptr, bits, &slot, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    return SYSRET(caps_delete(slot));
}
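
/** \brief Revoke all copies and descendants of the capability at (cptr, bits). */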
struct sysret sys_revoke(struct capability *root, capaddr_t cptr, uint8_t bits)
{
    struct cte *slot;
    errval_t err = caps_lookup_slot(root, cptr, bits, &slot, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    return SYSRET(caps_revoke(slot));
}
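
/** \brief Return the distributed-capability state of the capability at (cptr, bits). */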
struct sysret sys_get_state(struct capability *root, capaddr_t cptr, uint8_t bits)
{
    struct cte *slot;
    errval_t err = caps_lookup_slot(root, cptr, bits, &slot, CAPRIGHTS_READ);
    if (err_is_fail(err)) {
        return SYSRET(err);
    }
    distcap_state_t state = distcap_get_state(slot);
    return (struct sysret) { .error = SYS_ERR_OK, .value = state };
}
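
/**
 * \brief Yield the CPU.
 *
 * If target names a dispatcher or endpoint capability, the yield is directed
 * to that dispatcher; otherwise the caller gives up the rest of its timeslice
 * and the scheduler picks the next dispatcher to run.
 */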
struct sysret sys_yield(capaddr_t target)
{
    dispatcher_handle_t handle = dcb_current->disp;
    struct dispatcher_shared_generic *disp =
        get_dispatcher_shared_generic(handle);

    debug(SUBSYS_DISPATCH, "%.*s yields%s\n", DISP_NAME_LEN, disp->name,
          !disp->haswork && disp->lmp_delivered == disp->lmp_seen
          ? " and is removed from the runq" : "");

    if (!disp->disabled) {
        printk(LOG_ERR, "SYSCALL_YIELD while enabled\n");
        return SYSRET(SYS_ERR_CALLER_ENABLED);
    }

    struct capability *yield_to = NULL;
    if (target != CPTR_NULL) {
        errval_t err;

        /* directed yield: look up the yield target */
        err = caps_lookup_cap(&dcb_current->cspace.cap, target, CPTR_BITS,
                              &yield_to, CAPRIGHTS_READ);
        if (err_is_fail(err)) {
            return SYSRET(err);
        } else if (yield_to == NULL ||
                   (yield_to->type != ObjType_EndPoint
                    && yield_to->type != ObjType_Dispatcher)) {
            return SYSRET(SYS_ERR_INVALID_YIELD_TARGET);
        }
        /* FIXME: check rights? */
    }
    disp->disabled = false;
    dcb_current->disabled = false;

    // Remove from queue when no work and no more messages and no missed wakeup
    systime_t wakeup = disp->wakeup;
    if (!disp->haswork && disp->lmp_delivered == disp->lmp_seen
        && (wakeup == 0 || wakeup > (kernel_now + kcb_current->kernel_off))) {

        trace_event(TRACE_SUBSYS_NNET, TRACE_EVENT_NNET_SCHED_REMOVE,
                    (uint32_t)(lvaddr_t)dcb_current & 0xFFFFFFFF);
        trace_event(TRACE_SUBSYS_KERNEL, TRACE_EVENT_KERNEL_SCHED_REMOVE,
                    (uint32_t)(lvaddr_t)dcb_current & 0xFFFFFFFF); // payload assumed: same as NNET event above

        scheduler_remove(dcb_current);
        if (wakeup != 0) {
            wakeup_set(dcb_current, wakeup);
        }
    } else {
        // Otherwise yield for the timeslice
        scheduler_yield(dcb_current);
    }
    if (yield_to != NULL) {
        /* directed yield: wake the target and switch to it directly */
        struct dcb *target_dcb = NULL;
        if (yield_to->type == ObjType_EndPoint) {
            target_dcb = yield_to->u.endpoint.listener;
        } else if (yield_to->type == ObjType_Dispatcher) {
            target_dcb = yield_to->u.dispatcher.dcb;
        } else {
            panic("invalid type in yield cap");
        }

        trace_event(TRACE_SUBSYS_NNET, TRACE_EVENT_NNET_YIELD,
                    (uint32_t)(lvaddr_t)target_dcb & 0xFFFFFFFF);
        make_runnable(target_dcb);
        dispatch(target_dcb);
    } else {
        // trace_event(TRACE_SUBSYS_BNET, TRACE_EVENT_BNET_YIELD,

        /* undirected yield */
        dispatch(schedule());
    }

    panic("Yield returned!");
}
struct sysret sys_suspend(bool do_halt)
{
    dispatcher_handle_t handle = dcb_current->disp;
    struct dispatcher_shared_generic *disp =
        get_dispatcher_shared_generic(handle);

    debug(SUBSYS_DISPATCH, "%.*s suspends (halt: %d)\n", DISP_NAME_LEN, disp->name, do_halt);

    if (!disp->disabled) {
        printk(LOG_ERR, "SYSCALL_SUSPEND while enabled\n");
        return SYSRET(SYS_ERR_CALLER_ENABLED);
    }

    disp->disabled = false;
    dcb_current->disabled = false;

    if (do_halt) {
        //printf("%s:%s:%d: before halt of core (%"PRIuCOREID")\n",
        //       __FILE__, __FUNCTION__, __LINE__, my_core_id);
        halt(); // assumed: architecture-specific halt of this core
    } else {
        // Note this only works if we're calling this inside
        // the kcb we're currently running
        printk(LOG_NOTE, "in sys_suspend(<no_halt>)!\n");
        printk(LOG_NOTE, "calling switch_kcb!\n");
        struct kcb *next = kcb_current->next;
        kcb_current->next = NULL;
        switch_kcb(next);
        // enable kcb scheduler
        printk(LOG_NOTE, "enabling kcb scheduler!\n");
        kcb_sched_suspended = false;
        // schedule something in the other kcb
        dispatch(schedule());
    }

    panic("Yield returned!");
}
/**
 * The format of the returned ID is:
 *
 * --------------------------------------------------------------------
 * |        0 (unused)       |  coreid  |        core_local_id         |
 * --------------------------------------------------------------------
 *  63                     40 39      32 31                           0
 */
struct sysret sys_idcap_identify(struct capability *cap, idcap_id_t *id)
{
    STATIC_ASSERT_SIZEOF(coreid_t, 1);

    assert(cap->type == ObjType_ID);
    idcap_id_t coreid = (idcap_id_t) cap->u.id.coreid;
    *id = coreid << 32 | cap->u.id.core_local_id;

    return SYSRET(SYS_ERR_OK);
}
/**
 * Calls correct handler function to spawn an app core.
 *
 * At the moment spawn_core_handlers is set-up per
 * architecture inside text_init() usually found in init.c.
 *
 * \note Generally the x86 terms of BSP and APP core are used
 * throughout Barrelfish to distinguish between bootstrap core (BSP)
 * and application cores (APP).
 *
 * \param core_id  Identifier of the core which we want to boot
 * \param cpu_type Architecture of the core.
 * \param entry    Entry point for code to start execution.
 *
 * \retval SYS_ERR_OK Core successfully booted.
 * \retval SYS_ERR_ARCHITECTURE_NOT_SUPPORTED No handler registered for
 *         the specified cpu_type.
 * \retval SYS_ERR_CORE_NOT_FOUND Core failed to boot.
 */
struct sysret sys_monitor_spawn_core(coreid_t core_id, enum cpu_type cpu_type,
                                     genvaddr_t entry) // entry parameter type assumed genvaddr_t
{
    assert(cpu_type < CPU_TYPE_NUM);
    // TODO(gz): assert core_id valid
    // TODO(gz): assert entry range?

    if (cpu_type < CPU_TYPE_NUM &&
        coreboot_get_spawn_handler(cpu_type) == NULL) {
        assert(!"Architecture not supported -- " \
               "or you failed to register spawn handler?");
        return SYSRET(SYS_ERR_ARCHITECTURE_NOT_SUPPORTED);
    }

    int r = (coreboot_get_spawn_handler(cpu_type))(core_id, entry);
    if (r != 0) {
        return SYSRET(SYS_ERR_CORE_NOT_FOUND);
    }

    return SYSRET(SYS_ERR_OK);
}
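
/**
 * \brief Register an additional kernel control block (KCB) with this core,
 *        rebasing its time offset and refreshing scheduler, core-id and IRQ
 *        routing state.
 */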
struct sysret sys_kernel_add_kcb(struct kcb *new_kcb)
{
    kcb_add(new_kcb); // assumed: link the new KCB into this core's KCB ring

    // update kernel_now offset
    new_kcb->kernel_off -= kernel_now;
    // reset scheduler statistics
    scheduler_reset_time();
    // update current core id of all domains
    kcb_update_core_id(new_kcb);
    // upcall domains with registered interrupts to tell them to re-register
    irq_table_notify_domains(new_kcb);

    return SYSRET(SYS_ERR_OK);
}
struct sysret sys_kernel_remove_kcb(struct kcb *to_remove)
{
    return SYSRET(kcb_remove(to_remove));
}
struct sysret sys_kernel_suspend_kcb_sched(bool suspend)
{
    printk(LOG_NOTE, "in kernel_suspend_kcb_sched invocation!\n");
    kcb_sched_suspended = suspend;
    return SYSRET(SYS_ERR_OK);
}
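
/** \brief Return the frame identity (physical base and size) of a KCB capability. */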
struct sysret sys_handle_kcb_identify(struct capability* to, struct frame_identity *fi)
{
    // Return with physical base address of frame
    // XXX: pack size into bottom bits of base address
    assert(to->type == ObjType_KernelControlBlock);
    lvaddr_t vkcb = (lvaddr_t) to->u.kernelcontrolblock.kcb;
    assert((vkcb & BASE_PAGE_MASK) == 0);

    if (!access_ok(ACCESS_WRITE, (lvaddr_t)fi, sizeof(struct frame_identity))) {
        return SYSRET(SYS_ERR_INVALID_USER_BUFFER);
    }

    fi->base = get_address(to);
    fi->bytes = get_size(to);

    return SYSRET(SYS_ERR_OK);
}
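
/** \brief Return the current absolute system time (kernel_now plus this KCB's offset). */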
struct sysret sys_get_absolute_time(void)
{
    // Return kernel_now.
    // XXX: this may not provide all the properties of absolute time we want,
    // but should be sufficient to implement stuff that needs timing with 1/10
    // of a second accuracy range.
    return (struct sysret) {
        .error = SYS_ERR_OK,
        .value = kernel_now + kcb_current->kernel_off,
    };
}