 * \brief Kernel management of dispatchers (implementation).
 * Copyright (c) 2007, 2008, 2009, 2010, 2011, 2013, ETH Zurich.
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.

#include <barrelfish_kpi/cpu.h>
#include <exec.h> /* XXX wait_for_interrupt, resume, execute */
#include <paging_kernel_arch.h>
#include <barrelfish_kpi/syscalls.h>
#include <barrelfish_kpi/lmp.h>
#include <trace/trace.h>
#include <trace_definitions/trace_defs.h>
#include <barrelfish_kpi/dispatcher_shared_target.h>
#include <barrelfish_kpi/cpu_arch.h>
#include <barrelfish_kpi/registers_arch.h>

#if defined(__x86_64__) || defined(__i386__)
# include <arch/x86/apic.h>

#ifdef FPU_LAZY_CONTEXT_SWITCH

#define MIN(a,b) ((a) < (b) ? (a) : (b))
 * \brief The kernel timeslice given in milliseconds.
int kernel_timeslice = CONFIG_TIMESLICE;

/// Counter for number of context switches
uint64_t context_switch_counter = 0;

/// Current execution dispatcher (when in system call or exception)
struct dcb *dcb_current = NULL;

/// Remembered FPU-using DCB (NULL if none)
struct dcb *fpu_dcb = NULL;
 * \brief Switch context to 'dcb'.
 *
 * This is a wrapper function to call the real, hardware-dependent
 * context-switch function to switch to the dispatcher pointed to by
 * 'dcb'. It also sets 'dcb_current'.
 *
 * \param dcb Pointer to dispatcher to which to switch context.
static inline void context_switch(struct dcb *dcb)
    // printf("Executing the context switch\n");
    assert(dcb->vspace != 0);

    // VM guests do not have a user space dispatcher
    if (!dcb->is_vm_guest) {
        assert(dcb->disp != 0);
#ifdef FPU_LAZY_CONTEXT_SWITCH
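    /*
     * In a nutshell: with lazy FPU switching the FPU register state of
     * 'fpu_dcb' stays loaded across context switches. Switching to another
     * dispatcher normally just arms the FPU trap, so state is saved and
     * restored only on the first actual FPU use. The exception is a target
     * that is currently disabled -- presumably it cannot take the FPU-trap
     * upcall in that state -- so its FPU state is restored eagerly in the
     * second fragment further down.
     */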
    // XXX: It should be possible to merge this code fragment with the
    // other FPU restore fragment below
    if(fpu_dcb != NULL && !dcb->is_vm_guest) {
        struct dispatcher_shared_generic *disp =
            get_dispatcher_shared_generic(dcb->disp);

        // Switch the FPU trap on if we switch away from the FPU DCB and the
        // target is enabled. If the target is disabled, we eagerly restore
        // the FPU instead.
        if(fpu_dcb != dcb && !dcb->disabled) {

        // Restore FPU trap state
    paging_context_switch(dcb->vspace);
    context_switch_counter++;

    if (!dcb->is_vm_guest) {
        assert(dcb->disp_cte.cap.type == ObjType_Frame);
        /* FIXME: incomplete clean-up of "thread_register" in progress here.
         * Complain vigorously to AB if he checks this mess in
#if defined(__x86_64__) || defined(__k1om__) /* Setup new LDT */
        maybe_reload_ldt(dcb, false);

        struct dispatcher_shared_generic *disp =
            get_dispatcher_shared_generic(dcb->disp);
#ifdef FPU_LAZY_CONTEXT_SWITCH
        // Eagerly restore the FPU if it was used while disabled, and set the
        // FPU trap accordingly
        if(disp->fpu_used && dcb->disabled) {
            // Context switch if FPU state is stale
                // XXX: Need to reset fpu_dcb when that DCB is deleted
                struct dispatcher_shared_generic *dst =
                    get_dispatcher_shared_generic(fpu_dcb->disp);

                // Store old FPU state if it was used
                if(fpu_dcb->disabled) {
                    fpu_save(dispatcher_get_disabled_fpu_save_area(fpu_dcb->disp));
                } else {
                    assert(!fpu_dcb->disabled);
                    fpu_save(dispatcher_get_enabled_fpu_save_area(fpu_dcb->disp));
                }

                if(disp->fpu_used == 1) {
                    fpu_restore(dispatcher_get_disabled_fpu_save_area(dcb->disp));
                } else {
                    assert(disp->fpu_used == 2);
                    fpu_restore(dispatcher_get_enabled_fpu_save_area(dcb->disp));
                }

                // Restore trap state once more, since we modified it
#endif /* FPU_LAZY_CONTEXT_SWITCH */
         * The name of the function is somewhat misleading. We need an unused
         * user register that always stores the pointer to the current
         * dispatcher. Most ABIs define a register for thread-local storage,
         * and we have been abusing that on x64 for the dispatcher pointer
         * -- arch_set_thread_register sets this pointer. Obviously this
         * needs to change to support thread-local storage using a standard
         * ABI, so we will have to figure out how to get to the dispatcher
         * from something like a thread-local variable. The reason that this
         * is in the switch path and not in resume/execute is that on x86_64
         * loading the thread register (fs) is stupidly expensive, so we avoid
         * doing it unless we switch contexts -- presumably that could be a
         * local optimisation in the x86_64 dispatch paths rather than the
         * generic context_switch path.
        arch_set_thread_register(disp->udisp);
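        /*
         * For reference, user space finds the dispatcher again by reading the
         * same register. A minimal sketch of such an accessor, assuming
         * x86_64 and that the dispatcher pointer sits at offset 0 of the fs
         * segment (the helper name is illustrative only):
         *
         *   static inline dispatcher_handle_t cur_dispatcher_ptr(void)
         *   {
         *       dispatcher_handle_t udisp;
         *       __asm volatile("movq %%fs:0, %0" : "=r" (udisp));
         *       return udisp;
         *   }
         */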
struct dcb *run_next = NULL;

#if CONFIG_TRACE && NETWORK_STACK_BENCHMARK
#endif // CONFIG_TRACE && NETWORK_STACK_BENCHMARK

void __attribute__ ((noreturn)) dispatch(struct dcb *dcb)
#ifdef FPU_LAZY_CONTEXT_SWITCH
    // Save state of the FPU trap for this domain (treat it like normal
    // context-switched state)
    if(dcb_current != NULL && !dcb_current->is_vm_guest) {
        struct dispatcher_shared_generic *disp =
            get_dispatcher_shared_generic(dcb_current->disp);
        disp->fpu_trap = fpu_trap_get();

    // XXX FIXME: Why is this null pointer check on the fast path?
    // If we have nothing to do we should call something other than dispatch
#if defined(__x86_64__) || defined(__i386__) || defined(__k1om__)
        // Can this be moved into wait_for_interrupt?
        // Or wait_for_nonscheduling_interrupt()?
        if (!wakeup_is_pending()) {
        wait_for_interrupt();
    // XXX: run_next scheduling hack
    if(run_next != NULL) {

    // Don't context switch if we are current already
    if (dcb_current != dcb) {
        trace_event(TRACE_SUBSYS_KERNEL,
                    TRACE_EVENT_KERNEL_CSWITCH,
                    (uint32_t)(lvaddr_t)dcb & 0xFFFFFFFF);
    dispatcher_handle_t handle = dcb->disp;
    struct dispatcher_shared_generic *disp =
        get_dispatcher_shared_generic(handle);
    arch_registers_state_t *disabled_area =
        dispatcher_get_disabled_save_area(handle);

    disp->systime = kernel_now + kcb_current->kernel_off;

    TRACE(KERNEL, SC_YIELD, 1);
        debug(SUBSYS_DISPATCH, "resume %.*s at 0x%" PRIx64 "\n", DISP_NAME_LEN,
              disp->name, (uint64_t)registers_get_ip(disabled_area));
        assert(dispatcher_is_disabled_ip(handle,
                                         registers_get_ip(disabled_area)));

        if(!dcb->is_vm_guest) {
            resume(disabled_area);
#if defined(__x86_64__) && !defined(__k1om__)

        debug(SUBSYS_DISPATCH, "dispatch %.*s\n", DISP_NAME_LEN, disp->name);
        assert(disp->dispatcher_run != 0);

        if(!dcb->is_vm_guest) {
            execute(disp->dispatcher_run);
#if defined(__x86_64__) && !defined(__k1om__)
            vmkit_vmexec(dcb, (disp) ? disp->dispatcher_run : 0);
} // end function: dispatch
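/*
 * To summarise the paths through dispatch(): with nothing runnable the CPU
 * parks in wait_for_interrupt(); otherwise the kernel context-switches if
 * necessary and then either resume()s the saved disabled register area (when
 * the dispatcher was interrupted while disabled) or upcalls it via execute()
 * at its dispatcher_run entry point, with VM guests taking the corresponding
 * vmkit paths instead.
 */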
 * \brief Transfer a cap from the CSpace of 'send' to the endpoint 'ep'.
 *
 * Looks up the capability at address 'send_cptr' (with 'send_bits' valid
 * bits) in the sender's CSpace and transfers it to the receive slot
 * registered at the destination endpoint.
 *
 * \param ep Endpoint capability of destination
 * \param send Pointer to sending DCB.
 * \param send_cptr Address of capability in sender's cspace
 * \param send_bits Valid bits in #send_cptr
static errval_t lmp_transfer_cap(struct capability *ep, struct dcb *send,
                                 capaddr_t send_cptr, uint8_t send_bits,
    /* Parameter checking */
    assert(send_cptr != CPTR_NULL);
    assert(send != NULL);
    assert(ep->type == ObjType_EndPoint);
    struct dcb *recv = ep->u.endpoint.listener;
    assert(recv != NULL);
    assert(ep->u.endpoint.epoffset != 0);

    /* Look up the slot receiver can receive caps in */
    struct lmp_endpoint_kern *recv_ep
        = (void *)((uint8_t *)recv->disp + ep->u.endpoint.epoffset);

    struct capability *recv_cnode_cap;
    err = caps_lookup_cap(&recv->cspace.cap, recv_ep->recv_cptr,
                          recv_ep->recv_bits, &recv_cnode_cap,
                          CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return err_push(err, SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_LOOKUP);

    // Check for cnode type
    if (recv_cnode_cap->type != ObjType_CNode) {
        return SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_INVALID;

    // The slot within the cnode
    struct cte *recv_cte;
    recv_cte = caps_locate_slot(recv_cnode_cap->u.cnode.cnode,

    /* Look up source slot in sender */
    struct cte *send_cte;
    err = caps_lookup_slot(&send->cspace.cap, send_cptr, send_bits, &send_cte,
    if (err_is_fail(err)) {
        return err_push(err, SYS_ERR_LMP_CAPTRANSFER_SRC_LOOKUP);
    /* Is the destination slot empty? */
    if (recv_cte->cap.type != ObjType_Null) {
        return SYS_ERR_LMP_CAPTRANSFER_DST_SLOT_OCCUPIED;

    //caps_trace(__func__, __LINE__, send_cte, "transferring");
    //TRACE_CAP_MSG("transferring", send_cte);

    /* Insert send cap into recv cap */
    err = caps_copy_to_cte(recv_cte, send_cte, false, 0, 0);
    assert(err_is_ok(err)); // Cannot fail after checking that slot is empty

    err = caps_delete(send_cte);
    if (err_is_fail(err)) {
        printk(LOG_NOTE, "deleting source of lmp captransfer failed: %"PRIuERRV"\n", err);
    assert(err_is_ok(err)); // A copy now exists in the recv slot, so this
                            // delete should not fail
 * \brief Check whether it would be possible to deliver an LMP payload,
 *        without actually delivering it.
 *
 * \param ep Endpoint capability to send to
 * \param payload_len Length (in number of words) of payload
errval_t lmp_can_deliver_payload(struct capability *ep,
    assert(ep->type == ObjType_EndPoint);
    struct dcb *recv = ep->u.endpoint.listener;
    assert(recv != NULL);

    /* check that receiver exists and has specified an endpoint buffer */
    if (recv->disp == 0 || ep->u.endpoint.epoffset == 0) {
        return SYS_ERR_LMP_NO_TARGET;

    /* locate receiver's endpoint buffer */
    struct lmp_endpoint_kern *recv_ep
        = (void *)((uint8_t *)recv->disp + ep->u.endpoint.epoffset);

    /* check delivered/consumed state */
    uint32_t epbuflen = ep->u.endpoint.epbuflen;
    uint32_t pos = recv_ep->delivered;
    uint32_t consumed = recv_ep->consumed;
    if (pos >= epbuflen || consumed >= epbuflen) {
        return SYS_ERR_LMP_EP_STATE_INVALID;
    /* compute space available in endpoint */
    if (pos >= consumed) {
        epspace = epbuflen - (pos - consumed);
    } else {
        epspace = consumed - pos;
    }
    /* Check if there's enough space for another msg.
     * We always keep one word free, to avoid the special case where
     * delivered == consumed could mean either a completely full or a
     * completely empty buffer. */
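    /*
     * For example (illustrative numbers, and assuming LMP_RECV_HEADER_LENGTH
     * is one word): with epbuflen = 16, pos = 10 and consumed = 4 there are
     * 6 delivered-but-unconsumed words, so epspace = 16 - (10 - 4) = 10; a
     * payload of 8 words needs 8 + 1 = 9 slots and fits, because we demand
     * strictly more free words than the message needs. If instead pos = 3
     * and consumed = 12 (the write position has wrapped), epspace = 12 - 3
     * = 9 and the same message is rejected.
     */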
    if (epspace <= payload_len + LMP_RECV_HEADER_LENGTH) {
        return SYS_ERR_LMP_BUF_OVERFLOW;
 * \brief Deliver the payload of an LMP message to a dispatcher.
 *
 * \param ep Endpoint capability to send to
 * \param send DCB of the sender. Can be NULL for kernel-originated messages
 * \param payload Message payload
 * \param payload_len Length (in number of words) of payload
 * \param captransfer True iff a cap has also been delivered
errval_t lmp_deliver_payload(struct capability *ep, struct dcb *send,
                             uintptr_t *payload, size_t payload_len,

    assert(ep->type == ObjType_EndPoint);
    struct dcb *recv = ep->u.endpoint.listener;
    assert(recv != NULL);
    assert(payload != NULL || payload_len == 0);
    err = lmp_can_deliver_payload(ep, payload_len);
    if (err_is_fail(err)) {

    /* locate receiver's endpoint buffer */
    struct lmp_endpoint_kern *recv_ep
        = (void *)((uint8_t *)recv->disp + ep->u.endpoint.epoffset);

    /* read current pos and buflen */
    uint32_t epbuflen = ep->u.endpoint.epbuflen;
    uint32_t pos = recv_ep->delivered;

    struct dispatcher_shared_generic *send_disp =
        send ? get_dispatcher_shared_generic(send->disp) : NULL;
    struct dispatcher_shared_generic *recv_disp =
        get_dispatcher_shared_generic(recv->disp);
    debug(SUBSYS_DISPATCH, "LMP %.*s -> %.*s\n",
          DISP_NAME_LEN, send ? send_disp->name : "kernel",
          DISP_NAME_LEN, recv_disp->name);

    // Setup receiver's message flags
    union lmp_recv_header recvheader = { .raw = 0 };
    recvheader.x.flags.captransfer = captransfer;
    recvheader.x.length = payload_len;

    recv_ep->buf[pos] = recvheader.raw;
    if (++pos == epbuflen) {

    /* Transfer the msg */
    for(int i = 0; i < payload_len; i++) {
        recv_ep->buf[pos] = payload[i];
        if (++pos == epbuflen) {

    // update the delivered pos
    recv_ep->delivered = pos;

    // tell the dispatcher that it has an outstanding message in one of its EPs
    recv_disp->lmp_delivered += payload_len + LMP_RECV_HEADER_LENGTH;

    // ... and give it a hint which one to look at
    recv_disp->lmp_hint = ep->u.endpoint.epoffset;

    // Make target runnable
 * \brief Deliver an LMP message to a dispatcher.
 *
 * \param ep Endpoint capability to send to
 * \param send DCB of the sender. Can be NULL for kernel-originated messages
 * \param payload Buffer containing message payload
 * \param len Length of message payload, as number of words
 * \param send_cptr Capability to be transferred with LMP
 * \param send_bits Valid bits in #send_cptr
errval_t lmp_deliver(struct capability *ep, struct dcb *send,
                     uintptr_t *payload, size_t len,
                     capaddr_t send_cptr, uint8_t send_bits, bool give_away)

    assert(ep->type == ObjType_EndPoint);
    struct dcb *recv = ep->u.endpoint.listener;
    assert(recv != NULL);
    assert(payload != NULL);
    /* Is the sender trying to send a cap? */
    if (send_cptr != CPTR_NULL) {
        /* Don't attempt to transfer the cap if we can't send the payload */
        err = lmp_can_deliver_payload(ep, len);
        if (err_is_fail(err)) {

        err = lmp_transfer_cap(ep, send, send_cptr, send_bits, give_away);
        if (err_is_fail(err)) {

    err = lmp_deliver_payload(ep, send, payload, len, captransfer);
    // shouldn't fail if we delivered the cap successfully
    assert(!(captransfer && err_is_fail(err)));
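/*
 * A minimal usage sketch (hypothetical caller and variable names; the real
 * callers sit in the kernel's LMP send paths):
 *
 *   uintptr_t msg[4] = { 1, 2, 3, 4 };
 *   errval_t err = lmp_deliver(ep_cap, sender_dcb, msg, 4,
 *                              CPTR_NULL, 0, false);
 *
 * Passing CPTR_NULL as send_cptr delivers only the payload words plus the
 * receive header; a valid cptr additionally transfers that capability into
 * the receiver's registered receive slot via lmp_transfer_cap() above.
 */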