3 * \brief Process management service.
7 * Copyright (c) 2017, ETH Zurich.
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
15 #include <barrelfish/barrelfish.h>
16 #include <barrelfish/nameservice_client.h>
17 #include <barrelfish/proc_mgmt_client.h>
18 #include <barrelfish/spawn_client.h>
19 #include <if/monitor_defs.h>
20 #include <if/proc_mgmt_defs.h>
21 #include <if/spawn_defs.h>
25 #include "pending_clients.h"
26 #include "spawnd_state.h"
28 static void add_spawnd_handler(struct proc_mgmt_binding *b, coreid_t core_id,
31 if (spawnd_state_exists(core_id)) {
32 DEBUG_ERR(PROC_MGMT_ERR_SPAWND_EXISTS, "spawnd_state_exists");
36 // Bind with the spawnd.
37 struct spawn_binding *spawnb;
38 errval_t err = spawn_bind_iref(iref, &spawnb);
39 if (err_is_fail(err)) {
40 DEBUG_ERR(err, "spawn_bind_iref");
44 err = spawnd_state_alloc(core_id, spawnb);
45 if (err_is_fail(err)) {
46 DEBUG_ERR(err, "spawnd_state_alloc");
49 debug_printf("Process manager bound with spawnd.%u on iref %u\n", core_id,
53 static void add_spawnd_handler_non_monitor(struct proc_mgmt_binding *b,
54 coreid_t core_id, iref_t iref)
56 // debug_printf("Ignoring add_spawnd call: %s\n",
57 // err_getstring(PROC_MGMT_ERR_NOT_MONITOR));
/**
 * \brief Handles a spawn_reply message sent by a spawnd.
 *
 * Releases the pending client registered for domain_cap and dispatches on
 * the client's type:
 *  - spawn / spawn-with-caps / span: update the domain book-keeping if
 *    spawnd succeeded, then send the matching response to the client;
 *  - kill / exit: count down the spawnds still running the domain, and
 *    afterwards the spawnds still holding resources for it.
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (closing braces, break/return statements, trailing call arguments and
 * string continuations). The comments below describe only what the visible
 * lines do.
 *
 * \param b          Spawnd binding the reply arrived on.
 * \param domain_cap Domain cap identifying the request being answered.
 * \param spawn_err  Result of the operation as reported by spawnd.
 */
60 static void spawn_reply_handler(struct spawn_binding *b,
61 struct capref domain_cap, errval_t spawn_err)
63 struct pending_client *cl;
64 errval_t err = pending_clients_release(domain_cap, &cl);
65 if (err_is_fail(err)) {
66 // This might be a kill request issued after a successful spawn/span
67 // followed by a local error in the process manager (see below). If that
68 // is the case, then we won't have a client, as it has already been
70 DEBUG_ERR(err, "failed to retrieve pending client based on domain cap "
71 "returned by spawnd");
// resp_err records the result of sending a response to the client; it is
// checked once, at the very end of the function.
75 errval_t resp_err = SYS_ERR_OK;
76 struct domain_entry *entry;
// Spawn: if spawnd succeeded, call domain_spawn() for the client's core,
// then send spawn_response to the client.
78 case ClientType_Spawn:
80 if (err_is_ok(spawn_err)) {
81 err = domain_spawn(domain_cap, cl->core_id);
83 resp_err = cl->b->tx_vtbl.spawn_response(cl->b, NOP_CONT, err,
// Spawn with caps: same as above, answered with spawn_with_caps_response.
87 case ClientType_SpawnWithCaps:
89 if (err_is_ok(spawn_err)) {
90 err = domain_spawn(domain_cap, cl->core_id);
92 resp_err = cl->b->tx_vtbl.spawn_with_caps_response(cl->b, NOP_CONT,
// Span: if spawnd succeeded, call domain_span() for the client's core,
// then send span_response to the client.
98 if (err_is_ok(spawn_err)) {
99 err = domain_span(domain_cap, cl->core_id);
101 resp_err = cl->b->tx_vtbl.span_response(cl->b, NOP_CONT, err);
104 case ClientType_Kill:
105 if (err_is_fail(spawn_err)) {
106 // Looks like some spawnd was unable to successfully kill
107 // its dispatcher for this domain. Not much the process
108 // manager can do about it; return the error to the client.
109 resp_err = cl->b->tx_vtbl.kill_response(cl->b, NOP_CONT,
114 err = domain_get_by_cap(domain_cap, &entry);
115 if (err_is_fail(err)) {
116 DEBUG_ERR(err, "failed to retrieve domain by domain_cap "
117 "returned by spawnd after kill");
// A reply is only expected while some spawnd is still running the domain
// or still holds resources for it, and never once it has been cleaned.
121 assert(entry->num_spawnds_resources > 0 ||
122 entry->num_spawnds_running > 0);
123 assert(entry->status != DOMAIN_STATUS_CLEANED);
// First phase: this reply accounts for one spawnd that was running the
// domain. The client is registered again under the same domain cap
// (presumably so that later replies for this cap still find it).
125 if (entry->num_spawnds_running > 0) {
126 --entry->num_spawnds_running;
128 err = pending_clients_add(domain_cap, cl->b,
129 ClientType_Kill, MAX_COREID);
130 if (err_is_fail(err)) {
131 DEBUG_ERR(err, "pending_clients_add in reply handler");
// That was the last running spawnd: mark the domain stopped and killed,
// answer the kill client, notify the waiters, then request cleanup.
134 if (entry->num_spawnds_running == 0) {
135 entry->status = DOMAIN_STATUS_STOPPED;
136 entry->exit_status = EXIT_STATUS_KILLED;
138 // TODO(razvan): Might it be more sane if we respond back
139 // to the client after the domain has been cleaned up (i.e.
140 // the cspace root has been revoked for all dispatchers)?
141 resp_err = cl->b->tx_vtbl.kill_response(cl->b, NOP_CONT,
144 // TODO(razvan): Same problem applies to the waiters: would
145 // it be better if we sent them wait_responses after the
146 // cspace root has been revoked, too? (here and in the exit
// Walk the waiter list, sending a wait_response on each waiter's binding.
// The result of each send is not checked.
148 struct domain_waiter *waiter = entry->waiters;
149 while (waiter != NULL) {
150 waiter->b->tx_vtbl.wait_response(waiter->b, NOP_CONT,
153 struct domain_waiter *aux = waiter;
154 waiter = waiter->next;
// Send a cleanup_request to every spawnd recorded for this domain. The
// replies arrive back in this handler.
158 for (coreid_t i = 0; i < MAX_COREID; ++i) {
159 if (entry->spawnds[i] == NULL) {
163 struct spawn_binding *spb = entry->spawnds[i]->b;
164 spb->rx_vtbl.spawn_reply = spawn_reply_handler;
165 errval_t req_err = spb->tx_vtbl.cleanup_request(spb,
166 NOP_CONT, cap_procmng, domain_cap);
167 if (err_is_fail(req_err)) {
168 DEBUG_ERR(req_err, "failed to send cleanup_request "
169 "to spawnd %u\n", i);
// Second phase: this reply accounts for one spawnd that has released the
// resources it held for the domain.
174 --entry->num_spawnds_resources;
176 if (entry->num_spawnds_resources == 0) {
177 entry->status = DOMAIN_STATUS_CLEANED;
179 // At this point, the domain exists in state CLEANED for
180 // history reasons. For instance, if some other domain
181 // issues a wait call for this one, the process manager can
182 // return the exit status directly.
183 // At some point, however, we might want to just clean up
184 // the domain entry and recycle the domain cap.
186 // Expecting to receive further cleanup replies from other
187 // spawnds for the same domain cap, hence re-add the
// Note that the client is re-added with type ClientType_Exit here, even
// though this is the kill case.
189 err = pending_clients_add(domain_cap, cl->b,
190 ClientType_Exit, MAX_COREID);
191 if (err_is_fail(err)) {
192 DEBUG_ERR(err, "pending_clients_add in reply handler");
// Exit: same two-phase countdown as the kill case, but the visible lines
// send no response to the exiting client and do not overwrite
// entry->exit_status.
198 case ClientType_Exit:
199 if (err_is_fail(spawn_err)) {
200 // Looks like some spawnd was unable to successfully kill
201 // its dispatcher for this domain. Not much the process
202 // manager can do about it. Furthermore, this was an exit call,
203 // so there's no client to reply back to.
207 err = domain_get_by_cap(domain_cap, &entry);
208 if (err_is_fail(err)) {
209 DEBUG_ERR(err, "failed to retrieve domain by domain_cap "
210 "returned by spawnd after kill");
214 assert(entry->num_spawnds_resources > 0 ||
215 entry->num_spawnds_running > 0);
216 assert(entry->status != DOMAIN_STATUS_CLEANED);
// First phase: one more running spawnd has stopped its dispatcher.
218 if (entry->num_spawnds_running > 0) {
219 --entry->num_spawnds_running;
221 err = pending_clients_add(domain_cap, cl->b,
222 ClientType_Exit, MAX_COREID);
223 if (err_is_fail(err)) {
224 DEBUG_ERR(err, "pending_clients_add in reply handler");
// Last running spawnd: mark the domain stopped, notify the waiters and
// request cleanup from every spawnd recorded for the domain.
227 if (entry->num_spawnds_running == 0) {
228 entry->status = DOMAIN_STATUS_STOPPED;
230 struct domain_waiter *waiter = entry->waiters;
231 while (waiter != NULL) {
232 waiter->b->tx_vtbl.wait_response(waiter->b, NOP_CONT,
235 struct domain_waiter *aux = waiter;
236 waiter = waiter->next;
240 for (coreid_t i = 0; i < MAX_COREID; ++i) {
241 if (entry->spawnds[i] == NULL) {
245 struct spawn_binding *spb = entry->spawnds[i]->b;
246 spb->rx_vtbl.spawn_reply = spawn_reply_handler;
247 errval_t req_err = spb->tx_vtbl.cleanup_request(spb,
248 NOP_CONT, cap_procmng, domain_cap);
249 if (err_is_fail(req_err)) {
250 DEBUG_ERR(req_err, "failed to send cleanup_request "
251 "to spawnd %u\n", i);
// Second phase: one more spawnd has released the domain's resources.
256 --entry->num_spawnds_resources;
258 if (entry->num_spawnds_resources == 0) {
259 entry->status = DOMAIN_STATUS_CLEANED;
261 // At this point, the domain exists in state CLEANED for
262 // history reasons. For instance, if some other domain
263 // issues a wait call for this one, the process manager can
264 // return the exit status directly.
265 // At some point, however, we might want to just clean up
266 // the domain entry and recycle the domain cap.
268 // Expecting to receive further cleanup replies from other
269 // spawnds for the same domain cap, hence re-add the
271 err = pending_clients_add(domain_cap, cl->b,
272 ClientType_Exit, MAX_COREID);
273 if (err_is_fail(err)) {
274 DEBUG_ERR(err, "pending_clients_add in reply handler");
281 // TODO(razvan): Handle the other cases, e.g. wait.
282 debug_printf("Unknown client type %u\n", cl->type);
// Roll-back path: entered only when spawnd reported success but err holds
// a local failure from the book-keeping above.
286 if (err_is_ok(spawn_err) && err_is_fail(err)) {
287 // Spawnd has successfully completed its end of the operation, but
288 // there's been an error in the process manager's book-keeping
289 // of domains. Therefore, if the request was a spawn or span one, spawnd
290 // needs to be asked to stop the dispatcher which it has just enqueued.
291 if (cl->type == ClientType_Spawn ||
292 cl->type == ClientType_SpawnWithCaps ||
293 cl->type == ClientType_Span) {
294 struct spawnd_state *state = spawnd_state_get(cl->core_id);
295 assert(state != NULL);
296 struct spawn_binding *spb = state->b;
// Ask the spawnd on the client's core to kill the dispatcher again.
299 err = spb->tx_vtbl.kill_request(spb, NOP_CONT, cap_procmng,
301 if (err_is_fail(err)) {
302 // XXX: How severe is this? Maybe we want something more
303 // assertive than logging an error message.
304 DEBUG_ERR(err, "failed to send kill request for dangling "
// NOTE(review): the result of this pending_clients_add() is not checked.
// It registers a ClientType_Kill client, presumably so that spawnd's reply
// to the kill request above finds a pending client; confirm.
307 pending_clients_add(domain_cap, cl->b, ClientType_Kill,
// Report a failure to deliver the client response, if any was attempted.
315 if (err_is_fail(resp_err)) {
316 DEBUG_ERR(resp_err, "failed to send response to client");
320 static errval_t spawn_handler_common(struct proc_mgmt_binding *b,
321 enum ClientType type,
322 coreid_t core_id, const char *path,
323 const char *argvbuf, size_t argvbytes,
324 const char *envbuf, size_t envbytes,
325 struct capref inheritcn_cap,
326 struct capref argcn_cap, uint8_t flags,
327 struct capref *ret_domain_cap)
329 assert(ret_domain_cap != NULL);
331 if (!spawnd_state_exists(core_id)) {
332 return PROC_MGMT_ERR_INVALID_SPAWND;
335 struct spawnd_state *state = spawnd_state_get(core_id);
336 assert(state != NULL);
337 struct spawn_binding *cl = state->b;
340 struct capref domain_cap;
341 errval_t err = slot_alloc(&domain_cap);
342 if (err_is_fail(err)) {
343 DEBUG_ERR(err, "slot_alloc domain_cap");
344 return err_push(err, PROC_MGMT_ERR_CREATE_DOMAIN_CAP);
346 err = cap_retype(domain_cap, cap_procmng, 0, ObjType_Domain, 0, 1);
347 if (err_is_fail(err)) {
348 DEBUG_ERR(err, "cap_retype domain_cap");
349 return err_push(err, PROC_MGMT_ERR_CREATE_DOMAIN_CAP);
352 err = pending_clients_add(domain_cap, b, type, core_id);
353 if (err_is_fail(err)) {
354 DEBUG_ERR(err, "pending_clients_add");
358 cl->rx_vtbl.spawn_reply = spawn_reply_handler;
359 if (capref_is_null(inheritcn_cap) && capref_is_null(argcn_cap)) {
360 err = cl->tx_vtbl.spawn_request(cl, NOP_CONT, cap_procmng, domain_cap,
361 path, argvbuf, argvbytes, envbuf,
364 err = cl->tx_vtbl.spawn_with_caps_request(cl, NOP_CONT, cap_procmng,
365 domain_cap, path, argvbuf,
366 argvbytes, envbuf, envbytes,
367 inheritcn_cap, argcn_cap,
370 if (err_is_fail(err)) {
371 DEBUG_ERR(err, "sending spawn request");
372 pending_clients_release(domain_cap, NULL);
373 return err_push(err, PROC_MGMT_ERR_SPAWND_REQUEST);
379 static void spawn_handler(struct proc_mgmt_binding *b, coreid_t core_id,
380 const char *path, const char *argvbuf,
381 size_t argvbytes, const char *envbuf, size_t envbytes,
384 errval_t err, resp_err;
385 struct capref domain_cap;
386 err = spawn_handler_common(b, ClientType_Spawn, core_id, path, argvbuf,
387 argvbytes, envbuf, envbytes, NULL_CAP, NULL_CAP,
389 if (err_is_ok(err)) {
390 // Will respond to client when we get the reply from spawnd.
394 resp_err = b->tx_vtbl.spawn_response(b, NOP_CONT, err, NULL_CAP);
395 if (err_is_fail(resp_err)) {
396 DEBUG_ERR(resp_err, "failed to send spawn_response");
400 static void spawn_with_caps_handler(struct proc_mgmt_binding *b,
401 coreid_t core_id, const char *path,
402 const char *argvbuf, size_t argvbytes,
403 const char *envbuf, size_t envbytes,
404 struct capref inheritcn_cap,
405 struct capref argcn_cap, uint8_t flags)
407 errval_t err, resp_err;
408 struct capref domain_cap;
409 err = spawn_handler_common(b, ClientType_SpawnWithCaps, core_id, path,
410 argvbuf, argvbytes, envbuf, envbytes,
411 inheritcn_cap, argcn_cap, flags, &domain_cap);
412 if (err_is_ok(err)) {
413 // Will respond to client when we get the reply from spawnd.
417 resp_err = b->tx_vtbl.spawn_with_caps_response(b, NOP_CONT, err,
419 if (err_is_fail(resp_err)) {
420 DEBUG_ERR(resp_err, "failed to send spawn_with_caps_response");
424 static void span_handler(struct proc_mgmt_binding *b, struct capref domain_cap,
425 coreid_t core_id, struct capref vroot,
426 struct capref dispframe)
428 errval_t err, resp_err;
429 err = domain_can_span(domain_cap, core_id);
430 if (err_is_fail(err)) {
431 goto respond_with_err;
434 if (!spawnd_state_exists(core_id)) {
435 err = PROC_MGMT_ERR_INVALID_SPAWND;
436 goto respond_with_err;
439 struct spawnd_state *state = spawnd_state_get(core_id);
440 assert(state != NULL);
441 struct spawn_binding *cl = state->b;
444 err = pending_clients_add(domain_cap, b, ClientType_Span, core_id);
445 if (err_is_fail(err)) {
446 goto respond_with_err;
449 cl->rx_vtbl.spawn_reply = spawn_reply_handler;
450 err = cl->tx_vtbl.span_request(cl, NOP_CONT, cap_procmng, domain_cap, vroot,
452 if (err_is_ok(err)) {
453 // Will respond to client when we get the reply from spawnd.
456 DEBUG_ERR(err, "sending span request");
457 pending_clients_release(domain_cap, NULL);
458 err = err_push(err, PROC_MGMT_ERR_SPAWND_REQUEST);
462 resp_err = b->tx_vtbl.span_response(b, NOP_CONT, err);
463 if (err_is_fail(resp_err)) {
464 DEBUG_ERR(resp_err, "failed to send span_response");
468 static errval_t kill_handler_common(struct proc_mgmt_binding *b,
469 struct capref domain_cap,
470 enum ClientType type,
473 errval_t err = pending_clients_add(domain_cap, b, type, MAX_COREID);
474 if (err_is_fail(err)) {
478 struct domain_entry *entry;
479 err = domain_get_by_cap(domain_cap, &entry);
480 if (err_is_fail(err)) {
484 entry->exit_status = exit_status;
485 domain_stop_pending(entry);
487 for (coreid_t i = 0; i < MAX_COREID; ++i) {
488 if (entry->spawnds[i] == NULL) {
492 struct spawn_binding *spb = entry->spawnds[i]->b;
493 spb->rx_vtbl.spawn_reply = spawn_reply_handler;
494 errval_t req_err = spb->tx_vtbl.kill_request(spb, NOP_CONT, cap_procmng,
496 if (err_is_fail(req_err)) {
497 DEBUG_ERR(req_err, "failed to send kill_request to spawnd %u\n", i);
504 static void kill_handler(struct proc_mgmt_binding *b, struct capref domain_cap)
506 errval_t err = kill_handler_common(b, domain_cap, ClientType_Kill,
508 if (err_is_fail(err)) {
509 errval_t resp_err = b->tx_vtbl.kill_response(b, NOP_CONT, err);
510 if (err_is_fail(resp_err)) {
511 DEBUG_ERR(resp_err, "failed to send kill_response");
516 static void exit_handler(struct proc_mgmt_binding *b, struct capref domain_cap,
519 errval_t err = kill_handler_common(b, domain_cap, ClientType_Exit,
521 if (err_is_fail(err)) {
522 DEBUG_ERR(err, "processing exit_handler for requesting domain, exit "
523 "code %u", exit_status);
525 // Error or not, there's no client to reply to anymore.
528 static void wait_handler(struct proc_mgmt_binding *b, struct capref domain_cap)
530 errval_t err, resp_err;
531 struct domain_entry *entry;
532 err = domain_get_by_cap(domain_cap, &entry);
533 if (err_is_fail(err)) {
537 if (entry->status == DOMAIN_STATUS_STOPPED) {
538 // Domain has already been stopped, so just reply with exit status.
542 struct domain_waiter *waiter = (struct domain_waiter*) malloc(
543 sizeof(struct domain_waiter));
545 waiter->next = entry->waiters;
546 entry->waiters = waiter;
547 // Will respond when domain is stopped.
551 resp_err = b->tx_vtbl.wait_response(b, NOP_CONT, err, entry->exit_status);
552 if (err_is_fail(resp_err)) {
553 DEBUG_ERR(resp_err, "failed to send wait_response");
557 static struct proc_mgmt_rx_vtbl monitor_vtbl = {
558 .add_spawnd = add_spawnd_handler,
559 .spawn_call = spawn_handler,
560 .spawn_with_caps_call = spawn_with_caps_handler,
561 .span_call = span_handler,
562 .kill_call = kill_handler,
563 .exit = exit_handler,
564 .wait_call = wait_handler
567 static struct proc_mgmt_rx_vtbl non_monitor_vtbl = {
568 .add_spawnd = add_spawnd_handler_non_monitor,
569 .spawn_call = spawn_handler,
570 .spawn_with_caps_call = spawn_with_caps_handler,
571 .span_call = span_handler,
572 .kill_call = kill_handler,
573 .exit = exit_handler,
574 .wait_call = wait_handler
577 static errval_t alloc_ep_for_monitor(struct capref *ep)
579 struct proc_mgmt_lmp_binding *lmpb =
580 malloc(sizeof(struct proc_mgmt_lmp_binding));
581 assert(lmpb != NULL);
583 // setup our end of the binding
584 errval_t err = proc_mgmt_client_lmp_accept(lmpb, get_default_waitset(),
585 DEFAULT_LMP_BUF_WORDS);
586 if (err_is_fail(err)) {
588 return err_push(err, LIB_ERR_PROC_MGMT_CLIENT_ACCEPT);
591 *ep = lmpb->chan.local_cap;
592 lmpb->b.rx_vtbl = monitor_vtbl;
/**
 * \brief Export callback passed to proc_mgmt_export() by start_service().
 *
 * Allocates an LMP endpoint for the local monitor, sends it to the monitor
 * with set_proc_mgmt_ep_request, and registers the exported iref with the
 * name service under SERVICE_BASENAME. Every failure panics.
 *
 * NOTE(review): some lines of this function are not visible in this view,
 * among them the declaration of ep and the end of one panic message.
 *
 * \param st    Not used by the visible lines.
 * \param err   Result of the export.
 * \param iref  Interface reference assigned to the exported service.
 */
597 static void export_cb(void *st, errval_t err, iref_t iref)
599 if (err_is_fail(err)) {
600 USER_PANIC_ERR(err, "export failed");
603 // Allocate an endpoint for the local monitor, who will use it to inform
604 // us about new spawnd irefs on behalf of other monitors.
606 err = alloc_ep_for_monitor(&ep);
607 if (err_is_fail(err)) {
608 USER_PANIC_ERR(err, "failed to allocate LMP EP for local monitor");
611 // Send the endpoint to the monitor, so it can finish the handshake.
612 struct monitor_binding *mb = get_monitor_binding();
613 err = mb->tx_vtbl.set_proc_mgmt_ep_request(mb, NOP_CONT, ep);
614 if (err_is_fail(err)) {
615 USER_PANIC_ERR(err, "failed to send set_proc_mgmt_ep_request to "
619 // Also register this iref with the name service, for arbitrary client
620 // domains to use for spawn-related ops.
621 err = nameservice_register(SERVICE_BASENAME, iref);
622 if (err_is_fail(err)) {
623 USER_PANIC_ERR(err, "nameservice_register failed");
627 static errval_t connect_cb(void *st, struct proc_mgmt_binding *b)
629 b->rx_vtbl = non_monitor_vtbl;
633 errval_t start_service(void)
635 return proc_mgmt_export(NULL, export_cb, connect_cb, get_default_waitset(),
636 IDC_EXPORT_FLAGS_DEFAULT);