Add extra layer of queuing above the Flounder UMP one.
[barrelfish] / usr / proc_mgmt / service.c
1 /**
2  * \file
3  * \brief Process management service.
4  */
5
6 /*
7  * Copyright (c) 2017, ETH Zurich.
8  * All rights reserved.
9  *
10  * This file is distributed under the terms in the attached LICENSE file.
11  * If you do not find this file, copies can be found by writing to:
12  * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
13  */
14
15 #include <barrelfish/barrelfish.h>
16 #include <barrelfish/nameservice_client.h>
17 #include <barrelfish/proc_mgmt_client.h>
18 #include <barrelfish/spawn_client.h>
19 #include <if/monitor_defs.h>
20 #include <if/proc_mgmt_defs.h>
21 #include <if/spawn_defs.h>
22
23 #include "domain.h"
24 #include "internal.h"
25 #include "pending_clients.h"
26 #include "spawnd_state.h"
27
28 static void add_spawnd_handler(struct proc_mgmt_binding *b, coreid_t core_id,
29                                iref_t iref)
30 {
31     if (spawnd_state_exists(core_id)) {
32         DEBUG_ERR(PROC_MGMT_ERR_SPAWND_EXISTS, "spawnd_state_exists");
33         return;
34     }
35
36     // Bind with the spawnd.
37     struct spawn_binding *spawnb;
38     errval_t err = spawn_bind_iref(iref, &spawnb);
39     if (err_is_fail(err)) {
40         DEBUG_ERR(err, "spawn_bind_iref");
41         return;
42     }
43
44     err = spawnd_state_alloc(core_id, spawnb);
45     if (err_is_fail(err)) {
46         DEBUG_ERR(err, "spawnd_state_alloc");
47     }
48
49     debug_printf("Process manager bound with spawnd.%u on iref %u\n", core_id,
50             iref);
51 }
52
53 static void add_spawnd_handler_non_monitor(struct proc_mgmt_binding *b,
54                                            coreid_t core_id, iref_t iref)
55 {
56     // debug_printf("Ignoring add_spawnd call: %s\n",
57     //              err_getstring(PROC_MGMT_ERR_NOT_MONITOR));
58 }
59
60 static void spawn_reply_handler(struct spawn_binding *b,
61                                 struct capref domain_cap, errval_t spawn_err);
62 static void spawn_with_caps_reply_handler(struct spawn_binding *b,
63                                           struct capref domain_cap,
64                                           errval_t spawn_err);
65 static void span_reply_handler(struct spawn_binding *b,
66                                struct capref domain_cap, errval_t span_err);
67 static void kill_reply_handler(struct spawn_binding *b,
68                                struct capref domain_cap, errval_t kill_err);
69 static void exit_reply_handler(struct spawn_binding *b,
70                                struct capref domain_cap, errval_t exit_err);
71 static void cleanup_reply_handler(struct spawn_binding *b,
72                                   struct capref domain_cap,
73                                   errval_t cleanup_err);
74
75 static bool spawn_request_sender(struct msg_queue_elem *m)
76 {
77     struct pending_spawn *spawn = (struct pending_spawn*) m->st;
78
79     errval_t err;
80     bool with_caps = !(capref_is_null(spawn->inheritcn_cap) &&
81                        capref_is_null(spawn->argcn_cap));
82     if (with_caps) {
83         spawn->b->rx_vtbl.spawn_with_caps_reply = spawn_with_caps_reply_handler;
84         err = spawn->b->tx_vtbl.spawn_with_caps_request(spawn->b, NOP_CONT,
85                                                         cap_procmng,
86                                                         spawn->domain_cap,
87                                                         spawn->path,
88                                                         spawn->argvbuf,
89                                                         spawn->argvbytes,
90                                                         spawn->envbuf,
91                                                         spawn->envbytes,
92                                                         spawn->inheritcn_cap,
93                                                         spawn->argcn_cap,
94                                                         spawn->flags);
95     } else {
96         spawn->b->rx_vtbl.spawn_reply = spawn_reply_handler;
97         err = spawn->b->tx_vtbl.spawn_request(spawn->b, NOP_CONT, cap_procmng,
98                                               spawn->domain_cap, spawn->path,
99                                               spawn->argvbuf, spawn->argvbytes,
100                                               spawn->envbuf, spawn->envbytes,
101                                               spawn->flags);
102     }
103
104     if (err_is_fail(err)) {
105         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
106             return false;
107         } else {
108             USER_PANIC_ERR(err, "sending spawn request");
109         }
110     }
111
112     free(spawn);
113     free(m);
114
115     return true;
116 }
117
118 static bool span_request_sender(struct msg_queue_elem *m)
119 {
120     struct pending_span *span = (struct pending_span*) m->st;
121
122     errval_t err;
123     span->b->rx_vtbl.span_reply = span_reply_handler;
124     err = span->b->tx_vtbl.span_request(span->b, NOP_CONT, cap_procmng,
125                                         span->domain_cap, span->vroot,
126                                         span->dispframe);
127
128     if (err_is_fail(err)) {
129         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
130             return false;
131         } else {
132             USER_PANIC_ERR(err, "sending span request");
133         }
134     }
135
136     free(span);
137     free(m);
138
139     return true;
140 }
141
142 static bool kill_request_sender(struct msg_queue_elem *m)
143 {
144     struct pending_kill_exit_cleanup *kill = (struct pending_kill_exit_cleanup*) m->st;
145
146     errval_t err;
147     kill->sb->rx_vtbl.kill_reply = kill_reply_handler;
148     err = kill->sb->tx_vtbl.kill_request(kill->sb, NOP_CONT, cap_procmng,
149                                         kill->domain_cap);
150
151     if (err_is_fail(err)) {
152         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
153             return false;
154         } else {
155             USER_PANIC_ERR(err, "sending kill request");
156         }
157     }
158
159     free(kill);
160     free(m);
161
162     return true;
163 }
164
165 static bool exit_request_sender(struct msg_queue_elem *m)
166 {
167     struct pending_kill_exit_cleanup *exit = (struct pending_kill_exit_cleanup*) m->st;
168
169     errval_t err;
170     exit->sb->rx_vtbl.exit_reply = exit_reply_handler;
171     err = exit->sb->tx_vtbl.exit_request(exit->sb, NOP_CONT, cap_procmng,
172                                         exit->domain_cap);
173
174     if (err_is_fail(err)) {
175         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
176             return false;
177         } else {
178             USER_PANIC_ERR(err, "sending exit request");
179         }
180     }
181
182     free(exit);
183     free(m);
184
185     return true;
186 }
187
188 static bool cleanup_request_sender(struct msg_queue_elem *m)
189 {
190     struct pending_kill_exit_cleanup *cleanup = (struct pending_kill_exit_cleanup*) m->st;
191
192     errval_t err;
193     cleanup->sb->rx_vtbl.cleanup_reply = cleanup_reply_handler;
194     err = cleanup->sb->tx_vtbl.cleanup_request(cleanup->sb, NOP_CONT, cap_procmng,
195                                               cleanup->domain_cap);
196
197     if (err_is_fail(err)) {
198         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
199             return false;
200         } else {
201             USER_PANIC_ERR(err, "sending cleanup request");
202         }
203     }
204
205     free(cleanup);
206     free(m);
207
208     return true;
209 }
210
211 static void spawn_reply_handler(struct spawn_binding *b,
212                                 struct capref domain_cap, errval_t spawn_err)
213 {
214     struct pending_client *cl;
215     errval_t err = pending_clients_release(domain_cap, ClientType_Spawn, &cl);
216     if (err_is_fail(err)) {
217         DEBUG_ERR(err, "failed to retrieve pending spawn client based on domain"
218                   " cap");
219         return;
220     }
221
222     err = spawn_err;
223     if (err_is_ok(spawn_err)) {
224         err = domain_spawn(domain_cap, cl->core_id);
225     }
226
227     errval_t resp_err = cl->b->tx_vtbl.spawn_response(cl->b, NOP_CONT, err,
228                                                       domain_cap);
229     if (err_is_fail(resp_err)) {
230         DEBUG_ERR(resp_err, "failed to send spawn_response to client");
231     }
232     
233     free(cl);
234 }
235
236 static void spawn_with_caps_reply_handler(struct spawn_binding *b,
237                                           struct capref domain_cap,
238                                           errval_t spawn_err)
239 {
240     struct pending_client *cl;
241     errval_t err = pending_clients_release(domain_cap, ClientType_SpawnWithCaps,
242                                            &cl);
243     if (err_is_fail(err)) {
244         DEBUG_ERR(err, "failed to retrieve pending spawn_with_caps client based"
245                   " on domain cap");
246         return;
247     }
248
249     err = spawn_err;
250     if (err_is_ok(spawn_err)) {
251         err = domain_spawn(domain_cap, cl->core_id);
252     }
253
254     errval_t resp_err = cl->b->tx_vtbl.spawn_with_caps_response(cl->b, NOP_CONT,
255                                                                 err,
256                                                                 domain_cap);
257     if (err_is_fail(resp_err)) {
258         DEBUG_ERR(resp_err, "failed to send spawn_with_caps_response to "
259                   "client");
260     }
261     
262     free(cl);
263 }
264
265 static void span_reply_handler(struct spawn_binding *b,
266                                struct capref domain_cap, errval_t span_err)
267 {
268     struct pending_client *cl;
269     errval_t err = pending_clients_release(domain_cap, ClientType_Span, &cl);
270     if (err_is_fail(err)) {
271         DEBUG_ERR(err, "failed to retrieve pending span client based on domain"
272                   " cap");
273         return;
274     }
275
276     struct domain_entry *entry;
277     err = domain_get_by_cap(cl->domain_cap, &entry);
278     if (err_is_fail(err)) {
279         DEBUG_ERR(err, "failed to retrieve span client by domain cap");
280         return;
281     }
282
283     if (entry->status != DOMAIN_STATUS_RUNNING) {
284         // Domain has been stopped while we were serving the request; there's
285         // no one to respond to.
286         free(cl);
287         return;
288     }
289
290     err = cl->b->tx_vtbl.span_response(cl->b, NOP_CONT, span_err);
291     if (err_is_fail(err)) {
292         DEBUG_ERR(err, "failed to send span_response to client");
293     }
294     
295     free(cl);
296 }
297
298 static void cleanup_reply_handler(struct spawn_binding *b,
299                                   struct capref domain_cap,
300                                   errval_t cleanup_err)
301 {
302     struct pending_client *cl;
303     errval_t err = pending_clients_release(domain_cap, ClientType_Cleanup, &cl);
304     if (err_is_fail(err)) {
305         DEBUG_ERR(err, "failed to retrieve pending cleanup client based on "
306                   "domain cap");
307         return;
308     }
309
310     if (err_is_fail(cleanup_err)) {
311         // TODO(razvan): Here, spawnd has failed deleting its local cspace.
312         // Should we send another cleanup message, until it might succeed?
313         free(cl);
314         return;
315     }
316
317     struct domain_entry *entry;
318     err = domain_get_by_cap(domain_cap, &entry);
319     if (err_is_fail(err)) {
320         DEBUG_ERR(err, "failed to retrieve domain by cap returned by spawnd "
321                   "after cleanup");
322         return;
323     }
324
325     assert(entry->num_spawnds_resources > 0);
326     assert(entry->status != DOMAIN_STATUS_CLEANED);
327
328     --entry->num_spawnds_resources;
329
330     if (entry->num_spawnds_resources == 0) {
331         entry->status = DOMAIN_STATUS_CLEANED;
332
333         // At this point, the domain exists in state CLEANED for
334         // history reasons. For instance, if some other domain
335         // issues a wait call for this one, the process manager can
336         // return the exit status directly.
337         // At some point, however, we might want to just clean up
338         // the domain entry and recycle the domain cap.
339     } else {
340         // Expecting to receive further cleanup replies from other
341         // spawnds for the same domain cap, hence re-add the
342         // pending client.
343         err = pending_clients_add(domain_cap, cl->b,
344                                   ClientType_Cleanup, MAX_COREID);
345         if (err_is_fail(err)) {
346             DEBUG_ERR(err, "pending_clients_add in cleanup_reply_handler");
347         }
348     }
349 }
350
351 static void kill_reply_handler(struct spawn_binding *b,
352                                struct capref domain_cap, errval_t kill_err)
353 {
354     struct pending_client *cl;
355     errval_t err = pending_clients_release(domain_cap, ClientType_Kill, &cl);
356     if (err_is_fail(err)) {
357         DEBUG_ERR(err, "failed to retrieve pending kill client based on domain "
358                   "cap");
359         return;
360     }
361
362     errval_t resp_err;
363     if (err_is_fail(kill_err)) {
364         // TODO(razvan): Here, spawnd has failed deleting its local dispatcher.
365         // Should we send another kill message, until it might succeed?
366         while (cl != NULL) {
367             resp_err = cl->b->tx_vtbl.kill_response(cl->b, NOP_CONT,
368                                                      kill_err);
369             if (err_is_fail(resp_err)) {
370                 DEBUG_ERR(resp_err, "failed to send kill_response to client");
371             }
372             struct pending_client *tmp = cl;
373             cl = cl->next;
374             free(tmp);
375         }
376         return;
377     }
378
379     struct domain_entry *entry;
380     err = domain_get_by_cap(domain_cap, &entry);
381     if (err_is_fail(err)) {
382         DEBUG_ERR(err, "failed to retrieve domain by cap returned by spawnd "
383                   "after kill");
384         return;
385     }
386
387     assert(entry->num_spawnds_running > 0);
388     assert(entry->status != DOMAIN_STATUS_STOPPED);
389
390     --entry->num_spawnds_running;
391
392     if (entry->num_spawnds_running == 0) {
393         entry->status = DOMAIN_STATUS_STOPPED;
394         entry->exit_status = EXIT_STATUS_KILLED;
395
396         err = pending_clients_add(domain_cap, NULL, ClientType_Cleanup,
397                                   MAX_COREID);
398         if (err_is_fail(err)) {
399             DEBUG_ERR(err, "pending_clients_add in kill_reply_handler");
400         }
401
402         // TODO(razvan): Might it be more sane if we respond back
403         // to the client after the domain has been cleaned up (i.e.
404         // the cspace root has been revoked for all dispatchers)?
405         while (cl != NULL) {
406             resp_err = cl->b->tx_vtbl.kill_response(cl->b, NOP_CONT,
407                                                      kill_err);
408             if (err_is_fail(resp_err)) {
409                 DEBUG_ERR(resp_err, "failed to send kill_response to client");
410             }
411             struct pending_client *tmp = cl;
412             cl = cl->next;
413             free(tmp);
414         }
415         
416         // TODO(razvan): Same problem applies to the waiters: would
417         // it be better if we sent them wait_responses after the
418         // cspace root has been revoked, too? (here and in the exit
419         // case).
420         struct domain_waiter *waiter = entry->waiters;
421         while (waiter != NULL) {
422             waiter->b->tx_vtbl.wait_response(waiter->b, NOP_CONT,
423                                              SYS_ERR_OK,
424                                              entry->exit_status);
425             struct domain_waiter *tmp = waiter;
426             waiter = waiter->next;
427             free(tmp);
428         }
429
430         for (coreid_t i = 0; i < MAX_COREID; ++i) {
431             if (entry->spawnds[i] == NULL) {
432                 continue;
433             }
434
435             struct spawn_binding *spb = entry->spawnds[i]->b;
436
437             struct pending_kill_exit_cleanup *cleanup = (struct pending_kill_exit_cleanup*) malloc(
438                     sizeof(struct pending_kill_exit_cleanup));
439             cleanup->sb = spb;
440             cleanup->domain_cap = domain_cap;
441
442             struct msg_queue_elem *msg = (struct msg_queue_elem*) malloc(
443                     sizeof(struct msg_queue_elem));
444             msg->st = cleanup;
445             msg->cont = cleanup_request_sender;
446
447             err = spawnd_state_enqueue_send(entry->spawnds[i], msg);
448
449             if (err_is_fail(err)) {
450                 DEBUG_ERR(err, "enqueuing cleanup request");
451                 free(cleanup);
452                 free(msg);
453             }
454         }
455     } else {
456         err = pending_clients_add(domain_cap, cl->b, ClientType_Kill,
457                                   MAX_COREID);
458         if (err_is_fail(err)) {
459             DEBUG_ERR(err, "pending_clients_add in kill_reply_handler");
460         }
461     }
462 }
463
464 static void exit_reply_handler(struct spawn_binding *b,
465                                struct capref domain_cap, errval_t exit_err)
466 {
467     struct pending_client *cl;
468     errval_t err = pending_clients_release(domain_cap, ClientType_Exit, &cl);
469     if (err_is_fail(err)) {
470         DEBUG_ERR(err, "failed to retrieve pending exit client based on domain "
471                   "cap");
472         return;
473     }
474
475     if (err_is_fail(exit_err)) {
476         // TODO(razvan): Here, spawnd has failed deleting its local dispatcher.
477         // Should we send another kill message, until it might succeed?
478         free(cl);
479         return;
480     }
481
482     struct domain_entry *entry;
483     err = domain_get_by_cap(domain_cap, &entry);
484     if (err_is_fail(err)) {
485         DEBUG_ERR(err, "failed to retrieve domain by cap returned by spawnd "
486                   "after exit");
487         return;
488     }
489
490     assert(entry->num_spawnds_running > 0);
491     assert(entry->status != DOMAIN_STATUS_STOPPED);
492
493     --entry->num_spawnds_running;
494
495     if (entry->num_spawnds_running == 0) {
496         entry->status = DOMAIN_STATUS_STOPPED;
497
498         err = pending_clients_add(domain_cap, NULL, ClientType_Cleanup,
499                                   MAX_COREID);
500         if (err_is_fail(err)) {
501             DEBUG_ERR(err, "pending_clients_add in exit_reply_handler");
502         }
503
504         free(cl);
505
506         // TODO(razvan): Same problem applies to the waiters: would
507         // it be better if we sent them wait_responses after the
508         // cspace root has been revoked, too? (here and in the exit
509         // case).
510         struct domain_waiter *waiter = entry->waiters;
511         while (waiter != NULL) {
512             waiter->b->tx_vtbl.wait_response(waiter->b, NOP_CONT,
513                                              SYS_ERR_OK,
514                                              entry->exit_status);
515             struct domain_waiter *tmp = waiter;
516             waiter = waiter->next;
517             free(tmp);
518         }
519
520         for (coreid_t i = 0; i < MAX_COREID; ++i) {
521             if (entry->spawnds[i] == NULL) {
522                 continue;
523             }
524
525             struct spawn_binding *spb = entry->spawnds[i]->b;
526
527             struct pending_kill_exit_cleanup *cleanup = (struct pending_kill_exit_cleanup*) malloc(
528                     sizeof(struct pending_kill_exit_cleanup));
529             cleanup->sb = spb;
530             cleanup->domain_cap = domain_cap;
531
532             struct msg_queue_elem *msg = (struct msg_queue_elem*) malloc(
533                     sizeof(struct msg_queue_elem));
534             msg->st = cleanup;
535             msg->cont = cleanup_request_sender;
536
537             err = spawnd_state_enqueue_send(entry->spawnds[i], msg);
538
539             if (err_is_fail(err)) {
540                 DEBUG_ERR(err, "enqueuing cleanup request");
541                 free(cleanup);
542                 free(msg);
543             }
544         }
545     } else {
546         err = pending_clients_add(domain_cap, cl->b, ClientType_Exit,
547                                   MAX_COREID);
548         if (err_is_fail(err)) {
549             DEBUG_ERR(err, "pending_clients_add in kill_reply_handler");
550         }
551     }
552 }
553
554 static errval_t spawn_handler_common(struct proc_mgmt_binding *b,
555                                      enum ClientType type,
556                                      coreid_t core_id, const char *path,
557                                      const char *argvbuf, size_t argvbytes,
558                                      const char *envbuf, size_t envbytes,
559                                      struct capref inheritcn_cap,
560                                      struct capref argcn_cap, uint8_t flags)
561 {
562     if (!spawnd_state_exists(core_id)) {
563         return PROC_MGMT_ERR_INVALID_SPAWND;
564     }
565
566     struct spawnd_state *spawnd = spawnd_state_get(core_id);
567     assert(spawnd != NULL);
568     struct spawn_binding *cl = spawnd->b;
569     assert(cl != NULL);
570
571     struct capref domain_cap;
572     errval_t err = slot_alloc(&domain_cap);
573     if (err_is_fail(err)) {
574         DEBUG_ERR(err, "slot_alloc domain_cap");
575         return err_push(err, PROC_MGMT_ERR_CREATE_DOMAIN_CAP);
576     }
577     err = cap_retype(domain_cap, cap_procmng, 0, ObjType_Domain, 0, 1);
578     if (err_is_fail(err)) {
579         DEBUG_ERR(err, "cap_retype domain_cap");
580         return err_push(err, PROC_MGMT_ERR_CREATE_DOMAIN_CAP);
581     }
582
583     err = pending_clients_add(domain_cap, b, type, core_id);
584     if (err_is_fail(err)) {
585         DEBUG_ERR(err, "pending_clients_add");
586         return err;
587     }
588
589     struct pending_spawn *spawn = (struct pending_spawn*) malloc(
590             sizeof(struct pending_spawn));
591     spawn->domain_cap = domain_cap;
592     spawn->b = cl;
593     spawn->core_id = core_id;
594     spawn->path = path;
595     spawn->argvbuf = argvbuf;
596     spawn->argvbytes = argvbytes;
597     spawn->envbuf = envbuf;
598     spawn->envbytes = envbytes;
599     spawn->inheritcn_cap = inheritcn_cap;
600     spawn->argcn_cap = argcn_cap;
601     spawn->flags = flags;
602
603     struct msg_queue_elem *msg = (struct msg_queue_elem*) malloc(
604             sizeof(struct msg_queue_elem));
605     msg->st = spawn;
606     msg->cont = spawn_request_sender;
607
608     err = spawnd_state_enqueue_send(spawnd, msg);
609
610     if (err_is_fail(err)) {
611         DEBUG_ERR(err, "enqueuing spawn request");
612         free(spawn);
613         free(msg);
614     }
615
616     return SYS_ERR_OK;
617 }
618
619 static void spawn_handler(struct proc_mgmt_binding *b, coreid_t core_id,
620                           const char *path, const char *argvbuf,
621                           size_t argvbytes, const char *envbuf, size_t envbytes,
622                           uint8_t flags)
623 {
624     errval_t err, resp_err;
625     err = spawn_handler_common(b, ClientType_Spawn, core_id, path, argvbuf,
626                                argvbytes, envbuf, envbytes, NULL_CAP, NULL_CAP,
627                                flags);
628     if (err_is_ok(err)) {
629         // Will respond to client when we get the reply from spawnd.
630         return;
631     }
632
633     resp_err = b->tx_vtbl.spawn_response(b, NOP_CONT, err, NULL_CAP);
634     if (err_is_fail(resp_err)) {
635         DEBUG_ERR(resp_err, "failed to send spawn_response");
636     }
637 }
638
639 static void spawn_with_caps_handler(struct proc_mgmt_binding *b,
640                                     coreid_t core_id, const char *path,
641                                     const char *argvbuf, size_t argvbytes,
642                                     const char *envbuf, size_t envbytes,
643                                     struct capref inheritcn_cap,
644                                     struct capref argcn_cap, uint8_t flags)
645 {
646     errval_t err, resp_err;
647     err = spawn_handler_common(b, ClientType_SpawnWithCaps, core_id, path,
648                                argvbuf, argvbytes, envbuf, envbytes,
649                                inheritcn_cap, argcn_cap, flags);
650     if (err_is_ok(err)) {
651         // Will respond to client when we get the reply from spawnd.
652         return;
653     }
654
655     resp_err = b->tx_vtbl.spawn_with_caps_response(b, NOP_CONT, err,
656                                                             NULL_CAP);
657     if (err_is_fail(resp_err)) {
658         DEBUG_ERR(resp_err, "failed to send spawn_with_caps_response");
659     }
660 }
661
662 static void span_handler(struct proc_mgmt_binding *b, struct capref domain_cap,
663                          coreid_t core_id, struct capref vroot,
664                          struct capref dispframe)
665 {
666     errval_t err, resp_err;
667     err = domain_can_span(domain_cap, core_id);
668     if (err_is_fail(err)) {
669         goto respond_with_err;
670     }
671
672     if (!spawnd_state_exists(core_id)) {
673         err = PROC_MGMT_ERR_INVALID_SPAWND;
674         goto respond_with_err;
675     }
676
677     struct spawnd_state *spawnd = spawnd_state_get(core_id);
678     assert(spawnd != NULL);
679     struct spawn_binding *cl = spawnd->b;
680     assert(cl != NULL);
681
682     err = pending_clients_add(domain_cap, b, ClientType_Span, core_id);
683     if (err_is_fail(err)) {
684         goto respond_with_err;
685     }
686
687     struct pending_span *span = (struct pending_span*) malloc(
688             sizeof(struct pending_span));
689     span->domain_cap = domain_cap;
690     span->b = cl;
691     span->core_id = core_id;
692     span->vroot = vroot;
693     span->dispframe = dispframe;
694
695     struct msg_queue_elem *msg = (struct msg_queue_elem*) malloc(
696             sizeof(struct msg_queue_elem));
697     msg->st = span;
698     msg->cont = span_request_sender;
699
700     err = spawnd_state_enqueue_send(spawnd, msg);
701
702     if (err_is_fail(err)) {
703         DEBUG_ERR(err, "enqueuing span request");
704         free(span);
705         free(msg);
706     }
707
708 respond_with_err:
709     resp_err = b->tx_vtbl.span_response(b, NOP_CONT, err);
710     if (err_is_fail(resp_err)) {
711         DEBUG_ERR(resp_err, "failed to send span_response");
712     }
713 }
714
715 static errval_t kill_handler_common(struct proc_mgmt_binding *b,
716                                     struct capref domain_cap,
717                                     enum ClientType type,
718                                     uint8_t exit_status)
719 {
720     errval_t err = pending_clients_add(domain_cap, b, type, MAX_COREID);
721     if (err_is_fail(err)) {
722         return err;
723     }
724
725     struct domain_entry *entry;
726     err = domain_get_by_cap(domain_cap, &entry);
727     if (err_is_fail(err)) {
728         return err;
729     }
730
731     entry->exit_status = exit_status;
732     domain_stop_pending(entry);
733
734     for (coreid_t i = 0; i < MAX_COREID; ++i) {
735         if (entry->spawnds[i] == NULL) {
736             continue;
737         }
738
739         struct spawn_binding *spb = entry->spawnds[i]->b;
740
741         struct pending_kill_exit_cleanup *cmd = (struct pending_kill_exit_cleanup*) malloc(
742                 sizeof(struct pending_kill_exit_cleanup));
743         cmd->domain_cap = domain_cap;
744         cmd->sb = spb;
745
746         struct msg_queue_elem *msg = (struct msg_queue_elem*) malloc(
747                 sizeof(struct msg_queue_elem));
748         msg->st = cmd;
749
750         switch (type) {
751             case ClientType_Kill:
752                 cmd->pmb = b;
753                 msg->cont = kill_request_sender;
754
755                 err = spawnd_state_enqueue_send(entry->spawnds[i], msg);
756
757                 if (err_is_fail(err)) {
758                     DEBUG_ERR(err, "enqueuing kill request");
759                     free(cmd);
760                     free(msg);
761                 }
762                 break;
763
764             case ClientType_Exit:
765                 msg->cont = exit_request_sender;
766
767                 err = spawnd_state_enqueue_send(entry->spawnds[i], msg);
768
769                 if (err_is_fail(err)) {
770                     DEBUG_ERR(err, "enqueuing exit request");
771                     free(cmd);
772                     free(msg);
773                 }
774                 break;
775
776             default:
777                 USER_PANIC("invalid client type for kill: %u\n", type);
778         }
779     }
780
781     return SYS_ERR_OK;
782 }
783
784 static void kill_handler(struct proc_mgmt_binding *b,
785                          struct capref victim_domain_cap)
786 {
787     errval_t err = kill_handler_common(b, victim_domain_cap, ClientType_Kill,
788                                        EXIT_STATUS_KILLED);
789     if (err_is_fail(err)) {
790         errval_t resp_err = b->tx_vtbl.kill_response(b, NOP_CONT, err);
791         if (err_is_fail(resp_err)) {
792             DEBUG_ERR(resp_err, "failed to send kill_response");
793         }
794     }
795 }
796
797 static void exit_handler(struct proc_mgmt_binding *b, struct capref domain_cap,
798                          uint8_t exit_status)
799 {
800     errval_t err = kill_handler_common(b, domain_cap, ClientType_Exit,
801                                        exit_status);
802     if (err_is_fail(err)) {
803         DEBUG_ERR(err, "processing exit_handler for requesting domain, exit "
804                   "code %u", exit_status);
805     }
806     // Error or not, there's no client to respond to anymore.
807 }
808
809 static void wait_handler(struct proc_mgmt_binding *b, struct capref domain_cap)
810 {
811     errval_t err, resp_err;
812     struct domain_entry *entry;
813     err = domain_get_by_cap(domain_cap, &entry);
814     if (err_is_fail(err)) {
815         goto respond;
816     }
817
818     if (entry->status == DOMAIN_STATUS_STOPPED) {
819         // Domain has already been stopped, so just reply with exit status.
820         goto respond;
821     }
822
823     struct domain_waiter *waiter = (struct domain_waiter*) malloc(
824             sizeof(struct domain_waiter));
825     waiter->b = b;
826     waiter->next = entry->waiters;
827     entry->waiters = waiter;
828     // Will respond when domain is stopped.
829     return;
830
831 respond:
832     resp_err = b->tx_vtbl.wait_response(b, NOP_CONT, err, entry->exit_status);
833     if (err_is_fail(resp_err)) {
834         DEBUG_ERR(resp_err, "failed to send wait_response");
835     }
836 }
837
838 static struct proc_mgmt_rx_vtbl monitor_vtbl = {
839     .add_spawnd           = add_spawnd_handler,
840     .spawn_call           = spawn_handler,
841     .spawn_with_caps_call = spawn_with_caps_handler,
842     .span_call            = span_handler,
843     .kill_call            = kill_handler,
844     .exit_call            = exit_handler,
845     .wait_call            = wait_handler
846 };
847
848 static struct proc_mgmt_rx_vtbl non_monitor_vtbl = {
849     .add_spawnd           = add_spawnd_handler_non_monitor,
850     .spawn_call           = spawn_handler,
851     .spawn_with_caps_call = spawn_with_caps_handler,
852     .span_call            = span_handler,
853     .kill_call            = kill_handler,
854     .exit_call            = exit_handler,
855     .wait_call            = wait_handler
856 };
857
858 static errval_t alloc_ep_for_monitor(struct capref *ep)
859 {
860     struct proc_mgmt_lmp_binding *lmpb =
861         malloc(sizeof(struct proc_mgmt_lmp_binding));
862     assert(lmpb != NULL);
863
864     // setup our end of the binding
865     errval_t err = proc_mgmt_client_lmp_accept(lmpb, get_default_waitset(),
866                                                DEFAULT_LMP_BUF_WORDS);
867     if (err_is_fail(err)) {
868         free(lmpb);
869         return err_push(err, LIB_ERR_PROC_MGMT_CLIENT_ACCEPT);
870     }
871
872     *ep = lmpb->chan.local_cap;
873     lmpb->b.rx_vtbl = monitor_vtbl;
874
875     return SYS_ERR_OK;
876 }
877
878 static void export_cb(void *st, errval_t err, iref_t iref)
879 {
880     if (err_is_fail(err)) {
881         USER_PANIC_ERR(err, "export failed");
882     }
883
884     // Allocate an endpoint for the local monitor, who will use it to inform
885     // us about new spawnd irefs on behalf of other monitors.
886     struct capref ep;
887     err = alloc_ep_for_monitor(&ep);
888     if (err_is_fail(err)) {
889         USER_PANIC_ERR(err, "failed to allocate LMP EP for local monitor");
890     }
891
892     // Send the endpoint to the monitor, so it can finish the handshake.
893     struct monitor_binding *mb = get_monitor_binding();
894     err = mb->tx_vtbl.set_proc_mgmt_ep_request(mb, NOP_CONT, ep);
895     if (err_is_fail(err)) {
896         USER_PANIC_ERR(err, "failed to send set_proc_mgmt_ep_request to "
897                        "monitor");
898     }
899
900     // Also register this iref with the name service, for arbitrary client
901     // domains to use for spawn-related ops.
902     err = nameservice_register(SERVICE_BASENAME, iref);
903     if (err_is_fail(err)) {
904         USER_PANIC_ERR(err, "nameservice_register failed");
905     }
906 }
907
908 static errval_t connect_cb(void *st, struct proc_mgmt_binding *b)
909 {
910     b->rx_vtbl = non_monitor_vtbl;
911     return SYS_ERR_OK;
912 }
913
914 errval_t start_service(void)
915 {
916     return proc_mgmt_export(NULL, export_cb, connect_cb, get_default_waitset(),
917             IDC_EXPORT_FLAGS_DEFAULT);
918 }