Fix a bug where pending cleanup clients wouldn't be added on exit calls.
[barrelfish] / usr / proc_mgmt / service.c
1 /**
2  * \file
3  * \brief Process management service.
4  */
5
6 /*
7  * Copyright (c) 2017, ETH Zurich.
8  * All rights reserved.
9  *
10  * This file is distributed under the terms in the attached LICENSE file.
11  * If you do not find this file, copies can be found by writing to:
12  * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
13  */
14
15 #include <barrelfish/barrelfish.h>
16 #include <barrelfish/nameservice_client.h>
17 #include <barrelfish/proc_mgmt_client.h>
18 #include <barrelfish/spawn_client.h>
19 #include <if/monitor_defs.h>
20 #include <if/proc_mgmt_defs.h>
21 #include <if/spawn_defs.h>
22
23 #include "domain.h"
24 #include "internal.h"
25 #include "pending_clients.h"
26 #include "spawnd_state.h"
27
28 static void add_spawnd_handler(struct proc_mgmt_binding *b, coreid_t core_id,
29                                iref_t iref)
30 {
31     if (spawnd_state_exists(core_id)) {
32         DEBUG_ERR(PROC_MGMT_ERR_SPAWND_EXISTS, "spawnd_state_exists");
33         return;
34     }
35
36     // Bind with the spawnd.
37     struct spawn_binding *spawnb;
38     errval_t err = spawn_bind_iref(iref, &spawnb);
39     if (err_is_fail(err)) {
40         DEBUG_ERR(err, "spawn_bind_iref");
41         return;
42     }
43
44     err = spawnd_state_alloc(core_id, spawnb);
45     if (err_is_fail(err)) {
46         DEBUG_ERR(err, "spawnd_state_alloc");
47     }
48
49     debug_printf("Process manager bound with spawnd.%u on iref %u\n", core_id,
50             iref);
51 }
52
53 static void add_spawnd_handler_non_monitor(struct proc_mgmt_binding *b,
54                                            coreid_t core_id, iref_t iref)
55 {
56     // debug_printf("Ignoring add_spawnd call: %s\n",
57     //              err_getstring(PROC_MGMT_ERR_NOT_MONITOR));
58 }
59
60 static void spawn_reply_handler(struct spawn_binding *b,
61                                 struct capref domain_cap, errval_t spawn_err);
62 static void spawn_with_caps_reply_handler(struct spawn_binding *b,
63                                           struct capref domain_cap,
64                                           errval_t spawn_err);
65 static void span_reply_handler(struct spawn_binding *b,
66                                struct capref domain_cap, errval_t span_err);
67 static void kill_reply_handler(struct spawn_binding *b,
68                                struct capref domain_cap, errval_t kill_err);
69 static void exit_reply_handler(struct spawn_binding *b,
70                                struct capref domain_cap, errval_t exit_err);
71 static void cleanup_reply_handler(struct spawn_binding *b,
72                                   struct capref domain_cap,
73                                   errval_t cleanup_err);
74
75 static void spawn_request_sender(void *arg)
76 {
77     struct pending_spawn *spawn = (struct pending_spawn*) arg;
78
79     errval_t err;
80     bool with_caps = !(capref_is_null(spawn->inheritcn_cap) &&
81                        capref_is_null(spawn->argcn_cap));
82     if (with_caps) {
83         spawn->b->rx_vtbl.spawn_with_caps_reply = spawn_with_caps_reply_handler;
84         err = spawn->b->tx_vtbl.spawn_with_caps_request(spawn->b, NOP_CONT,
85                                                         cap_procmng,
86                                                         spawn->domain_cap,
87                                                         spawn->path,
88                                                         spawn->argvbuf,
89                                                         spawn->argvbytes,
90                                                         spawn->envbuf,
91                                                         spawn->envbytes,
92                                                         spawn->inheritcn_cap,
93                                                         spawn->argcn_cap,
94                                                         spawn->flags);
95     } else {
96         spawn->b->rx_vtbl.spawn_reply = spawn_reply_handler;
97         err = spawn->b->tx_vtbl.spawn_request(spawn->b, NOP_CONT, cap_procmng,
98                                               spawn->domain_cap, spawn->path,
99                                               spawn->argvbuf, spawn->argvbytes,
100                                               spawn->envbuf, spawn->envbytes,
101                                               spawn->flags);
102     }
103     if (err_is_ok(err)) {
104         free(spawn);
105     } else {
106         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
107             err = spawn->b->register_send(spawn->b, spawn->b->waitset,
108                                           MKCONT(spawn_request_sender, arg));
109             if (err_is_fail(err)) {
110                 DEBUG_ERR(err, "registering for spawn request");
111                 pending_clients_release(spawn->domain_cap,
112                                         with_caps ? ClientType_SpawnWithCaps
113                                                   : ClientType_Spawn,
114                                         NULL);
115                 event_mutex_unlock(&spawn->b->mutex);
116                 free(spawn);
117             }
118         } else {
119             DEBUG_ERR(err, "sending spawn request");
120             pending_clients_release(spawn->domain_cap,
121                                     with_caps ? ClientType_SpawnWithCaps
122                                               : ClientType_Spawn,
123                                     NULL);
124             event_mutex_unlock(&spawn->b->mutex);
125             free(spawn);
126         }
127     }
128 }
129
130 static void span_request_sender(void *arg)
131 {
132     struct pending_span *span = (struct pending_span*) arg;
133
134     errval_t err;
135     span->b->rx_vtbl.span_reply = span_reply_handler;
136     err = span->b->tx_vtbl.span_request(span->b, NOP_CONT, cap_procmng,
137                                         span->domain_cap, span->vroot,
138                                         span->dispframe);
139     if (err_is_ok(err)) {
140         err = domain_span(span->domain_cap, span->core_id);
141         if (err_is_fail(err)) {
142             DEBUG_ERR(err, "failed domain_span to core %u\n", span->core_id);
143         }
144         free(span);
145     } else {
146         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
147             err = span->b->register_send(span->b, span->b->waitset,
148                                          MKCONT(span_request_sender, arg));
149             if (err_is_fail(err)) {
150                 DEBUG_ERR(err, "registering for span request");
151                 pending_clients_release(span->domain_cap, ClientType_Span,
152                                         NULL);
153                 event_mutex_unlock(&span->b->mutex);
154                 free(span);
155             }
156         } else {
157             DEBUG_ERR(err, "sending span request");
158             pending_clients_release(span->domain_cap, ClientType_Span, NULL);
159             event_mutex_unlock(&span->b->mutex);
160             free(span);
161         }
162     }
163 }
164
165 static void kill_request_sender(void *arg)
166 {
167     struct pending_kill_exit_cleanup *kill = (struct pending_kill_exit_cleanup*) arg;
168
169     errval_t err;
170     kill->sb->rx_vtbl.kill_reply = kill_reply_handler;
171     err = kill->sb->tx_vtbl.kill_request(kill->sb, NOP_CONT, cap_procmng,
172                                         kill->domain_cap);
173     if (err_is_ok(err)) {
174         free(kill);
175     } else {
176         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
177             err = kill->sb->register_send(kill->sb, kill->sb->waitset,
178                                          MKCONT(kill_request_sender, arg));
179             if (err_is_fail(err)) {
180                 DEBUG_ERR(err, "registering for kill request");
181
182                 struct pending_client *cl;
183                 err = pending_clients_release_one(kill->domain_cap,
184                                                   ClientType_Kill,
185                                                   kill->pmb, &cl);
186                 if (err_is_ok(err)) {
187                     while (cl != NULL) {
188                         struct pending_client *tmp = cl;
189                         cl = cl->next;
190                         free(tmp);
191                     }
192                 }
193
194                 event_mutex_unlock(&kill->sb->mutex);
195                 free(kill);
196             }
197         } else {
198             DEBUG_ERR(err, "sending kill request");
199             
200             struct pending_client *cl;
201             err = pending_clients_release_one(kill->domain_cap,
202                                               ClientType_Kill,
203                                               kill->pmb, &cl);
204             if (err_is_ok(err)) {
205                 while (cl != NULL) {
206                     struct pending_client *tmp = cl;
207                     cl = cl->next;
208                     free(tmp);
209                 }
210             }
211
212             event_mutex_unlock(&kill->sb->mutex);
213             free(kill);
214         }
215     }
216 }
217
218 static void exit_request_sender(void *arg)
219 {
220     struct pending_kill_exit_cleanup *exit = (struct pending_kill_exit_cleanup*) arg;
221
222     errval_t err;
223     exit->sb->rx_vtbl.exit_reply = exit_reply_handler;
224     err = exit->sb->tx_vtbl.exit_request(exit->sb, NOP_CONT, cap_procmng,
225                                         exit->domain_cap);
226     if (err_is_ok(err)) {
227         free(exit);
228     } else {
229         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
230             err = exit->sb->register_send(exit->sb, exit->sb->waitset,
231                                          MKCONT(exit_request_sender, arg));
232             if (err_is_fail(err)) {
233                 DEBUG_ERR(err, "registering for exit request");
234                 err = pending_clients_release(exit->domain_cap, ClientType_Exit,
235                                               NULL);
236                 event_mutex_unlock(&exit->sb->mutex);
237                 free(exit);
238             }
239         } else {
240             DEBUG_ERR(err, "sending exit request");
241             err = pending_clients_release(exit->domain_cap, ClientType_Exit,
242                                           NULL);
243             event_mutex_unlock(&exit->sb->mutex);
244             free(exit);
245         }
246     }
247 }
248
249 static void cleanup_request_sender(void *arg)
250 {
251     struct pending_kill_exit_cleanup *cleanup = (struct pending_kill_exit_cleanup*) arg;
252
253     errval_t err;
254     cleanup->sb->rx_vtbl.cleanup_reply = cleanup_reply_handler;
255     err = cleanup->sb->tx_vtbl.cleanup_request(cleanup->sb, NOP_CONT, cap_procmng,
256                                               cleanup->domain_cap);
257     if (err_is_ok(err)) {
258         free(cleanup);
259     } else {
260         if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
261             err = cleanup->sb->register_send(cleanup->sb, cleanup->sb->waitset,
262                                             MKCONT(cleanup_request_sender, arg));
263             if (err_is_fail(err)) {
264                 DEBUG_ERR(err, "registering for cleanup request");
265                 pending_clients_release(cleanup->domain_cap, ClientType_Cleanup,
266                                         NULL);
267                 event_mutex_unlock(&cleanup->sb->mutex);
268                 free(cleanup);
269             }
270         } else {
271             DEBUG_ERR(err, "sending cleanup request");
272             pending_clients_release(cleanup->domain_cap, ClientType_Cleanup,
273                                     NULL);
274             event_mutex_unlock(&cleanup->sb->mutex);
275             free(cleanup);
276         }
277     }
278 }
279
280 static void spawn_reply_handler(struct spawn_binding *b,
281                                 struct capref domain_cap, errval_t spawn_err)
282 {
283     event_mutex_unlock(&b->mutex);
284
285     struct pending_client *cl;
286     errval_t err = pending_clients_release(domain_cap, ClientType_Spawn, &cl);
287     if (err_is_fail(err)) {
288         DEBUG_ERR(err, "failed to retrieve pending spawn client based on domain"
289                   " cap");
290         return;
291     }
292
293     err = spawn_err;
294     if (err_is_ok(spawn_err)) {
295         err = domain_spawn(domain_cap, cl->core_id);
296     }
297
298     errval_t resp_err = cl->b->tx_vtbl.spawn_response(cl->b, NOP_CONT, err,
299                                                       domain_cap);
300     if (err_is_fail(resp_err)) {
301         DEBUG_ERR(resp_err, "failed to send spawn_response to client");
302     }
303     
304     free(cl);
305 }
306
307 static void spawn_with_caps_reply_handler(struct spawn_binding *b,
308                                           struct capref domain_cap,
309                                           errval_t spawn_err)
310 {
311     event_mutex_unlock(&b->mutex);
312     
313     struct pending_client *cl;
314     errval_t err = pending_clients_release(domain_cap, ClientType_SpawnWithCaps,
315                                            &cl);
316     if (err_is_fail(err)) {
317         DEBUG_ERR(err, "failed to retrieve pending spawn_with_caps client based"
318                   " on domain cap");
319         return;
320     }
321
322     err = spawn_err;
323     if (err_is_ok(spawn_err)) {
324         err = domain_spawn(domain_cap, cl->core_id);
325     }
326
327     errval_t resp_err = cl->b->tx_vtbl.spawn_with_caps_response(cl->b, NOP_CONT,
328                                                                 err,
329                                                                 domain_cap);
330     if (err_is_fail(resp_err)) {
331         DEBUG_ERR(resp_err, "failed to send spawn_with_caps_response to "
332                   "client");
333     }
334     
335     free(cl);
336 }
337
338 static void span_reply_handler(struct spawn_binding *b,
339                                struct capref domain_cap, errval_t span_err)
340 {
341     event_mutex_unlock(&b->mutex);
342
343     struct pending_client *cl;
344     errval_t err = pending_clients_release(domain_cap, ClientType_Span, &cl);
345     if (err_is_fail(err)) {
346         DEBUG_ERR(err, "failed to retrieve pending span client based on domain"
347                   " cap");
348         return;
349     }
350
351     struct domain_entry *entry;
352     err = domain_get_by_cap(cl->domain_cap, &entry);
353     if (err_is_fail(err)) {
354         DEBUG_ERR(err, "failed to retrieve span client by domain cap");
355         return;
356     }
357
358     if (entry->status != DOMAIN_STATUS_RUNNING) {
359         // Domain has been stopped while we were serving the request; there's
360         // no one to respond to.
361         free(cl);
362         return;
363     }
364
365     err = cl->b->tx_vtbl.span_response(cl->b, NOP_CONT, span_err);
366     if (err_is_fail(err)) {
367         DEBUG_ERR(err, "failed to send span_response to client");
368     }
369     
370     free(cl);
371 }
372
373 static void cleanup_reply_handler(struct spawn_binding *b,
374                                   struct capref domain_cap,
375                                   errval_t cleanup_err)
376 {
377     event_mutex_unlock(&b->mutex);
378
379     struct pending_client *cl;
380     errval_t err = pending_clients_release(domain_cap, ClientType_Cleanup, &cl);
381     if (err_is_fail(err)) {
382         DEBUG_ERR(err, "failed to retrieve pending cleanup client based on "
383                   "domain cap");
384         return;
385     }
386
387     if (err_is_fail(cleanup_err)) {
388         // TODO(razvan): Here, spawnd has failed deleting its local cspace.
389         // Should we send another cleanup message, until it might succeed?
390         free(cl);
391         return;
392     }
393
394     struct domain_entry *entry;
395     err = domain_get_by_cap(domain_cap, &entry);
396     if (err_is_fail(err)) {
397         DEBUG_ERR(err, "failed to retrieve domain by cap returned by spawnd "
398                   "after cleanup");
399         return;
400     }
401
402     assert(entry->num_spawnds_resources > 0);
403     assert(entry->status != DOMAIN_STATUS_CLEANED);
404
405     --entry->num_spawnds_resources;
406
407     if (entry->num_spawnds_resources == 0) {
408         entry->status = DOMAIN_STATUS_CLEANED;
409
410         // At this point, the domain exists in state CLEANED for
411         // history reasons. For instance, if some other domain
412         // issues a wait call for this one, the process manager can
413         // return the exit status directly.
414         // At some point, however, we might want to just clean up
415         // the domain entry and recycle the domain cap.
416     } else {
417         // Expecting to receive further cleanup replies from other
418         // spawnds for the same domain cap, hence re-add the
419         // pending client.
420         err = pending_clients_add(domain_cap, cl->b,
421                                   ClientType_Cleanup, MAX_COREID);
422         if (err_is_fail(err)) {
423             DEBUG_ERR(err, "pending_clients_add in cleanup_reply_handler");
424         }
425     }
426 }
427
428 static void kill_reply_handler(struct spawn_binding *b,
429                                struct capref domain_cap, errval_t kill_err)
430 {
431     event_mutex_unlock(&b->mutex);
432
433     struct pending_client *cl;
434     errval_t err = pending_clients_release(domain_cap, ClientType_Kill, &cl);
435     if (err_is_fail(err)) {
436         DEBUG_ERR(err, "failed to retrieve pending kill client based on domain "
437                   "cap");
438         return;
439     }
440
441     errval_t resp_err;
442     if (err_is_fail(kill_err)) {
443         // TODO(razvan): Here, spawnd has failed deleting its local dispatcher.
444         // Should we send another kill message, until it might succeed?
445         while (cl != NULL) {
446             resp_err = cl->b->tx_vtbl.kill_response(cl->b, NOP_CONT,
447                                                      kill_err);
448             if (err_is_fail(resp_err)) {
449                 DEBUG_ERR(resp_err, "failed to send kill_response to client");
450             }
451             struct pending_client *tmp = cl;
452             cl = cl->next;
453             free(tmp);
454         }
455         return;
456     }
457
458     struct domain_entry *entry;
459     err = domain_get_by_cap(domain_cap, &entry);
460     if (err_is_fail(err)) {
461         DEBUG_ERR(err, "failed to retrieve domain by cap returned by spawnd "
462                   "after kill");
463         return;
464     }
465
466     assert(entry->num_spawnds_running > 0);
467     assert(entry->status != DOMAIN_STATUS_STOPPED);
468
469     --entry->num_spawnds_running;
470
471     if (entry->num_spawnds_running == 0) {
472         entry->status = DOMAIN_STATUS_STOPPED;
473         entry->exit_status = EXIT_STATUS_KILLED;
474
475         err = pending_clients_add(domain_cap, NULL, ClientType_Cleanup,
476                                   MAX_COREID);
477         if (err_is_fail(err)) {
478             DEBUG_ERR(err, "pending_clients_add in kill_reply_handler");
479         }
480
481         // TODO(razvan): Might it be more sane if we respond back
482         // to the client after the domain has been cleaned up (i.e.
483         // the cspace root has been revoked for all dispatchers)?
484         while (cl != NULL) {
485             resp_err = cl->b->tx_vtbl.kill_response(cl->b, NOP_CONT,
486                                                      kill_err);
487             if (err_is_fail(resp_err)) {
488                 DEBUG_ERR(resp_err, "failed to send kill_response to client");
489             }
490             struct pending_client *tmp = cl;
491             cl = cl->next;
492             free(tmp);
493         }
494         
495         // TODO(razvan): Same problem applies to the waiters: would
496         // it be better if we sent them wait_responses after the
497         // cspace root has been revoked, too? (here and in the exit
498         // case).
499         struct domain_waiter *waiter = entry->waiters;
500         while (waiter != NULL) {
501             waiter->b->tx_vtbl.wait_response(waiter->b, NOP_CONT,
502                                              SYS_ERR_OK,
503                                              entry->exit_status);
504             struct domain_waiter *tmp = waiter;
505             waiter = waiter->next;
506             free(tmp);
507         }
508
509         for (coreid_t i = 0; i < MAX_COREID; ++i) {
510             if (entry->spawnds[i] == NULL) {
511                 continue;
512             }
513
514             struct spawn_binding *spb = entry->spawnds[i]->b;
515
516             struct pending_kill_exit_cleanup *cleanup = (struct pending_kill_exit_cleanup*) malloc(
517                     sizeof(struct pending_kill_exit_cleanup));
518             cleanup->sb = spb;
519             cleanup->domain_cap = domain_cap;
520
521             spb->rx_vtbl.cleanup_reply = cleanup_reply_handler;
522             event_mutex_enqueue_lock(&spb->mutex,
523                                      &cleanup->qn,
524                                      (struct event_closure) {
525                                          .handler = cleanup_request_sender,
526                                          .arg = cleanup });
527         }
528     } else {
529         err = pending_clients_add(domain_cap, cl->b, ClientType_Kill,
530                                   MAX_COREID);
531         if (err_is_fail(err)) {
532             DEBUG_ERR(err, "pending_clients_add in kill_reply_handler");
533         }
534     }
535 }
536
537 static void exit_reply_handler(struct spawn_binding *b,
538                                struct capref domain_cap, errval_t exit_err)
539 {
540     event_mutex_unlock(&b->mutex);
541
542     struct pending_client *cl;
543     errval_t err = pending_clients_release(domain_cap, ClientType_Exit, &cl);
544     if (err_is_fail(err)) {
545         DEBUG_ERR(err, "failed to retrieve pending exit client based on domain "
546                   "cap");
547         return;
548     }
549
550     if (err_is_fail(exit_err)) {
551         // TODO(razvan): Here, spawnd has failed deleting its local dispatcher.
552         // Should we send another kill message, until it might succeed?
553         free(cl);
554         return;
555     }
556
557     struct domain_entry *entry;
558     err = domain_get_by_cap(domain_cap, &entry);
559     if (err_is_fail(err)) {
560         DEBUG_ERR(err, "failed to retrieve domain by cap returned by spawnd "
561                   "after exit");
562         return;
563     }
564
565     assert(entry->num_spawnds_running > 0);
566     assert(entry->status != DOMAIN_STATUS_STOPPED);
567
568     --entry->num_spawnds_running;
569
570     if (entry->num_spawnds_running == 0) {
571         entry->status = DOMAIN_STATUS_STOPPED;
572
573         err = pending_clients_add(domain_cap, NULL, ClientType_Cleanup,
574                                   MAX_COREID);
575         if (err_is_fail(err)) {
576             DEBUG_ERR(err, "pending_clients_add in exit_reply_handler");
577         }
578
579         free(cl);
580
581         // TODO(razvan): Same problem applies to the waiters: would
582         // it be better if we sent them wait_responses after the
583         // cspace root has been revoked, too? (here and in the exit
584         // case).
585         struct domain_waiter *waiter = entry->waiters;
586         while (waiter != NULL) {
587             waiter->b->tx_vtbl.wait_response(waiter->b, NOP_CONT,
588                                              SYS_ERR_OK,
589                                              entry->exit_status);
590             struct domain_waiter *tmp = waiter;
591             waiter = waiter->next;
592             free(tmp);
593         }
594
595         for (coreid_t i = 0; i < MAX_COREID; ++i) {
596             if (entry->spawnds[i] == NULL) {
597                 continue;
598             }
599
600             struct spawn_binding *spb = entry->spawnds[i]->b;
601
602             struct pending_kill_exit_cleanup *cleanup = (struct pending_kill_exit_cleanup*) malloc(
603                     sizeof(struct pending_kill_exit_cleanup));
604             cleanup->sb = spb;
605             cleanup->domain_cap = domain_cap;
606
607             spb->rx_vtbl.cleanup_reply = cleanup_reply_handler;
608             event_mutex_enqueue_lock(&spb->mutex,
609                                      &cleanup->qn,
610                                      (struct event_closure) {
611                                          .handler = cleanup_request_sender,
612                                          .arg = cleanup });
613         }
614     } else {
615         err = pending_clients_add(domain_cap, cl->b, ClientType_Exit,
616                                   MAX_COREID);
617         if (err_is_fail(err)) {
618             DEBUG_ERR(err, "pending_clients_add in kill_reply_handler");
619         }
620     }
621 }
622
623 static errval_t spawn_handler_common(struct proc_mgmt_binding *b,
624                                      enum ClientType type,
625                                      coreid_t core_id, const char *path,
626                                      const char *argvbuf, size_t argvbytes,
627                                      const char *envbuf, size_t envbytes,
628                                      struct capref inheritcn_cap,
629                                      struct capref argcn_cap, uint8_t flags)
630 {
631     if (!spawnd_state_exists(core_id)) {
632         return PROC_MGMT_ERR_INVALID_SPAWND;
633     }
634
635     struct spawnd_state *state = spawnd_state_get(core_id);
636     assert(state != NULL);
637     struct spawn_binding *cl = state->b;
638     assert(cl != NULL);
639
640     struct capref domain_cap;
641     errval_t err = slot_alloc(&domain_cap);
642     if (err_is_fail(err)) {
643         DEBUG_ERR(err, "slot_alloc domain_cap");
644         return err_push(err, PROC_MGMT_ERR_CREATE_DOMAIN_CAP);
645     }
646     err = cap_retype(domain_cap, cap_procmng, 0, ObjType_Domain, 0, 1);
647     if (err_is_fail(err)) {
648         DEBUG_ERR(err, "cap_retype domain_cap");
649         return err_push(err, PROC_MGMT_ERR_CREATE_DOMAIN_CAP);
650     }
651
652     err = pending_clients_add(domain_cap, b, type, core_id);
653     if (err_is_fail(err)) {
654         DEBUG_ERR(err, "pending_clients_add");
655         return err;
656     }
657
658     struct pending_spawn *spawn = (struct pending_spawn*) malloc(
659             sizeof(struct pending_spawn));
660     spawn->domain_cap = domain_cap;
661     spawn->b = cl;
662     spawn->core_id = core_id;
663     spawn->path = path;
664     spawn->argvbuf = argvbuf;
665     spawn->argvbytes = argvbytes;
666     spawn->envbuf = envbuf;
667     spawn->envbytes = envbytes;
668     spawn->inheritcn_cap = inheritcn_cap;
669     spawn->argcn_cap = argcn_cap;
670     spawn->flags = flags;
671
672     event_mutex_enqueue_lock(&cl->mutex, &spawn->qn,
673                              (struct event_closure) {
674                                  .handler = spawn_request_sender,
675                                  .arg = spawn });
676
677     return SYS_ERR_OK;
678 }
679
680 static void spawn_handler(struct proc_mgmt_binding *b, coreid_t core_id,
681                           const char *path, const char *argvbuf,
682                           size_t argvbytes, const char *envbuf, size_t envbytes,
683                           uint8_t flags)
684 {
685     errval_t err, resp_err;
686     err = spawn_handler_common(b, ClientType_Spawn, core_id, path, argvbuf,
687                                argvbytes, envbuf, envbytes, NULL_CAP, NULL_CAP,
688                                flags);
689     if (err_is_ok(err)) {
690         // Will respond to client when we get the reply from spawnd.
691         return;
692     }
693
694     resp_err = b->tx_vtbl.spawn_response(b, NOP_CONT, err, NULL_CAP);
695     if (err_is_fail(resp_err)) {
696         DEBUG_ERR(resp_err, "failed to send spawn_response");
697     }
698 }
699
700 static void spawn_with_caps_handler(struct proc_mgmt_binding *b,
701                                     coreid_t core_id, const char *path,
702                                     const char *argvbuf, size_t argvbytes,
703                                     const char *envbuf, size_t envbytes,
704                                     struct capref inheritcn_cap,
705                                     struct capref argcn_cap, uint8_t flags)
706 {
707     errval_t err, resp_err;
708     err = spawn_handler_common(b, ClientType_SpawnWithCaps, core_id, path,
709                                argvbuf, argvbytes, envbuf, envbytes,
710                                inheritcn_cap, argcn_cap, flags);
711     if (err_is_ok(err)) {
712         // Will respond to client when we get the reply from spawnd.
713         return;
714     }
715
716     resp_err = b->tx_vtbl.spawn_with_caps_response(b, NOP_CONT, err,
717                                                             NULL_CAP);
718     if (err_is_fail(resp_err)) {
719         DEBUG_ERR(resp_err, "failed to send spawn_with_caps_response");
720     }
721 }
722
723 static void span_handler(struct proc_mgmt_binding *b, struct capref domain_cap,
724                          coreid_t core_id, struct capref vroot,
725                          struct capref dispframe)
726 {
727     errval_t err, resp_err;
728     err = domain_can_span(domain_cap, core_id);
729     if (err_is_fail(err)) {
730         goto respond_with_err;
731     }
732
733     if (!spawnd_state_exists(core_id)) {
734         err = PROC_MGMT_ERR_INVALID_SPAWND;
735         goto respond_with_err;
736     }
737
738     struct spawnd_state *state = spawnd_state_get(core_id);
739     assert(state != NULL);
740     struct spawn_binding *cl = state->b;
741     assert(cl != NULL);
742
743     err = pending_clients_add(domain_cap, b, ClientType_Span, core_id);
744     if (err_is_fail(err)) {
745         goto respond_with_err;
746     }
747
748     struct pending_span *span = (struct pending_span*) malloc(
749             sizeof(struct pending_span));
750     span->domain_cap = domain_cap;
751     span->b = cl;
752     span->core_id = core_id;
753     span->vroot = vroot;
754     span->dispframe = dispframe;
755
756     event_mutex_enqueue_lock(&cl->mutex, &span->qn,
757                              (struct event_closure) {
758                                  .handler = span_request_sender,
759                                  .arg = span });
760
761 respond_with_err:
762     resp_err = b->tx_vtbl.span_response(b, NOP_CONT, err);
763     if (err_is_fail(resp_err)) {
764         DEBUG_ERR(resp_err, "failed to send span_response");
765     }
766 }
767
768 static errval_t kill_handler_common(struct proc_mgmt_binding *b,
769                                     struct capref domain_cap,
770                                     enum ClientType type,
771                                     uint8_t exit_status)
772 {
773     errval_t err = pending_clients_add(domain_cap, b, type, MAX_COREID);
774     if (err_is_fail(err)) {
775         return err;
776     }
777
778     struct domain_entry *entry;
779     err = domain_get_by_cap(domain_cap, &entry);
780     if (err_is_fail(err)) {
781         return err;
782     }
783
784     entry->exit_status = exit_status;
785     domain_stop_pending(entry);
786
787     for (coreid_t i = 0; i < MAX_COREID; ++i) {
788         if (entry->spawnds[i] == NULL) {
789             continue;
790         }
791
792         struct spawn_binding *spb = entry->spawnds[i]->b;
793
794         struct pending_kill_exit_cleanup *cmd = (struct pending_kill_exit_cleanup*) malloc(
795                 sizeof(struct pending_kill_exit_cleanup));
796         cmd->domain_cap = domain_cap;
797         cmd->sb = spb;
798
799         switch (type) {
800             case ClientType_Kill:
801                 cmd->pmb = b;
802
803                 event_mutex_enqueue_lock(&spb->mutex,
804                                          &cmd->qn,
805                                          (struct event_closure) {
806                                             .handler = kill_request_sender,
807                                             .arg = cmd });
808                 break;
809
810             case ClientType_Exit:
811                 event_mutex_enqueue_lock(&spb->mutex,
812                                          &cmd->qn,
813                                          (struct event_closure) {
814                                             .handler = exit_request_sender,
815                                             .arg = cmd });
816                 break;
817             default:
818                 USER_PANIC("invalid client type for kill: %u\n", type);
819         }
820     }
821
822     return SYS_ERR_OK;
823 }
824
825 static void kill_handler(struct proc_mgmt_binding *b,
826                          struct capref victim_domain_cap)
827 {
828     errval_t err = kill_handler_common(b, victim_domain_cap, ClientType_Kill,
829                                        EXIT_STATUS_KILLED);
830     if (err_is_fail(err)) {
831         errval_t resp_err = b->tx_vtbl.kill_response(b, NOP_CONT, err);
832         if (err_is_fail(resp_err)) {
833             DEBUG_ERR(resp_err, "failed to send kill_response");
834         }
835     }
836 }
837
838 static void exit_handler(struct proc_mgmt_binding *b, struct capref domain_cap,
839                          uint8_t exit_status)
840 {
841     errval_t err = kill_handler_common(b, domain_cap, ClientType_Exit,
842                                        exit_status);
843     if (err_is_fail(err)) {
844         DEBUG_ERR(err, "processing exit_handler for requesting domain, exit "
845                   "code %u", exit_status);
846     }
847     // Error or not, there's no client to respond to anymore.
848 }
849
850 static void wait_handler(struct proc_mgmt_binding *b, struct capref domain_cap)
851 {
852     errval_t err, resp_err;
853     struct domain_entry *entry;
854     err = domain_get_by_cap(domain_cap, &entry);
855     if (err_is_fail(err)) {
856         goto respond;
857     }
858
859     if (entry->status == DOMAIN_STATUS_STOPPED) {
860         // Domain has already been stopped, so just reply with exit status.
861         goto respond;
862     }
863
864     struct domain_waiter *waiter = (struct domain_waiter*) malloc(
865             sizeof(struct domain_waiter));
866     waiter->b = b;
867     waiter->next = entry->waiters;
868     entry->waiters = waiter;
869     // Will respond when domain is stopped.
870     return;
871
872 respond:
873     resp_err = b->tx_vtbl.wait_response(b, NOP_CONT, err, entry->exit_status);
874     if (err_is_fail(resp_err)) {
875         DEBUG_ERR(resp_err, "failed to send wait_response");
876     }
877 }
878
879 static struct proc_mgmt_rx_vtbl monitor_vtbl = {
880     .add_spawnd           = add_spawnd_handler,
881     .spawn_call           = spawn_handler,
882     .spawn_with_caps_call = spawn_with_caps_handler,
883     .span_call            = span_handler,
884     .kill_call            = kill_handler,
885     .exit_call            = exit_handler,
886     .wait_call            = wait_handler
887 };
888
889 static struct proc_mgmt_rx_vtbl non_monitor_vtbl = {
890     .add_spawnd           = add_spawnd_handler_non_monitor,
891     .spawn_call           = spawn_handler,
892     .spawn_with_caps_call = spawn_with_caps_handler,
893     .span_call            = span_handler,
894     .kill_call            = kill_handler,
895     .exit_call            = exit_handler,
896     .wait_call            = wait_handler
897 };
898
899 static errval_t alloc_ep_for_monitor(struct capref *ep)
900 {
901     struct proc_mgmt_lmp_binding *lmpb =
902         malloc(sizeof(struct proc_mgmt_lmp_binding));
903     assert(lmpb != NULL);
904
905     // setup our end of the binding
906     errval_t err = proc_mgmt_client_lmp_accept(lmpb, get_default_waitset(),
907                                                DEFAULT_LMP_BUF_WORDS);
908     if (err_is_fail(err)) {
909         free(lmpb);
910         return err_push(err, LIB_ERR_PROC_MGMT_CLIENT_ACCEPT);
911     }
912
913     *ep = lmpb->chan.local_cap;
914     lmpb->b.rx_vtbl = monitor_vtbl;
915
916     return SYS_ERR_OK;
917 }
918
919 static void export_cb(void *st, errval_t err, iref_t iref)
920 {
921     if (err_is_fail(err)) {
922         USER_PANIC_ERR(err, "export failed");
923     }
924
925     // Allocate an endpoint for the local monitor, who will use it to inform
926     // us about new spawnd irefs on behalf of other monitors.
927     struct capref ep;
928     err = alloc_ep_for_monitor(&ep);
929     if (err_is_fail(err)) {
930         USER_PANIC_ERR(err, "failed to allocate LMP EP for local monitor");
931     }
932
933     // Send the endpoint to the monitor, so it can finish the handshake.
934     struct monitor_binding *mb = get_monitor_binding();
935     err = mb->tx_vtbl.set_proc_mgmt_ep_request(mb, NOP_CONT, ep);
936     if (err_is_fail(err)) {
937         USER_PANIC_ERR(err, "failed to send set_proc_mgmt_ep_request to "
938                        "monitor");
939     }
940
941     // Also register this iref with the name service, for arbitrary client
942     // domains to use for spawn-related ops.
943     err = nameservice_register(SERVICE_BASENAME, iref);
944     if (err_is_fail(err)) {
945         USER_PANIC_ERR(err, "nameservice_register failed");
946     }
947 }
948
949 static errval_t connect_cb(void *st, struct proc_mgmt_binding *b)
950 {
951     b->rx_vtbl = non_monitor_vtbl;
952     return SYS_ERR_OK;
953 }
954
955 errval_t start_service(void)
956 {
957     return proc_mgmt_export(NULL, export_cb, connect_cb, get_default_waitset(),
958             IDC_EXPORT_FLAGS_DEFAULT);
959 }