usr/src/cmd/svc/startd/restarter.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  25  */
  26
  27 /*
  28  * restarter.c - service manipulation
  29  *
  30  * This component manages services whose restarter is svc.startd, the standard
  31  * restarter.  It translates restarter protocol events from the graph engine
  32  * into actions on processes, as a delegated restarter would do.
  33  *
  34  * The master restarter manages a number of always-running threads:
  35  *   - restarter event thread: events from the graph engine
  36  *   - timeout thread: thread to fire queued timeouts
  37  *   - contract thread: thread to handle contract events
  38  *   - wait thread: thread to handle wait-based services
  39  *
  40  * The other threads are created as-needed:
  41  *   - per-instance method threads
  42  *   - per-instance event processing threads
  43  *
  44  * The interaction of all threads must result in the following conditions
  45  * being satisfied (on a per-instance basis):
  46  *   - restarter events must be processed in order
  47  *   - method execution must be serialized
  48  *   - instance delete must be held until outstanding methods are complete
  49  *   - contract events shouldn't be processed while a method is running
  50  *   - timeouts should fire even when a method is running
  51  *
  52  * Service instances are represented by restarter_inst_t's and are kept in the
  53  * instance_list list.
  54  *
  55  * Service States
  56  *   The current state of a service instance is kept in
  57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
  58  *   some time, then before we effect the transition we set
  59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
  60  *   rotate i_next_state to i_state and set i_next_state to
  61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
  62  *   held.  The exception is when we launch methods, which are done with
  63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
  64  *   method_thread() does, we set ri_method_thread to the thread id of the
  65  *   method thread, and when it is nonzero any thread with a different thread id
  66  *   waits on ri_method_cv.
  67  *
  68  * Method execution is serialized by blocking on ri_method_cv in
  69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
  70  * also prevents the instance structure from being deleted until all
  71  * outstanding operations such as method_thread() have finished.
  72  *
  73  * Lock ordering:
  74  *
  75  * dgraph_lock [can be held when taking:]
  76  *   utmpx_lock
  77  *   dictionary->dict_lock
  78  *   st->st_load_lock
  79  *   wait_info_lock
  80  *   ru->restarter_update_lock
  81  *     restarter_queue->rpeq_lock
  82  *   instance_list.ril_lock
  83  *     inst->ri_lock
  84  *   st->st_configd_live_lock
  85  *
  86  * instance_list.ril_lock
  87  *   graph_queue->gpeq_lock
  88  *   gu->gu_lock
  89  *   st->st_configd_live_lock
  90  *   dictionary->dict_lock
  91  *   inst->ri_lock
  92  *     graph_queue->gpeq_lock
  93  *     gu->gu_lock
  94  *     tu->tu_lock
  95  *     tq->tq_lock
  96  *     inst->ri_queue_lock
  97  *       wait_info_lock
  98  *       bp->cb_lock
  99  *     utmpx_lock
 100  *
 101  * single_user_thread_lock
 102  *   wait_info_lock
 103  *   utmpx_lock
 104  *
 105  * gu_freeze_lock
 106  *
 107  * logbuf_mutex nests inside pretty much everything.
 108  */
 109
 110 #include <sys/contract/process.h>
 111 #include <sys/ctfs.h>
 112 #include <sys/stat.h>
 113 #include <sys/time.h>
 114 #include <sys/types.h>
 115 #include <sys/uio.h>
 116 #include <sys/wait.h>
 117 #include <assert.h>
 118 #include <errno.h>
 119 #include <fcntl.h>
 120 #include <libcontract.h>
 121 #include <libcontract_priv.h>
 122 #include <libintl.h>
 123 #include <librestart.h>
 124 #include <librestart_priv.h>
 125 #include <libuutil.h>
 126 #include <limits.h>
 127 #include <poll.h>
 128 #include <port.h>
 129 #include <pthread.h>
 130 #include <stdarg.h>
 131 #include <stdio.h>
 132 #include <strings.h>
 133 #include <unistd.h>
 134
 135 #include "startd.h"
 136 #include "protocol.h"
 137
 138 static uu_list_pool_t *restarter_instance_pool;
 139 static restarter_instance_list_t instance_list;
 140
 141 static uu_list_pool_t *restarter_queue_pool;
 142
 143 /*
 144  * Function used to reset the restart times for an instance, when
 145  * an administrative task comes along and essentially makes the times
 146  * in this array ineffective.
 147  */
 148 static void
 149 reset_start_times(restarter_inst_t *inst)
 150 {
 151         inst->ri_start_index = 0;
 152         bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
 153 }
 154
 155 /*ARGSUSED*/
 156 static int
 157 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
 158     void *private)
 159 {
 160         int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
 161         int rc_id = *(int *)rc_arg;
 162
 163         if (lc_id > rc_id)
 164                 return (1);
 165         if (lc_id < rc_id)
 166                 return (-1);
 167         return (0);
 168 }
 169
 170 static restarter_inst_t *
 171 inst_lookup_by_name(const char *name)
 172 {
 173         int id;
 174
 175         id = dict_lookup_byname(name);
 176         if (id == -1)
 177                 return (NULL);
 178
 179         return (inst_lookup_by_id(id));
 180 }
 181
 182 restarter_inst_t *
 183 inst_lookup_by_id(int id)
 184 {
 185         restarter_inst_t *inst;
 186
 187         MUTEX_LOCK(&instance_list.ril_lock);
 188         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 189         if (inst != NULL)
 190                 MUTEX_LOCK(&inst->ri_lock);
 191         MUTEX_UNLOCK(&instance_list.ril_lock);
 192
 193         if (inst != NULL) {
 194                 while (inst->ri_method_thread != 0 &&
 195                     !pthread_equal(inst->ri_method_thread, pthread_self())) {
 196                         ++inst->ri_method_waiters;
 197                         (void) pthread_cond_wait(&inst->ri_method_cv,
 198                             &inst->ri_lock);
 199                         assert(inst->ri_method_waiters > 0);
 200                         --inst->ri_method_waiters;
 201                 }
 202         }
 203
 204         return (inst);
 205 }
 206
 207 static restarter_inst_t *
 208 inst_lookup_queue(const char *name)
 209 {
 210         int id;
 211         restarter_inst_t *inst;
 212
 213         id = dict_lookup_byname(name);
 214         if (id == -1)
 215                 return (NULL);
 216
 217         MUTEX_LOCK(&instance_list.ril_lock);
 218         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 219         if (inst != NULL)
 220                 MUTEX_LOCK(&inst->ri_queue_lock);
 221         MUTEX_UNLOCK(&instance_list.ril_lock);
 222
 223         return (inst);
 224 }
 225
 226 const char *
 227 service_style(int flags)
 228 {
 229         switch (flags & RINST_STYLE_MASK) {
 230         case RINST_CONTRACT:    return ("contract");
 231         case RINST_TRANSIENT:   return ("transient");
 232         case RINST_WAIT:        return ("wait");
 233
 234         default:
 235 #ifndef NDEBUG
 236                 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
 237 #endif
 238                 abort();
 239                 /* NOTREACHED */
 240         }
 241 }
 242
 243 /*
 244  * Fails with ECONNABORTED or ECANCELED.
 245  */
 246 static int
 247 check_contract(restarter_inst_t *inst, boolean_t primary,
 248     scf_instance_t *scf_inst)
 249 {
 250         ctid_t *ctidp;
 251         int fd, r;
 252
 253         ctidp = primary ? &inst->ri_i.i_primary_ctid :
 254             &inst->ri_i.i_transient_ctid;
 255
 256         assert(*ctidp >= 1);
 257
 258         fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
 259         if (fd >= 0) {
 260                 r = close(fd);
 261                 assert(r == 0);
 262                 return (0);
 263         }
 264
 265         r = restarter_remove_contract(scf_inst, *ctidp, primary ?
 266             RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
 267         switch (r) {
 268         case 0:
 269         case ECONNABORTED:
 270         case ECANCELED:
 271                 *ctidp = 0;
 272                 return (r);
 273
 274         case ENOMEM:
 275                 uu_die("Out of memory\n");
 276                 /* NOTREACHED */
 277
 278         case EPERM:
 279                 uu_die("Insufficient privilege.\n");
 280                 /* NOTREACHED */
 281
 282         case EACCES:
 283                 uu_die("Repository backend access denied.\n");
 284                 /* NOTREACHED */
 285
 286         case EROFS:
 287                 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
 288                     "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
 289                 return (0);
 290
 291         case EINVAL:
 292         case EBADF:
 293         default:
 294                 assert(0);
 295                 abort();
 296                 /* NOTREACHED */
 297         }
 298 }
 299
 300 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
 301
 302 /*
 303  * int restarter_insert_inst(scf_handle_t *, char *)
 304  *   If the inst is already in the restarter list, return its id.  If the inst
 305  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
 306  *   states, insert it into the list, and return 0.
 307  *
 308  *   Fails with
 309  *     ENOENT - name is not in the repository
 310  */
 311 static int
 312 restarter_insert_inst(scf_handle_t *h, const char *name)
 313 {
 314         int id, r;
 315         restarter_inst_t *inst;
 316         uu_list_index_t idx;
 317         scf_service_t *scf_svc;
 318         scf_instance_t *scf_inst;
 319         scf_snapshot_t *snap = NULL;
 320         scf_propertygroup_t *pg;
 321         char *svc_name, *inst_name;
 322         char logfilebuf[PATH_MAX];
 323         char *c;
 324         boolean_t do_commit_states;
 325         restarter_instance_state_t state, next_state;
 326         protocol_states_t *ps;
 327         pid_t start_pid;
 328         restarter_str_t reason = restarter_str_insert_in_graph;
 329
 330         MUTEX_LOCK(&instance_list.ril_lock);
 331
 332         /*
 333          * We don't use inst_lookup_by_name() here because we want the lookup
 334          * & insert to be atomic.
 335          */
 336         id = dict_lookup_byname(name);
 337         if (id != -1) {
 338                 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
 339                     &idx);
 340                 if (inst != NULL) {
 341                         MUTEX_UNLOCK(&instance_list.ril_lock);
 342                         return (0);
 343                 }
 344         }
 345
 346         /* Allocate an instance */
 347         inst = startd_zalloc(sizeof (restarter_inst_t));
 348         inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
 349         inst->ri_utmpx_prefix[0] = '\0';
 350
 351         inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
 352         (void) strcpy((char *)inst->ri_i.i_fmri, name);
 353
 354         inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
 355
 356         /*
 357          * id shouldn't be -1 since we use the same dictionary as graph.c, but
 358          * just in case.
 359          */
 360         inst->ri_id = (id != -1 ? id : dict_insert(name));
 361
 362         special_online_hooks_get(name, &inst->ri_pre_online_hook,
 363             &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
 364
 365         scf_svc = safe_scf_service_create(h);
 366         scf_inst = safe_scf_instance_create(h);
 367         pg = safe_scf_pg_create(h);
 368         svc_name = startd_alloc(max_scf_name_size);
 369         inst_name = startd_alloc(max_scf_name_size);
 370
 371 rep_retry:
 372         if (snap != NULL)
 373                 scf_snapshot_destroy(snap);
 374         if (inst->ri_logstem != NULL)
 375                 startd_free(inst->ri_logstem, PATH_MAX);
 376         if (inst->ri_common_name != NULL)
 377                 startd_free(inst->ri_common_name,
 378                     strlen(inst->ri_common_name) + 1);
 379         if (inst->ri_C_common_name != NULL)
 380                 startd_free(inst->ri_C_common_name,
 381                     strlen(inst->ri_C_common_name) + 1);
 382         snap = NULL;
 383         inst->ri_logstem = NULL;
 384         inst->ri_common_name = NULL;
 385         inst->ri_C_common_name = NULL;
 386
 387         if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
 388             NULL, SCF_DECODE_FMRI_EXACT) != 0) {
 389                 switch (scf_error()) {
 390                 case SCF_ERROR_CONNECTION_BROKEN:
 391                         libscf_handle_rebind(h);
 392                         goto rep_retry;
 393
 394                 case SCF_ERROR_NOT_FOUND:
 395                         goto deleted;
 396                 }
 397
 398                 uu_die("Can't decode FMRI %s: %s\n", name,
 399                     scf_strerror(scf_error()));
 400         }
 401
 402         /*
 403          * If there's no running snapshot, then we execute using the editing
 404          * snapshot.  Pending snapshots will be taken later.
 405          */
 406         snap = libscf_get_running_snapshot(scf_inst);
 407
 408         if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
 409             (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
 410             0)) {
 411                 switch (scf_error()) {
 412                 case SCF_ERROR_NOT_SET:
 413                         break;
 414
 415                 case SCF_ERROR_CONNECTION_BROKEN:
 416                         libscf_handle_rebind(h);
 417                         goto rep_retry;
 418
 419                 default:
 420                         assert(0);
 421                         abort();
 422                 }
 423
 424                 goto deleted;
 425         }
 426
 427         (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
 428         for (c = logfilebuf; *c != '\0'; c++)
 429                 if (*c == '/')
 430                         *c = '-';
 431
 432         inst->ri_logstem = startd_alloc(PATH_MAX);
 433         (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
 434             LOG_SUFFIX);
 435
 436         /*
 437          * If the restarter group is missing, use uninit/none.  Otherwise,
 438          * we're probably being restarted & don't want to mess up the states
 439          * that are there.
 440          */
 441         state = RESTARTER_STATE_UNINIT;
 442         next_state = RESTARTER_STATE_NONE;
 443
 444         r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
 445         if (r != 0) {
 446                 switch (scf_error()) {
 447                 case SCF_ERROR_CONNECTION_BROKEN:
 448                         libscf_handle_rebind(h);
 449                         goto rep_retry;
 450
 451                 case SCF_ERROR_NOT_SET:
 452                         goto deleted;
 453
 454                 case SCF_ERROR_NOT_FOUND:
 455                         /*
 456                          * This shouldn't happen since the graph engine should
 457                          * have initialized the state to uninitialized/none if
 458                          * there was no restarter pg.  In case somebody
 459                          * deleted it, though....
 460                          */
 461                         do_commit_states = B_TRUE;
 462                         break;
 463
 464                 default:
 465                         assert(0);
 466                         abort();
 467                 }
 468         } else {
 469                 r = libscf_read_states(pg, &state, &next_state);
 470                 if (r != 0) {
 471                         do_commit_states = B_TRUE;
 472                 } else {
 473                         if (next_state != RESTARTER_STATE_NONE) {
 474                                 /*
 475                                  * Force next_state to _NONE since we
 476                                  * don't look for method processes.
 477                                  */
 478                                 next_state = RESTARTER_STATE_NONE;
 479                                 do_commit_states = B_TRUE;
 480                         } else {
 481                                 /*
 482                                  * The reason for transition will depend on
 483                                  * state.
 484                                  */
 485                                 if (st->st_initial == 0)
 486                                         reason = restarter_str_startd_restart;
 487                                 else if (state == RESTARTER_STATE_MAINT)
 488                                         reason = restarter_str_bad_repo_state;
 489                                 /*
 490                                  * Inform the restarter of our state without
 491                                  * changing the STIME in the repository.
 492                                  */
 493                                 ps = startd_alloc(sizeof (*ps));
 494                                 inst->ri_i.i_state = ps->ps_state = state;
 495                                 inst->ri_i.i_next_state = ps->ps_state_next =
 496                                     next_state;
 497                                 ps->ps_reason = reason;
 498
 499                                 graph_protocol_send_event(inst->ri_i.i_fmri,
 500                                     GRAPH_UPDATE_STATE_CHANGE, ps);
 501
 502                                 do_commit_states = B_FALSE;
 503                         }
 504                 }
 505         }
 506
 507         switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
 508             &inst->ri_utmpx_prefix)) {
 509         case 0:
 510                 break;
 511
 512         case ECONNABORTED:
 513                 libscf_handle_rebind(h);
 514                 goto rep_retry;
 515
 516         case ECANCELED:
 517                 goto deleted;
 518
 519         case ENOENT:
 520                 /*
 521                  * This is odd, because the graph engine should have required
 522                  * the general property group.  So we'll just use default
 523                  * flags in anticipation of the graph engine sending us
 524                  * REMOVE_INSTANCE when it finds out that the general property
 525                  * group has been deleted.
 526                  */
 527                 inst->ri_flags = RINST_CONTRACT;
 528                 break;
 529
 530         default:
 531                 assert(0);
 532                 abort();
 533         }
 534
 535         r = libscf_get_template_values(scf_inst, snap,
 536             &inst->ri_common_name, &inst->ri_C_common_name);
 537
 538         /*
 539          * Copy our names to smaller buffers to reduce our memory footprint.
 540          */
 541         if (inst->ri_common_name != NULL) {
 542                 char *tmp = safe_strdup(inst->ri_common_name);
 543                 startd_free(inst->ri_common_name, max_scf_value_size);
 544                 inst->ri_common_name = tmp;
 545         }
 546
 547         if (inst->ri_C_common_name != NULL) {
 548                 char *tmp = safe_strdup(inst->ri_C_common_name);
 549                 startd_free(inst->ri_C_common_name, max_scf_value_size);
 550                 inst->ri_C_common_name = tmp;
 551         }
 552
 553         switch (r) {
 554         case 0:
 555                 break;
 556
 557         case ECONNABORTED:
 558                 libscf_handle_rebind(h);
 559                 goto rep_retry;
 560
 561         case ECANCELED:
 562                 goto deleted;
 563
 564         case ECHILD:
 565         case ENOENT:
 566                 break;
 567
 568         default:
 569                 assert(0);
 570                 abort();
 571         }
 572
 573         switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
 574             &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
 575             &start_pid)) {
 576         case 0:
 577                 break;
 578
 579         case ECONNABORTED:
 580                 libscf_handle_rebind(h);
 581                 goto rep_retry;
 582
 583         case ECANCELED:
 584                 goto deleted;
 585
 586         default:
 587                 assert(0);
 588                 abort();
 589         }
 590
 591         if (inst->ri_i.i_primary_ctid >= 1) {
 592                 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
 593
 594                 switch (check_contract(inst, B_TRUE, scf_inst)) {
 595                 case 0:
 596                         break;
 597
 598                 case ECONNABORTED:
 599                         libscf_handle_rebind(h);
 600                         goto rep_retry;
 601
 602                 case ECANCELED:
 603                         goto deleted;
 604
 605                 default:
 606                         assert(0);
 607                         abort();
 608                 }
 609         }
 610
 611         if (inst->ri_i.i_transient_ctid >= 1) {
 612                 switch (check_contract(inst, B_FALSE, scf_inst)) {
 613                 case 0:
 614                         break;
 615
 616                 case ECONNABORTED:
 617                         libscf_handle_rebind(h);
 618                         goto rep_retry;
 619
 620                 case ECANCELED:
 621                         goto deleted;
 622
 623                 default:
 624                         assert(0);
 625                         abort();
 626                 }
 627         }
 628
 629         /* No more failures we live through, so add it to the list. */
 630         (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
 631         (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
 632         MUTEX_LOCK(&inst->ri_lock);
 633         MUTEX_LOCK(&inst->ri_queue_lock);
 634
 635         (void) pthread_cond_init(&inst->ri_method_cv, NULL);
 636
 637         uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
 638         uu_list_insert(instance_list.ril_instance_list, inst, idx);
 639         MUTEX_UNLOCK(&instance_list.ril_lock);
 640
 641         if (start_pid != -1 &&
 642             (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
 643                 int ret;
 644                 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
 645                 if (ret == -1) {
 646                         /*
 647                          * Implication:  if we can't reregister the
 648                          * instance, we will start another one.  Two
 649                          * instances may or may not result in a resource
 650                          * conflict.
 651                          */
 652                         log_error(LOG_WARNING,
 653                             "%s: couldn't reregister %ld for wait\n",
 654                             inst->ri_i.i_fmri, start_pid);
 655                 } else if (ret == 1) {
 656                         /*
 657                          * Leading PID has exited.
 658                          */
 659                         (void) stop_instance(h, inst, RSTOP_EXIT);
 660                 }
 661         }
 662
 663
 664         scf_pg_destroy(pg);
 665
 666         if (do_commit_states)
 667                 (void) restarter_instance_update_states(h, inst, state,
 668                     next_state, RERR_NONE, reason);
 669
 670         log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
 671             service_style(inst->ri_flags));
 672
 673         MUTEX_UNLOCK(&inst->ri_queue_lock);
 674         MUTEX_UNLOCK(&inst->ri_lock);
 675
 676         startd_free(svc_name, max_scf_name_size);
 677         startd_free(inst_name, max_scf_name_size);
 678         scf_snapshot_destroy(snap);
 679         scf_instance_destroy(scf_inst);
 680         scf_service_destroy(scf_svc);
 681
 682         log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
 683             name);
 684
 685         return (0);
 686
 687 deleted:
 688         MUTEX_UNLOCK(&instance_list.ril_lock);
 689         startd_free(inst_name, max_scf_name_size);
 690         startd_free(svc_name, max_scf_name_size);
 691         if (snap != NULL)
 692                 scf_snapshot_destroy(snap);
 693         scf_pg_destroy(pg);
 694         scf_instance_destroy(scf_inst);
 695         scf_service_destroy(scf_svc);
 696         startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
 697         uu_list_destroy(inst->ri_queue);
 698         if (inst->ri_logstem != NULL)
 699                 startd_free(inst->ri_logstem, PATH_MAX);
 700         if (inst->ri_common_name != NULL)
 701                 startd_free(inst->ri_common_name,
 702                     strlen(inst->ri_common_name) + 1);
 703         if (inst->ri_C_common_name != NULL)
 704                 startd_free(inst->ri_C_common_name,
 705                     strlen(inst->ri_C_common_name) + 1);
 706         startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
 707         startd_free(inst, sizeof (restarter_inst_t));
 708         return (ENOENT);
 709 }
 710
 711 static void
 712 restarter_delete_inst(restarter_inst_t *ri)
 713 {
 714         int id;
 715         restarter_inst_t *rip;
 716         void *cookie = NULL;
 717         restarter_instance_qentry_t *e;
 718
 719         assert(MUTEX_HELD(&ri->ri_lock));
 720
 721         /*
 722          * Must drop the instance lock so we can pick up the instance_list
 723          * lock & remove the instance.
 724          */
 725         id = ri->ri_id;
 726         MUTEX_UNLOCK(&ri->ri_lock);
 727
 728         MUTEX_LOCK(&instance_list.ril_lock);
 729
 730         rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 731         if (rip == NULL) {
 732                 MUTEX_UNLOCK(&instance_list.ril_lock);
 733                 return;
 734         }
 735
 736         assert(ri == rip);
 737
 738         uu_list_remove(instance_list.ril_instance_list, ri);
 739
 740         log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
 741             ri->ri_i.i_fmri);
 742
 743         MUTEX_UNLOCK(&instance_list.ril_lock);
 744
 745         /*
 746          * We can lock the instance without holding the instance_list lock
 747          * since we removed the instance from the list.
 748          */
 749         MUTEX_LOCK(&ri->ri_lock);
 750         MUTEX_LOCK(&ri->ri_queue_lock);
 751
 752         if (ri->ri_i.i_primary_ctid >= 1)
 753                 contract_hash_remove(ri->ri_i.i_primary_ctid);
 754
 755         while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
 756                 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
 757
 758         while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
 759                 startd_free(e, sizeof (*e));
 760         uu_list_destroy(ri->ri_queue);
 761
 762         startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
 763         startd_free(ri->ri_logstem, PATH_MAX);
 764         if (ri->ri_common_name != NULL)
 765                 startd_free(ri->ri_common_name,
 766                     strlen(ri->ri_common_name) + 1);
 767         if (ri->ri_C_common_name != NULL)
 768                 startd_free(ri->ri_C_common_name,
 769                     strlen(ri->ri_C_common_name) + 1);
 770         startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
 771         (void) pthread_mutex_destroy(&ri->ri_lock);
 772         (void) pthread_mutex_destroy(&ri->ri_queue_lock);
 773         startd_free(ri, sizeof (restarter_inst_t));
 774 }
 775
 776 /*
 777  * instance_is_wait_style()
 778  *
 779  *   Returns 1 if the given instance is a "wait-style" service instance.
 780  */
 781 int
 782 instance_is_wait_style(restarter_inst_t *inst)
 783 {
 784         assert(MUTEX_HELD(&inst->ri_lock));
 785         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
 786 }
 787
 788 /*
 789  * instance_is_transient_style()
 790  *
 791  *   Returns 1 if the given instance is a transient service instance.
 792  */
 793 int
 794 instance_is_transient_style(restarter_inst_t *inst)
 795 {
 796         assert(MUTEX_HELD(&inst->ri_lock));
 797         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
 798 }
 799
 800 /*
 801  * instance_in_transition()
 802  * Returns 1 if instance is in transition, 0 if not
 803  */
 804 int
 805 instance_in_transition(restarter_inst_t *inst)
 806 {
 807         assert(MUTEX_HELD(&inst->ri_lock));
 808         if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
 809                 return (0);
 810         return (1);
 811 }
 812
 813 /*
 814  * returns 1 if instance is already started, 0 if not
 815  */
 816 static int
 817 instance_started(restarter_inst_t *inst)
 818 {
 819         int ret;
 820
 821         assert(MUTEX_HELD(&inst->ri_lock));
 822
 823         if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
 824             inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
 825                 ret = 1;
 826         else
 827                 ret = 0;
 828
 829         return (ret);
 830 }
 831
 832 /*
 833  * Returns
 834  *   0 - success
 835  *   ECONNRESET - success, but h was rebound
 836  */
 837 int
 838 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
 839     restarter_instance_state_t new_state,
 840     restarter_instance_state_t new_state_next, restarter_error_t err,
 841     restarter_str_t reason)
 842 {
 843         protocol_states_t *states;
 844         int e;
 845         uint_t retry_count = 0, msecs = ALLOC_DELAY;
 846         boolean_t rebound = B_FALSE;
 847         int prev_state_online;
 848         int state_online;
 849
 850         assert(MUTEX_HELD(&ri->ri_lock));
 851
 852         prev_state_online = instance_started(ri);
 853
 854 retry:
 855         e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
 856             restarter_get_str_short(reason));
 857         switch (e) {
 858         case 0:
 859                 break;
 860
 861         case ENOMEM:
 862                 ++retry_count;
 863                 if (retry_count < ALLOC_RETRY) {
 864                         (void) poll(NULL, 0, msecs);
 865                         msecs *= ALLOC_DELAY_MULT;
 866                         goto retry;
 867                 }
 868
 869                 /* Like startd_alloc(). */
 870                 uu_die("Insufficient memory.\n");
 871                 /* NOTREACHED */
 872
 873         case ECONNABORTED:
 874                 libscf_handle_rebind(h);
 875                 rebound = B_TRUE;
 876                 goto retry;
 877
 878         case EPERM:
 879         case EACCES:
 880         case EROFS:
 881                 log_error(LOG_NOTICE, "Could not commit state change for %s "
 882                     "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
 883                 /* FALLTHROUGH */
 884
 885         case ENOENT:
 886                 ri->ri_i.i_state = new_state;
 887                 ri->ri_i.i_next_state = new_state_next;
 888                 break;
 889
 890         case EINVAL:
 891         default:
 892                 bad_error("_restarter_commit_states", e);
 893         }
 894
 895         states = startd_alloc(sizeof (protocol_states_t));
 896         states->ps_state = new_state;
 897         states->ps_state_next = new_state_next;
 898         states->ps_err = err;
 899         states->ps_reason = reason;
 900         graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
 901             (void *)states);
 902
 903         state_online = instance_started(ri);
 904
 905         if (prev_state_online && !state_online)
 906                 ri->ri_post_offline_hook();
 907         else if (!prev_state_online && state_online)
 908                 ri->ri_post_online_hook();
 909
 910         return (rebound ? ECONNRESET : 0);
 911 }
 912
 913 void
 914 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
 915 {
 916         restarter_inst_t *inst;
 917
 918         assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
 919
 920         inst = inst_lookup_by_name(fmri);
 921         if (inst == NULL)
 922                 return;
 923
 924         inst->ri_flags |= flag;
 925
 926         MUTEX_UNLOCK(&inst->ri_lock);
 927 }
 928
 929 static void
 930 restarter_take_pending_snapshots(scf_handle_t *h)
 931 {
 932         restarter_inst_t *inst;
 933         int r;
 934
 935         MUTEX_LOCK(&instance_list.ril_lock);
 936
 937         for (inst = uu_list_first(instance_list.ril_instance_list);
 938             inst != NULL;
 939             inst = uu_list_next(instance_list.ril_instance_list, inst)) {
 940                 const char *fmri;
 941                 scf_instance_t *sinst = NULL;
 942
 943                 MUTEX_LOCK(&inst->ri_lock);
 944
 945                 /*
 946                  * This is where we'd check inst->ri_method_thread and if it
 947                  * were nonzero we'd wait in anticipation of another thread
 948                  * executing a method for inst.  Doing so with the instance_list
 949                  * locked, though, leads to deadlock.  Since taking a snapshot
 950                  * during that window won't hurt anything, we'll just continue.
 951                  */
 952
 953                 fmri = inst->ri_i.i_fmri;
 954
 955                 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
 956                         scf_snapshot_t *rsnap;
 957
 958                         (void) libscf_fmri_get_instance(h, fmri, &sinst);
 959
 960                         rsnap = libscf_get_or_make_running_snapshot(sinst,
 961                             fmri, B_FALSE);
 962
 963                         scf_instance_destroy(sinst);
 964
 965                         if (rsnap != NULL)
 966                                 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
 967
 968                         scf_snapshot_destroy(rsnap);
 969                 }
 970
 971                 if (inst->ri_flags & RINST_RETAKE_START) {
 972                         switch (r = libscf_snapshots_poststart(h, fmri,
 973                             B_FALSE)) {
 974                         case 0:
 975                         case ENOENT:
 976                                 inst->ri_flags &= ~RINST_RETAKE_START;
 977                                 break;
 978
 979                         case ECONNABORTED:
 980                                 break;
 981
 982                         case EACCES:
 983                         default:
 984                                 bad_error("libscf_snapshots_poststart", r);
 985                         }
 986                 }
 987
 988                 MUTEX_UNLOCK(&inst->ri_lock);
 989         }
 990
 991         MUTEX_UNLOCK(&instance_list.ril_lock);
 992 }
 993
 994 /* ARGSUSED */
 995 void *
 996 restarter_post_fsminimal_thread(void *unused)
 997 {
 998         scf_handle_t *h;
 999         int r;
1000
1001         h = libscf_handle_create_bound_loop();
1002
1003         for (;;) {
1004                 r = libscf_create_self(h);
1005                 if (r == 0)
1006                         break;
1007
1008                 assert(r == ECONNABORTED);
1009                 libscf_handle_rebind(h);
1010         }
1011
1012         restarter_take_pending_snapshots(h);
1013
1014         (void) scf_handle_unbind(h);
1015         scf_handle_destroy(h);
1016
1017         return (NULL);
1018 }
1019
1020 /*
1021  * int stop_instance()
1022  *
1023  *   Stop the instance identified by the instance given as the second argument,
1024  *   for the cause stated.
1025  *
1026  *   Returns
1027  *     0 - success
1028  *     -1 - inst is in transition
1029  */
1030 static int
1031 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1032     stop_cause_t cause)
1033 {
1034         fork_info_t *info;
1035         const char *cp;
1036         int err;
1037         restarter_error_t re;
1038         restarter_str_t reason;
1039
1040         assert(MUTEX_HELD(&inst->ri_lock));
1041         assert(inst->ri_method_thread == 0);
1042
1043         switch (cause) {
1044         case RSTOP_EXIT:
1045                 re = RERR_RESTART;
1046                 reason = restarter_str_ct_ev_exit;
1047                 cp = "all processes in service exited";
1048                 break;
1049         case RSTOP_CORE:
1050                 re = RERR_FAULT;
1051                 reason = restarter_str_ct_ev_core;
1052                 cp = "process dumped core";
1053                 break;
1054         case RSTOP_SIGNAL:
1055                 re = RERR_FAULT;
1056                 reason = restarter_str_ct_ev_signal;
1057                 cp = "process received fatal signal from outside the service";
1058                 break;
1059         case RSTOP_HWERR:
1060                 re = RERR_FAULT;
1061                 reason = restarter_str_ct_ev_hwerr;
1062                 cp = "process killed due to uncorrectable hardware error";
1063                 break;
1064         case RSTOP_DEPENDENCY:
1065                 re = RERR_RESTART;
1066                 reason = restarter_str_dependency_activity;
1067                 cp = "dependency activity requires stop";
1068                 break;
1069         case RSTOP_DISABLE:
1070                 re = RERR_RESTART;
1071                 reason = restarter_str_disable_request;
1072                 cp = "service disabled";
1073                 break;
1074         case RSTOP_RESTART:
1075                 re = RERR_RESTART;
1076                 reason = restarter_str_restart_request;
1077                 cp = "service restarting";
1078                 break;
1079         default:
1080 #ifndef NDEBUG
1081                 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1082                     cause, __FILE__, __LINE__);
1083 #endif
1084                 abort();
1085         }
1086
1087         /* Services in the disabled and maintenance state are ignored */
1088         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1089             inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1090                 log_framework(LOG_DEBUG,
1091                     "%s: stop_instance -> is maint/disabled\n",
1092                     inst->ri_i.i_fmri);
1093                 return (0);
1094         }
1095
1096         /* Already stopped instances are left alone */
1097         if (instance_started(inst) == 0) {
1098                 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1099                     inst->ri_i.i_fmri);
1100                 return (0);
1101         }
1102
1103         if (instance_in_transition(inst)) {
1104                 /* requeue event by returning -1 */
1105                 log_framework(LOG_DEBUG,
1106                     "Restarter: Not stopping %s, in transition.\n",
1107                     inst->ri_i.i_fmri);
1108                 return (-1);
1109         }
1110
1111         log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1112
1113         log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1114             "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1115
1116         if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1117                 /*
1118                  * No need to stop instance, as child has exited; remove
1119                  * contract and move the instance to the offline state.
1120                  */
1121                 switch (err = restarter_instance_update_states(local_handle,
1122                     inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1123                     reason)) {
1124                 case 0:
1125                 case ECONNRESET:
1126                         break;
1127
1128                 default:
1129                         bad_error("restarter_instance_update_states", err);
1130                 }
1131
1132                 (void) update_fault_count(inst, FAULT_COUNT_RESET);
1133                 reset_start_times(inst);
1134
1135                 if (inst->ri_i.i_primary_ctid != 0) {
1136                         inst->ri_m_inst =
1137                             safe_scf_instance_create(local_handle);
1138                         inst->ri_mi_deleted = B_FALSE;
1139
1140                         libscf_reget_instance(inst);
1141                         method_remove_contract(inst, B_TRUE, B_TRUE);
1142
1143                         scf_instance_destroy(inst->ri_m_inst);
1144                         inst->ri_m_inst = NULL;
1145                 }
1146
1147                 switch (err = restarter_instance_update_states(local_handle,
1148                     inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1149                     reason)) {
1150                 case 0:
1151                 case ECONNRESET:
1152                         break;
1153
1154                 default:
1155                         bad_error("restarter_instance_update_states", err);
1156                 }
1157
1158                 return (0);
1159         } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1160                 /*
1161                  * Stopping a wait service through means other than the pid
1162                  * exiting should keep wait_thread() from restarting the
1163                  * service, by removing it from the wait list.
1164                  * We cannot remove it right now otherwise the process will
1165                  * end up <defunct> so mark it to be ignored.
1166                  */
1167                 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1168         }
1169
1170         switch (err = restarter_instance_update_states(local_handle, inst,
1171             inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1172             RESTARTER_STATE_DISABLED, RERR_NONE, reason)) {
1173         case 0:
1174         case ECONNRESET:
1175                 break;
1176
1177         default:
1178                 bad_error("restarter_instance_update_states", err);
1179         }
1180
1181         info = startd_zalloc(sizeof (fork_info_t));
1182
1183         info->sf_id = inst->ri_id;
1184         info->sf_method_type = METHOD_STOP;
1185         info->sf_event_type = re;
1186         info->sf_reason = reason;
1187         inst->ri_method_thread = startd_thread_create(method_thread, info);
1188
1189         return (0);
1190 }
1191
1192 /*
1193  * Returns
1194  *   ENOENT - fmri is not in instance_list
1195  *   0 - success
1196  *   ECONNRESET - success, though handle was rebound
1197  *   -1 - instance is in transition
1198  */
1199 int
1200 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1201 {
1202         restarter_inst_t *rip;
1203         int r;
1204
1205         rip = inst_lookup_by_name(fmri);
1206         if (rip == NULL)
1207                 return (ENOENT);
1208
1209         r = stop_instance(h, rip, flags);
1210
1211         MUTEX_UNLOCK(&rip->ri_lock);
1212
1213         return (r);
1214 }
1215
1216 static void
1217 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1218     unmaint_cause_t cause)
1219 {
1220         ctid_t ctid;
1221         scf_instance_t *inst;
1222         int r;
1223         uint_t tries = 0, msecs = ALLOC_DELAY;
1224         const char *cp;
1225         restarter_str_t reason;
1226
1227         assert(MUTEX_HELD(&rip->ri_lock));
1228
1229         if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1230                 log_error(LOG_DEBUG, "Restarter: "
1231                     "Ignoring maintenance off command because %s is not in the "
1232                     "maintenance state.\n", rip->ri_i.i_fmri);
1233                 return;
1234         }
1235
1236         switch (cause) {
1237         case RUNMAINT_CLEAR:
1238                 cp = "clear requested";
1239                 reason = restarter_str_clear_request;
1240                 break;
1241         case RUNMAINT_DISABLE:
1242                 cp = "disable requested";
1243                 reason = restarter_str_disable_request;
1244                 break;
1245         default:
1246 #ifndef NDEBUG
1247                 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1248                     cause, __FILE__, __LINE__);
1249 #endif
1250                 abort();
1251         }
1252
1253         log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1254             cp);
1255         log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1256             "%s.\n", rip->ri_i.i_fmri, cp);
1257
1258         (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1259             RESTARTER_STATE_NONE, RERR_RESTART, reason);
1260
1261         /*
1262          * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1263          * a primary contract.
1264          */
1265         if (rip->ri_i.i_primary_ctid == 0)
1266                 return;
1267
1268         ctid = rip->ri_i.i_primary_ctid;
1269         contract_abandon(ctid);
1270         rip->ri_i.i_primary_ctid = 0;
1271
1272 rep_retry:
1273         switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1274         case 0:
1275                 break;
1276
1277         case ECONNABORTED:
1278                 libscf_handle_rebind(h);
1279                 goto rep_retry;
1280
1281         case ENOENT:
1282                 /* Must have been deleted. */
1283                 return;
1284
1285         case EINVAL:
1286         case ENOTSUP:
1287         default:
1288                 bad_error("libscf_handle_rebind", r);
1289         }
1290
1291 again:
1292         r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1293         switch (r) {
1294         case 0:
1295                 break;
1296
1297         case ENOMEM:
1298                 ++tries;
1299                 if (tries < ALLOC_RETRY) {
1300                         (void) poll(NULL, 0, msecs);
1301                         msecs *= ALLOC_DELAY_MULT;
1302                         goto again;
1303                 }
1304
1305                 uu_die("Insufficient memory.\n");
1306                 /* NOTREACHED */
1307
1308         case ECONNABORTED:
1309                 scf_instance_destroy(inst);
1310                 libscf_handle_rebind(h);
1311                 goto rep_retry;
1312
1313         case ECANCELED:
1314                 break;
1315
1316         case EPERM:
1317         case EACCES:
1318         case EROFS:
1319                 log_error(LOG_INFO,
1320                     "Could not remove contract id %lu for %s (%s).\n", ctid,
1321                     rip->ri_i.i_fmri, strerror(r));
1322                 break;
1323
1324         case EINVAL:
1325         case EBADF:
1326         default:
1327                 bad_error("restarter_remove_contract", r);
1328         }
1329
1330         scf_instance_destroy(inst);
1331 }
1332
1333 /*
1334  * enable_inst()
1335  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1336  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1337  *   disabled, move it to offline.  If the event is _DISABLE or
1338  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1339  *
1340  *   Returns
1341  *     0 - success
1342  *     ECONNRESET - h was rebound
1343  */
1344 static int
1345 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1346     restarter_instance_qentry_t *riq)
1347 {
1348         restarter_instance_state_t state;
1349         restarter_event_type_t e = riq->riq_type;
1350         restarter_str_t reason = restarter_str_per_configuration;
1351         int r;
1352
1353         assert(MUTEX_HELD(&inst->ri_lock));
1354         assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1355             e == RESTARTER_EVENT_TYPE_DISABLE ||
1356             e == RESTARTER_EVENT_TYPE_ENABLE);
1357         assert(instance_in_transition(inst) == 0);
1358
1359         state = inst->ri_i.i_state;
1360
1361         if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1362                 inst->ri_i.i_enabled = 1;
1363
1364                 if (state == RESTARTER_STATE_UNINIT ||
1365                     state == RESTARTER_STATE_DISABLED) {
1366                         /*
1367                          * B_FALSE: Don't log an error if the log_instance()
1368                          * fails because it will fail on the miniroot before
1369                          * install-discovery runs.
1370                          */
1371                         log_instance(inst, B_FALSE, "Enabled.");
1372                         log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1373                             inst->ri_i.i_fmri);
1374
1375                         /*
1376                          * If we are coming from DISABLED, it was obviously an
1377                          * enable request. If we are coming from UNINIT, it may
1378                          * have been a sevice in MAINT that was cleared.
1379                          */
1380                         if (riq->riq_reason == restarter_str_clear_request)
1381                                 reason = restarter_str_clear_request;
1382                         else if (state == RESTARTER_STATE_DISABLED)
1383                                 reason = restarter_str_enable_request;
1384                         (void) restarter_instance_update_states(h, inst,
1385                             RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1386                             RERR_NONE, reason);
1387                 } else {
1388                         log_framework(LOG_DEBUG, "Restarter: "
1389                             "Not changing state of %s for enable command.\n",
1390                             inst->ri_i.i_fmri);
1391                 }
1392         } else {
1393                 inst->ri_i.i_enabled = 0;
1394
1395                 switch (state) {
1396                 case RESTARTER_STATE_ONLINE:
1397                 case RESTARTER_STATE_DEGRADED:
1398                         r = stop_instance(h, inst, RSTOP_DISABLE);
1399                         return (r == ECONNRESET ? 0 : r);
1400
1401                 case RESTARTER_STATE_OFFLINE:
1402                 case RESTARTER_STATE_UNINIT:
1403                         if (inst->ri_i.i_primary_ctid != 0) {
1404                                 inst->ri_m_inst = safe_scf_instance_create(h);
1405                                 inst->ri_mi_deleted = B_FALSE;
1406
1407                                 libscf_reget_instance(inst);
1408                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1409
1410                                 scf_instance_destroy(inst->ri_m_inst);
1411                         }
1412                         /* B_FALSE: See log_instance(..., "Enabled."); above */
1413                         log_instance(inst, B_FALSE, "Disabled.");
1414                         log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1415                             inst->ri_i.i_fmri);
1416
1417                         /*
1418                          * If we are coming from OFFLINE, it was obviously a
1419                          * disable request. But if we are coming from
1420                          * UNINIT, it may have been a disable request for a
1421                          * service in MAINT.
1422                          */
1423                         if (riq->riq_reason == restarter_str_disable_request ||
1424                             state == RESTARTER_STATE_OFFLINE)
1425                                 reason = restarter_str_disable_request;
1426                         (void) restarter_instance_update_states(h, inst,
1427                             RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1428                             RERR_RESTART, reason);
1429                         return (0);
1430
1431                 case RESTARTER_STATE_DISABLED:
1432                         break;
1433
1434                 case RESTARTER_STATE_MAINT:
1435                         /*
1436                          * We only want to pull the instance out of maintenance
1437                          * if the disable is on adminstrative request.  The
1438                          * graph engine sends _DISABLE events whenever a
1439                          * service isn't in the disabled state, and we don't
1440                          * want to pull the service out of maintenance if,
1441                          * for example, it is there due to a dependency cycle.
1442                          */
1443                         if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1444                                 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1445                         break;
1446
1447                 default:
1448 #ifndef NDEBUG
1449                         (void) fprintf(stderr, "Restarter instance %s has "
1450                             "unknown state %d.\n", inst->ri_i.i_fmri, state);
1451 #endif
1452                         abort();
1453                 }
1454         }
1455
1456         return (0);
1457 }
1458
1459 static void
1460 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1461     int32_t reason)
1462 {
1463         fork_info_t *info;
1464         restarter_str_t new_reason;
1465
1466         assert(MUTEX_HELD(&inst->ri_lock));
1467         assert(instance_in_transition(inst) == 0);
1468         assert(inst->ri_method_thread == 0);
1469
1470         log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1471             inst->ri_i.i_fmri);
1472
1473         /*
1474          * We want to keep the original reason for restarts and clear actions
1475          */
1476         switch (reason) {
1477         case restarter_str_restart_request:
1478         case restarter_str_clear_request:
1479                 new_reason = reason;
1480                 break;
1481         default:
1482                 new_reason = restarter_str_dependencies_satisfied;
1483         }
1484
1485         /* Services in the disabled and maintenance state are ignored */
1486         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1487             inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1488             inst->ri_i.i_enabled == 0) {
1489                 log_framework(LOG_DEBUG,
1490                     "%s: start_instance -> is maint/disabled\n",
1491                     inst->ri_i.i_fmri);
1492                 return;
1493         }
1494
1495         /* Already started instances are left alone */
1496         if (instance_started(inst) == 1) {
1497                 log_framework(LOG_DEBUG,
1498                     "%s: start_instance -> is already started\n",
1499                     inst->ri_i.i_fmri);
1500                 return;
1501         }
1502
1503         log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1504
1505         (void) restarter_instance_update_states(local_handle, inst,
1506             inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1507
1508         info = startd_zalloc(sizeof (fork_info_t));
1509
1510         info->sf_id = inst->ri_id;
1511         info->sf_method_type = METHOD_START;
1512         info->sf_event_type = RERR_NONE;
1513         info->sf_reason = new_reason;
1514         inst->ri_method_thread = startd_thread_create(method_thread, info);
1515 }
1516
1517 static int
1518 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1519 {
1520         scf_instance_t *inst;
1521         int ret = 0;
1522
1523         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1524                 return (-1);
1525
1526         ret = restarter_inst_ractions_from_tty(inst);
1527
1528         scf_instance_destroy(inst);
1529         return (ret);
1530 }
1531
1532 static void
1533 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1534     restarter_str_t reason)
1535 {
1536         fork_info_t *info;
1537         scf_instance_t *scf_inst = NULL;
1538
1539         assert(MUTEX_HELD(&rip->ri_lock));
1540         assert(reason != restarter_str_none);
1541         assert(rip->ri_method_thread == 0);
1542
1543         log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1544             restarter_get_str_short(reason));
1545         log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1546             rip->ri_i.i_fmri, restarter_get_str_short(reason));
1547
1548         /* Services in the maintenance state are ignored */
1549         if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1550                 log_framework(LOG_DEBUG,
1551                     "%s: maintain_instance -> is already in maintenance\n",
1552                     rip->ri_i.i_fmri);
1553                 return;
1554         }
1555
1556         /*
1557          * If reason state is restarter_str_service_request and
1558          * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1559          * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1560          */
1561         if (reason == restarter_str_service_request &&
1562             libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1563                 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1564                         if (restarter_inst_set_aux_fmri(scf_inst))
1565                                 log_framework(LOG_DEBUG, "%s: "
1566                                     "restarter_inst_set_aux_fmri failed: ",
1567                                     rip->ri_i.i_fmri);
1568                 } else {
1569                         log_framework(LOG_DEBUG, "%s: "
1570                             "restarter_inst_validate_ractions_aux_fmri "
1571                             "failed: ", rip->ri_i.i_fmri);
1572
1573                         if (restarter_inst_reset_aux_fmri(scf_inst))
1574                                 log_framework(LOG_DEBUG, "%s: "
1575                                     "restarter_inst_reset_aux_fmri failed: ",
1576                                     rip->ri_i.i_fmri);
1577                 }
1578                 scf_instance_destroy(scf_inst);
1579         }
1580
1581         if (immediate || !instance_started(rip)) {
1582                 if (rip->ri_i.i_primary_ctid != 0) {
1583                         rip->ri_m_inst = safe_scf_instance_create(h);
1584                         rip->ri_mi_deleted = B_FALSE;
1585
1586                         libscf_reget_instance(rip);
1587                         method_remove_contract(rip, B_TRUE, B_TRUE);
1588
1589                         scf_instance_destroy(rip->ri_m_inst);
1590                 }
1591
1592                 (void) restarter_instance_update_states(h, rip,
1593                     RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1594                     reason);
1595                 return;
1596         }
1597
1598         (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1599             RESTARTER_STATE_MAINT, RERR_NONE, reason);
1600
1601         log_transition(rip, MAINT_REQUESTED);
1602
1603         info = startd_zalloc(sizeof (*info));
1604         info->sf_id = rip->ri_id;
1605         info->sf_method_type = METHOD_STOP;
1606         info->sf_event_type = RERR_RESTART;
1607         info->sf_reason = reason;
1608         rip->ri_method_thread = startd_thread_create(method_thread, info);
1609 }
1610
1611 static void
1612 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1613 {
1614         scf_instance_t *inst;
1615         scf_snapshot_t *snap;
1616         fork_info_t *info;
1617         int r;
1618
1619         assert(MUTEX_HELD(&rip->ri_lock));
1620
1621         log_instance(rip, B_TRUE, "Rereading configuration.");
1622         log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1623             rip->ri_i.i_fmri);
1624
1625 rep_retry:
1626         r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1627         switch (r) {
1628         case 0:
1629                 break;
1630
1631         case ECONNABORTED:
1632                 libscf_handle_rebind(h);
1633                 goto rep_retry;
1634
1635         case ENOENT:
1636                 /* Must have been deleted. */
1637                 return;
1638
1639         case EINVAL:
1640         case ENOTSUP:
1641         default:
1642                 bad_error("libscf_fmri_get_instance", r);
1643         }
1644
1645         snap = libscf_get_running_snapshot(inst);
1646
1647         r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1648             &rip->ri_utmpx_prefix);
1649         switch (r) {
1650         case 0:
1651                 log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1652                     rip->ri_i.i_fmri, service_style(rip->ri_flags));
1653                 break;
1654
1655         case ECONNABORTED:
1656                 scf_instance_destroy(inst);
1657                 scf_snapshot_destroy(snap);
1658                 libscf_handle_rebind(h);
1659                 goto rep_retry;
1660
1661         case ECANCELED:
1662         case ENOENT:
1663                 /* Succeed in anticipation of REMOVE_INSTANCE. */
1664                 break;
1665
1666         default:
1667                 bad_error("libscf_get_startd_properties", r);
1668         }
1669
1670         if (instance_started(rip)) {
1671                 /* Refresh does not change the state. */
1672                 (void) restarter_instance_update_states(h, rip,
1673                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1674                     restarter_str_refresh);
1675
1676                 info = startd_zalloc(sizeof (*info));
1677                 info->sf_id = rip->ri_id;
1678                 info->sf_method_type = METHOD_REFRESH;
1679                 info->sf_event_type = RERR_REFRESH;
1680                 info->sf_reason = NULL;
1681
1682                 assert(rip->ri_method_thread == 0);
1683                 rip->ri_method_thread =
1684                     startd_thread_create(method_thread, info);
1685         }
1686
1687         scf_snapshot_destroy(snap);
1688         scf_instance_destroy(inst);
1689 }
1690
/*
 * Printable names for restarter events.
 * NOTE(review): presumably indexed by restarter_event_type_t value; confirm
 * the order against that enumeration before adding or moving entries.
 */
const char *event_names[] = {
        "INVALID",
        "ADD_INSTANCE",
        "REMOVE_INSTANCE",
        "ENABLE",
        "DISABLE",
        "ADMIN_DEGRADED",
        "ADMIN_REFRESH",
        "ADMIN_RESTART",
        "ADMIN_MAINT_OFF",
        "ADMIN_MAINT_ON",
        "ADMIN_MAINT_ON_IMMEDIATE",
        "STOP",
        "START",
        "DEPENDENCY_CYCLE",
        "INVALID_DEPENDENCY",
        "ADMIN_DISABLE",
        "STOP_RESET"
};
1697
1698 /*
1699  * void *restarter_process_events()
1700  *
1701  *   Called in a separate thread to process the events on an instance's
1702  *   queue.  Empties the queue completely, and tries to keep the thread
1703  *   around for a little while after the queue is empty to save on
1704  *   startup costs.
1705  */
1706 static void *
1707 restarter_process_events(void *arg)
1708 {
1709         scf_handle_t *h;
1710         restarter_instance_qentry_t *event;
1711         restarter_inst_t *rip;
1712         char *fmri = (char *)arg;
1713         struct timespec to;
1714
1715         assert(fmri != NULL);
1716
1717         h = libscf_handle_create_bound_loop();
1718
1719         /* grab the queue lock */
1720         rip = inst_lookup_queue(fmri);
1721         if (rip == NULL)
1722                 goto out;
1723
1724 again:
1725
1726         while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1727                 restarter_inst_t *inst;
1728
1729                 /* drop the queue lock */
1730                 MUTEX_UNLOCK(&rip->ri_queue_lock);
1731
1732                 /*
1733                  * Grab the inst lock -- this waits until any outstanding
1734                  * method finishes running.
1735                  */
1736                 inst = inst_lookup_by_name(fmri);
1737                 if (inst == NULL) {
1738                         /* Getting deleted in the middle isn't an error. */
1739                         goto cont;
1740                 }
1741
1742                 assert(instance_in_transition(inst) == 0);
1743
1744                 /* process the event */
1745                 switch (event->riq_type) {
1746                 case RESTARTER_EVENT_TYPE_ENABLE:
1747                 case RESTARTER_EVENT_TYPE_DISABLE:
1748                         (void) enable_inst(h, inst, event);
1749                         break;
1750
1751                 case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1752                         if (enable_inst(h, inst, event) == 0)
1753                                 reset_start_times(inst);
1754                         break;
1755
1756                 case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1757                         restarter_delete_inst(inst);
1758                         inst = NULL;
1759                         goto cont;
1760
1761                 case RESTARTER_EVENT_TYPE_STOP_RESET:
1762                         reset_start_times(inst);
1763                         /* FALLTHROUGH */
1764                 case RESTARTER_EVENT_TYPE_STOP:
1765                         (void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1766                         break;
1767
1768                 case RESTARTER_EVENT_TYPE_START:
1769                         start_instance(h, inst, event->riq_reason);
1770                         break;
1771
1772                 case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1773                         maintain_instance(h, inst, 0,
1774                             restarter_str_dependency_cycle);
1775                         break;
1776
1777                 case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1778                         maintain_instance(h, inst, 0,
1779                             restarter_str_invalid_dependency);
1780                         break;
1781
1782                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1783                         if (event_from_tty(h, inst) == 0)
1784                                 maintain_instance(h, inst, 0,
1785                                     restarter_str_service_request);
1786                         else
1787                                 maintain_instance(h, inst, 0,
1788                                     restarter_str_administrative_request);
1789                         break;
1790
1791                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1792                         if (event_from_tty(h, inst) == 0)
1793                                 maintain_instance(h, inst, 1,
1794                                     restarter_str_service_request);
1795                         else
1796                                 maintain_instance(h, inst, 1,
1797                                     restarter_str_administrative_request);
1798                         break;
1799
1800                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1801                         unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1802                         reset_start_times(inst);
1803                         break;
1804
1805                 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1806                         refresh_instance(h, inst);
1807                         break;
1808
1809                 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1810                         log_framework(LOG_WARNING, "Restarter: "
1811                             "%s command (for %s) unimplemented.\n",
1812                             event_names[event->riq_type], inst->ri_i.i_fmri);
1813                         break;
1814
1815                 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1816                         if (!instance_started(inst)) {
1817                                 log_framework(LOG_DEBUG, "Restarter: "
1818                                     "Not restarting %s; not running.\n",
1819                                     inst->ri_i.i_fmri);
1820                         } else {
1821                                 /*
1822                                  * Stop the instance.  If it can be restarted,
1823                                  * the graph engine will send a new event.
1824                                  */
1825                                 if (stop_instance(h, inst, RSTOP_RESTART) == 0)
1826                                         reset_start_times(inst);
1827                         }
1828                         break;
1829
1830                 case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1831                 default:
1832 #ifndef NDEBUG
1833                         uu_warn("%s:%d: Bad restarter event %d.  "
1834                             "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1835 #endif
1836                         abort();
1837                 }
1838
1839                 assert(inst != NULL);
1840                 MUTEX_UNLOCK(&inst->ri_lock);
1841
1842 cont:
1843                 /* grab the queue lock */
1844                 rip = inst_lookup_queue(fmri);
1845                 if (rip == NULL)
1846                         goto out;
1847
1848                 /* delete the event */
1849                 uu_list_remove(rip->ri_queue, event);
1850                 startd_free(event, sizeof (restarter_instance_qentry_t));
1851         }
1852
1853         assert(rip != NULL);
1854
1855         /*
1856          * Try to preserve the thread for a little while for future use.
1857          */
1858         to.tv_sec = 3;
1859         to.tv_nsec = 0;
1860         (void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1861             &rip->ri_queue_lock, &to);
1862
1863         if (uu_list_first(rip->ri_queue) != NULL)
1864                 goto again;
1865
1866         rip->ri_queue_thread = 0;
1867         MUTEX_UNLOCK(&rip->ri_queue_lock);
1868
1869 out:
1870         (void) scf_handle_unbind(h);
1871         scf_handle_destroy(h);
1872         free(fmri);
1873         return (NULL);
1874 }
1875
1876 static int
1877 is_admin_event(restarter_event_type_t t) {
1878
1879         switch (t) {
1880         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1881         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1882         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1883         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1884         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1885         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1886                 return (1);
1887         default:
1888                 return (0);
1889         }
1890 }
1891
1892 static void
1893 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1894 {
1895         restarter_instance_qentry_t *qe;
1896         int r;
1897
1898         assert(MUTEX_HELD(&ri->ri_queue_lock));
1899         assert(!MUTEX_HELD(&ri->ri_lock));
1900
1901         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1902         qe->riq_type = e->rpe_type;
1903         qe->riq_reason = e->rpe_reason;
1904
1905         uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1906         r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1907         assert(r == 0);
1908 }
1909
1910 /*
1911  * void *restarter_event_thread()
1912  *
1913  *  Handle incoming graph events by placing them on a per-instance
1914  *  queue.  We can't lock the main part of the instance structure, so
1915  *  just modify the seprarately locked event queue portion.
1916  */
1917 /*ARGSUSED*/
1918 static void *
1919 restarter_event_thread(void *unused)
1920 {
1921         scf_handle_t *h;
1922
1923         /*
1924          * This is a new thread, and thus, gets its own handle
1925          * to the repository.
1926          */
1927         h = libscf_handle_create_bound_loop();
1928
1929         MUTEX_LOCK(&ru->restarter_update_lock);
1930
1931         /*CONSTCOND*/
1932         while (1) {
1933                 restarter_protocol_event_t *e;
1934
1935                 while (ru->restarter_update_wakeup == 0)
1936                         (void) pthread_cond_wait(&ru->restarter_update_cv,
1937                             &ru->restarter_update_lock);
1938
1939                 ru->restarter_update_wakeup = 0;
1940
1941                 while ((e = restarter_event_dequeue()) != NULL) {
1942                         restarter_inst_t *rip;
1943                         char *fmri;
1944
1945                         MUTEX_UNLOCK(&ru->restarter_update_lock);
1946
1947                         /*
1948                          * ADD_INSTANCE is special: there's likely no
1949                          * instance structure yet, so we need to handle the
1950                          * addition synchronously.
1951                          */
1952                         switch (e->rpe_type) {
1953                         case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1954                                 if (restarter_insert_inst(h, e->rpe_inst) != 0)
1955                                         log_error(LOG_INFO, "Restarter: "
1956                                             "Could not add %s.\n", e->rpe_inst);
1957
1958                                 MUTEX_LOCK(&st->st_load_lock);
1959                                 if (--st->st_load_instances == 0)
1960                                         (void) pthread_cond_broadcast(
1961                                             &st->st_load_cv);
1962                                 MUTEX_UNLOCK(&st->st_load_lock);
1963
1964                                 goto nolookup;
1965                         }
1966
1967                         /*
1968                          * Lookup the instance, locking only the event queue.
1969                          * Can't grab ri_lock here because it might be held
1970                          * by a long-running method.
1971                          */
1972                         rip = inst_lookup_queue(e->rpe_inst);
1973                         if (rip == NULL) {
1974                                 log_error(LOG_INFO, "Restarter: "
1975                                     "Ignoring %s command for unknown service "
1976                                     "%s.\n", event_names[e->rpe_type],
1977                                     e->rpe_inst);
1978                                 goto nolookup;
1979                         }
1980
1981                         /* Keep ADMIN events from filling up the queue. */
1982                         if (is_admin_event(e->rpe_type) &&
1983                             uu_list_numnodes(rip->ri_queue) >
1984                             RINST_QUEUE_THRESHOLD) {
1985                                 MUTEX_UNLOCK(&rip->ri_queue_lock);
1986                                 log_instance(rip, B_TRUE, "Instance event "
1987                                     "queue overflow.  Dropping administrative "
1988                                     "request.");
1989                                 log_framework(LOG_DEBUG, "%s: Instance event "
1990                                     "queue overflow.  Dropping administrative "
1991                                     "request.\n", rip->ri_i.i_fmri);
1992                                 goto nolookup;
1993                         }
1994
1995                         /* Now add the event to the instance queue. */
1996                         restarter_queue_event(rip, e);
1997
1998                         if (rip->ri_queue_thread == 0) {
1999                                 /*
2000                                  * Start a thread if one isn't already
2001                                  * running.
2002                                  */
2003                                 fmri = safe_strdup(e->rpe_inst);
2004                                 rip->ri_queue_thread =  startd_thread_create(
2005                                     restarter_process_events, (void *)fmri);
2006                         } else {
2007                                 /*
2008                                  * Signal the existing thread that there's
2009                                  * a new event.
2010                                  */
2011                                 (void) pthread_cond_broadcast(
2012                                     &rip->ri_queue_cv);
2013                         }
2014
2015                         MUTEX_UNLOCK(&rip->ri_queue_lock);
2016 nolookup:
2017                         restarter_event_release(e);
2018
2019                         MUTEX_LOCK(&ru->restarter_update_lock);
2020                 }
2021         }
2022
2023         /*
2024          * Unreachable for now -- there's currently no graceful cleanup
2025          * called on exit().
2026          */
2027         (void) scf_handle_unbind(h);
2028         scf_handle_destroy(h);
2029         return (NULL);
2030 }
2031
2032 static restarter_inst_t *
2033 contract_to_inst(ctid_t ctid)
2034 {
2035         restarter_inst_t *inst;
2036         int id;
2037
2038         id = lookup_inst_by_contract(ctid);
2039         if (id == -1)
2040                 return (NULL);
2041
2042         inst = inst_lookup_by_id(id);
2043         if (inst != NULL) {
2044                 /*
2045                  * Since ri_lock isn't held by the contract id lookup, this
2046                  * instance may have been restarted and now be in a new
2047                  * contract, making the old contract no longer valid for this
2048                  * instance.
2049                  */
2050                 if (ctid != inst->ri_i.i_primary_ctid) {
2051                         MUTEX_UNLOCK(&inst->ri_lock);
2052                         inst = NULL;
2053                 }
2054         }
2055         return (inst);
2056 }
2057
2058 /*
2059  * void contract_action()
2060  *   Take action on contract events.
2061  */
2062 static void
2063 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2064     uint32_t type)
2065 {
2066         const char *fmri = inst->ri_i.i_fmri;
2067
2068         assert(MUTEX_HELD(&inst->ri_lock));
2069
2070         /*
2071          * If startd has stopped this contract, there is no need to
2072          * stop it again.
2073          */
2074         if (inst->ri_i.i_primary_ctid > 0 &&
2075             inst->ri_i.i_primary_ctid_stopped)
2076                 return;
2077
2078         if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2079             | CT_PR_EV_HWERR)) == 0) {
2080                 /*
2081                  * There shouldn't be other events, since that's not how we set
2082                  * the terms. Thus, just log an error and drive on.
2083                  */
2084                 log_framework(LOG_NOTICE,
2085                     "%s: contract %ld received unexpected critical event "
2086                     "(%d)\n", fmri, id, type);
2087                 return;
2088         }
2089
2090         assert(instance_in_transition(inst) == 0);
2091
2092         if (instance_is_wait_style(inst)) {
2093                 /*
2094                  * We ignore all events; if they impact the
2095                  * process we're monitoring, then the
2096                  * wait_thread will stop the instance.
2097                  */
2098                 log_framework(LOG_DEBUG,
2099                     "%s: ignoring contract event on wait-style service\n",
2100                     fmri);
2101         } else {
2102                 /*
2103                  * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2104                  */
2105                 switch (type) {
2106                 case CT_PR_EV_EMPTY:
2107                         (void) stop_instance(h, inst, RSTOP_EXIT);
2108                         break;
2109                 case CT_PR_EV_CORE:
2110                         (void) stop_instance(h, inst, RSTOP_CORE);
2111                         break;
2112                 case CT_PR_EV_SIGNAL:
2113                         (void) stop_instance(h, inst, RSTOP_SIGNAL);
2114                         break;
2115                 case CT_PR_EV_HWERR:
2116                         (void) stop_instance(h, inst, RSTOP_HWERR);
2117                         break;
2118                 }
2119         }
2120 }
2121
2122 /*
2123  * void *restarter_contract_event_thread(void *)
2124  *   Listens to the process contract bundle for critical events, taking action
2125  *   on events from contracts we know we are responsible for.
2126  */
2127 /*ARGSUSED*/
2128 static void *
2129 restarter_contracts_event_thread(void *unused)
2130 {
2131         int fd, err;
2132         scf_handle_t *local_handle;
2133
2134         /*
2135          * Await graph load completion.  That is, stop here, until we've scanned
2136          * the repository for contract - instance associations.
2137          */
2138         MUTEX_LOCK(&st->st_load_lock);
2139         while (!(st->st_load_complete && st->st_load_instances == 0))
2140                 (void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2141         MUTEX_UNLOCK(&st->st_load_lock);
2142
2143         /*
2144          * This is a new thread, and thus, gets its own handle
2145          * to the repository.
2146          */
2147         if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2148                 uu_die("Unable to bind a new repository handle: %s\n",
2149                     scf_strerror(scf_error()));
2150
2151         fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2152         if (fd == -1)
2153                 uu_die("process bundle open failed");
2154
2155         /*
2156          * Make sure we get all events (including those generated by configd
2157          * before this thread was started).
2158          */
2159         err = ct_event_reset(fd);
2160         assert(err == 0);
2161
2162         for (;;) {
2163                 int efd, sfd;
2164                 ct_evthdl_t ev;
2165                 uint32_t type;
2166                 ctevid_t evid;
2167                 ct_stathdl_t status;
2168                 ctid_t ctid;
2169                 restarter_inst_t *inst;
2170                 uint64_t cookie;
2171
2172                 if (err = ct_event_read_critical(fd, &ev)) {
2173                         log_error(LOG_WARNING,
2174                             "Error reading next contract event: %s",
2175                             strerror(err));
2176                         continue;
2177                 }
2178
2179                 evid = ct_event_get_evid(ev);
2180                 ctid = ct_event_get_ctid(ev);
2181                 type = ct_event_get_type(ev);
2182
2183                 /* Fetch cookie. */
2184                 if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2185                     < 0) {
2186                         ct_event_free(ev);
2187                         continue;
2188                 }
2189
2190                 if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2191                         log_framework(LOG_WARNING, "Could not get status for "
2192                             "contract %ld: %s\n", ctid, strerror(err));
2193
2194                         startd_close(sfd);
2195                         ct_event_free(ev);
2196                         continue;
2197                 }
2198
2199                 cookie = ct_status_get_cookie(status);
2200
2201                 log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2202                     "cookie %lld\n", type, ctid, cookie);
2203
2204                 ct_status_free(status);
2205
2206                 startd_close(sfd);
2207
2208                 /*
2209                  * svc.configd(1M) restart handling performed by the
2210                  * fork_configd_thread.  We don't acknowledge, as that thread
2211                  * will do so.
2212                  */
2213                 if (cookie == CONFIGD_COOKIE) {
2214                         ct_event_free(ev);
2215                         continue;
2216                 }
2217
2218                 inst = NULL;
2219                 if (storing_contract != 0 &&
2220                     (inst = contract_to_inst(ctid)) == NULL) {
2221                         /*
2222                          * This can happen for two reasons:
2223                          * - method_run() has not yet stored the
2224                          *    the contract into the internal hash table.
2225                          * - we receive an EMPTY event for an abandoned
2226                          *    contract.
2227                          * If there is any contract in the process of
2228                          * being stored into the hash table then re-read
2229                          * the event later.
2230                          */
2231                         log_framework(LOG_DEBUG,
2232                             "Reset event %d for unknown "
2233                             "contract id %ld\n", type, ctid);
2234
2235                         /* don't go too fast */
2236                         (void) poll(NULL, 0, 100);
2237
2238                         (void) ct_event_reset(fd);
2239                         ct_event_free(ev);
2240                         continue;
2241                 }
2242
2243                 /*
2244                  * Do not call contract_to_inst() again if first
2245                  * call succeeded.
2246                  */
2247                 if (inst == NULL)
2248                         inst = contract_to_inst(ctid);
2249                 if (inst == NULL) {
2250                         /*
2251                          * This can happen if we receive an EMPTY
2252                          * event for an abandoned contract.
2253                          */
2254                         log_framework(LOG_DEBUG,
2255                             "Received event %d for unknown contract id "
2256                             "%ld\n", type, ctid);
2257                 } else {
2258                         log_framework(LOG_DEBUG,
2259                             "Received event %d for contract id "
2260                             "%ld (%s)\n", type, ctid,
2261                             inst->ri_i.i_fmri);
2262
2263                         contract_action(local_handle, inst, ctid, type);
2264
2265                         MUTEX_UNLOCK(&inst->ri_lock);
2266                 }
2267
2268                 efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2269                     O_WRONLY);
2270                 if (efd != -1) {
2271                         (void) ct_ctl_ack(efd, evid);
2272                         startd_close(efd);
2273                 }
2274
2275                 ct_event_free(ev);
2276
2277         }
2278
2279         /*NOTREACHED*/
2280         return (NULL);
2281 }
2282
2283 /*
2284  * Timeout queue, processed by restarter_timeouts_event_thread().
2285  */
2286 timeout_queue_t *timeouts;
2287 static uu_list_pool_t *timeout_pool;
2288
2289 typedef struct timeout_update {
2290         pthread_mutex_t         tu_lock;
2291         pthread_cond_t          tu_cv;
2292         int                     tu_wakeup;
2293 } timeout_update_t;
2294
2295 timeout_update_t *tu;
2296
2297 static const char *timeout_ovr_svcs[] = {
2298         "svc:/system/manifest-import:default",
2299         "svc:/network/initial:default",
2300         "svc:/network/service:default",
2301         "svc:/system/rmtmpfiles:default",
2302         "svc:/network/loopback:default",
2303         "svc:/network/physical:default",
2304         "svc:/system/device/local:default",
2305         "svc:/system/metainit:default",
2306         "svc:/system/filesystem/usr:default",
2307         "svc:/system/filesystem/minimal:default",
2308         "svc:/system/filesystem/local:default",
2309         NULL
2310 };
2311
2312 int
2313 is_timeout_ovr(restarter_inst_t *inst)
2314 {
2315         int i;
2316
2317         for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2318                 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2319                         log_instance(inst, B_TRUE, "Timeout override by "
2320                             "svc.startd.  Using infinite timeout.");
2321                         return (1);
2322                 }
2323         }
2324
2325         return (0);
2326 }
2327
2328 /*ARGSUSED*/
2329 static int
2330 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2331 {
2332         hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2333         hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2334
2335         if (t1 > t2)
2336                 return (1);
2337         else if (t1 < t2)
2338                 return (-1);
2339         return (0);
2340 }
2341
2342 void
2343 timeout_init()
2344 {
2345         timeouts = startd_zalloc(sizeof (timeout_queue_t));
2346
2347         (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2348
2349         timeout_pool = startd_list_pool_create("timeouts",
2350             sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2351             timeout_compare, UU_LIST_POOL_DEBUG);
2352         assert(timeout_pool != NULL);
2353
2354         timeouts->tq_list = startd_list_create(timeout_pool,
2355             timeouts, UU_LIST_SORTED);
2356         assert(timeouts->tq_list != NULL);
2357
2358         tu = startd_zalloc(sizeof (timeout_update_t));
2359         (void) pthread_cond_init(&tu->tu_cv, NULL);
2360         (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2361 }
2362
2363 void
2364 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2365 {
2366         hrtime_t now, timeout;
2367         timeout_entry_t *entry;
2368         uu_list_index_t idx;
2369
2370         assert(MUTEX_HELD(&inst->ri_lock));
2371
2372         now = gethrtime();
2373
2374         /*
2375          * If we overflow LLONG_MAX, we're never timing out anyways, so
2376          * just return.
2377          */
2378         if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2379                 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2380                     "treating as infinite.");
2381                 return;
2382         }
2383
2384         /* hrtime is in nanoseconds. Convert timeout_sec. */
2385         timeout = now + (timeout_sec * 1000000000LL);
2386
2387         entry = startd_alloc(sizeof (timeout_entry_t));
2388         entry->te_timeout = timeout;
2389         entry->te_ctid = cid;
2390         entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2391         entry->te_logstem = safe_strdup(inst->ri_logstem);
2392         entry->te_fired = 0;
2393         /* Insert the calculated timeout time onto the queue. */
2394         MUTEX_LOCK(&timeouts->tq_lock);
2395         (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2396         uu_list_node_init(entry, &entry->te_link, timeout_pool);
2397         uu_list_insert(timeouts->tq_list, entry, idx);
2398         MUTEX_UNLOCK(&timeouts->tq_lock);
2399
2400         assert(inst->ri_timeout == NULL);
2401         inst->ri_timeout = entry;
2402
2403         MUTEX_LOCK(&tu->tu_lock);
2404         tu->tu_wakeup = 1;
2405         (void) pthread_cond_broadcast(&tu->tu_cv);
2406         MUTEX_UNLOCK(&tu->tu_lock);
2407 }
2408
2409
2410 void
2411 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2412 {
2413         assert(MUTEX_HELD(&inst->ri_lock));
2414
2415         if (inst->ri_timeout == NULL)
2416                 return;
2417
2418         assert(inst->ri_timeout->te_ctid == cid);
2419
2420         MUTEX_LOCK(&timeouts->tq_lock);
2421         uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2422         MUTEX_UNLOCK(&timeouts->tq_lock);
2423
2424         free(inst->ri_timeout->te_fmri);
2425         free(inst->ri_timeout->te_logstem);
2426         startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2427         inst->ri_timeout = NULL;
2428 }
2429
2430 static int
2431 timeout_now()
2432 {
2433         timeout_entry_t *e;
2434         hrtime_t now;
2435         int ret;
2436
2437         now = gethrtime();
2438
2439         /*
2440          * Walk through the (sorted) timeouts list.  While the timeout
2441          * at the head of the list is <= the current time, kill the
2442          * method.
2443          */
2444         MUTEX_LOCK(&timeouts->tq_lock);
2445
2446         for (e = uu_list_first(timeouts->tq_list);
2447             e != NULL && e->te_timeout <= now;
2448             e = uu_list_next(timeouts->tq_list, e)) {
2449                 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2450                     "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2451                 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2452                     "Method or service exit timed out.  Killing contract %ld.",
2453                     e->te_ctid);
2454                 e->te_fired = 1;
2455                 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2456         }
2457
2458         if (uu_list_numnodes(timeouts->tq_list) > 0)
2459                 ret = 0;
2460         else
2461                 ret = -1;
2462
2463         MUTEX_UNLOCK(&timeouts->tq_lock);
2464
2465         return (ret);
2466 }
2467
2468 /*
2469  * void *restarter_timeouts_event_thread(void *)
2470  *   Responsible for monitoring the method timeouts.  This thread must
2471  *   be started before any methods are called.
2472  */
2473 /*ARGSUSED*/
2474 static void *
2475 restarter_timeouts_event_thread(void *unused)
2476 {
2477         /*
2478          * Timeouts are entered on a priority queue, which is processed by
2479          * this thread.  As timeouts are specified in seconds, we'll do
2480          * the necessary processing every second, as long as the queue
2481          * is not empty.
2482          */
2483
2484         /*CONSTCOND*/
2485         while (1) {
2486                 /*
2487                  * As long as the timeout list isn't empty, process it
2488                  * every second.
2489                  */
2490                 if (timeout_now() == 0) {
2491                         (void) sleep(1);
2492                         continue;
2493                 }
2494
2495                 /* The list is empty, wait until we have more timeouts. */
2496                 MUTEX_LOCK(&tu->tu_lock);
2497
2498                 while (tu->tu_wakeup == 0)
2499                         (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2500
2501                 tu->tu_wakeup = 0;
2502                 MUTEX_UNLOCK(&tu->tu_lock);
2503         }
2504
2505         return (NULL);
2506 }
2507
2508 void
2509 restarter_start()
2510 {
2511         (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2512         (void) startd_thread_create(restarter_event_thread, NULL);
2513         (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2514         (void) startd_thread_create(wait_thread, NULL);
2515 }
2516
2517
2518 void
2519 restarter_init()
2520 {
2521         restarter_instance_pool = startd_list_pool_create("restarter_instances",
2522             sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2523             ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2524         (void) memset(&instance_list, 0, sizeof (instance_list));
2525
2526         (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2527         instance_list.ril_instance_list = startd_list_create(
2528             restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2529
2530         restarter_queue_pool = startd_list_pool_create(
2531             "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2532             offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2533             UU_LIST_POOL_DEBUG);
2534
2535         contract_list_pool = startd_list_pool_create(
2536             "contract_list", sizeof (contract_entry_t),
2537             offsetof(contract_entry_t,  ce_link), NULL,
2538             UU_LIST_POOL_DEBUG);
2539         contract_hash_init();
2540
2541         log_framework(LOG_DEBUG, "Initialized restarter\n");
2542 }