usr/src/cmd/svc/startd/restarter.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25
  26 /*
  27  * restarter.c - service manipulation
  28  *
  29  * This component manages services whose restarter is svc.startd, the standard
  30  * restarter.  It translates restarter protocol events from the graph engine
  31  * into actions on processes, as a delegated restarter would do.
  32  *
  33  * The master restarter manages a number of always-running threads:
  34  *   - restarter event thread: events from the graph engine
  35  *   - timeout thread: thread to fire queued timeouts
  36  *   - contract thread: thread to handle contract events
  37  *   - wait thread: thread to handle wait-based services
  38  *
  39  * The other threads are created as-needed:
  40  *   - per-instance method threads
  41  *   - per-instance event processing threads
  42  *
  43  * The interaction of all threads must result in the following conditions
  44  * being satisfied (on a per-instance basis):
  45  *   - restarter events must be processed in order
  46  *   - method execution must be serialized
  47  *   - instance delete must be held until outstanding methods are complete
  48  *   - contract events shouldn't be processed while a method is running
  49  *   - timeouts should fire even when a method is running
  50  *
  51  * Service instances are represented by restarter_inst_t's and are kept in the
  52  * instance_list list.
  53  *
  54  * Service States
  55  *   The current state of a service instance is kept in
  56  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
  57  *   some time, then before we effect the transition we set
  58  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
  59  *   rotate i_next_state to i_state and set i_next_state to
  60  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
  61  *   held.  The exception is when we launch methods, which are done with
  62  *   a separate thread.  To keep any other threads from grabbing ri_lock before
  63  *   method_thread() does, we set ri_method_thread to the thread id of the
  64  *   method thread, and when it is nonzero any thread with a different thread id
  65  *   waits on ri_method_cv.
  66  *
  67  * Method execution is serialized by blocking on ri_method_cv in
  68  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
  69  * also prevents the instance structure from being deleted until all
  70  * outstanding operations such as method_thread() have finished.
  71  *
  72  * Lock ordering:
  73  *
  74  * dgraph_lock [can be held when taking:]
  75  *   utmpx_lock
  76  *   dictionary->dict_lock
  77  *   st->st_load_lock
  78  *   wait_info_lock
  79  *   ru->restarter_update_lock
  80  *     restarter_queue->rpeq_lock
  81  *   instance_list.ril_lock
  82  *     inst->ri_lock
  83  *   st->st_configd_live_lock
  84  *
  85  * instance_list.ril_lock
  86  *   graph_queue->gpeq_lock
  87  *   gu->gu_lock
  88  *   st->st_configd_live_lock
  89  *   dictionary->dict_lock
  90  *   inst->ri_lock
  91  *     graph_queue->gpeq_lock
  92  *     gu->gu_lock
  93  *     tu->tu_lock
  94  *     tq->tq_lock
  95  *     inst->ri_queue_lock
  96  *       wait_info_lock
  97  *       bp->cb_lock
  98  *     utmpx_lock
  99  *
 100  * single_user_thread_lock
 101  *   wait_info_lock
 102  *   utmpx_lock
 103  *
 104  * gu_freeze_lock
 105  *
 106  * logbuf_mutex nests inside pretty much everything.
 107  */
 108
 109 #include <sys/contract/process.h>
 110 #include <sys/ctfs.h>
 111 #include <sys/stat.h>
 112 #include <sys/time.h>
 113 #include <sys/types.h>
 114 #include <sys/uio.h>
 115 #include <sys/wait.h>
 116 #include <assert.h>
 117 #include <errno.h>
 118 #include <fcntl.h>
 119 #include <libcontract.h>
 120 #include <libcontract_priv.h>
 121 #include <libintl.h>
 122 #include <librestart.h>
 123 #include <librestart_priv.h>
 124 #include <libuutil.h>
 125 #include <limits.h>
 126 #include <poll.h>
 127 #include <port.h>
 128 #include <pthread.h>
 129 #include <stdarg.h>
 130 #include <stdio.h>
 131 #include <strings.h>
 132 #include <unistd.h>
 133
 134 #include "startd.h"
 135 #include "protocol.h"
 136
 137 static uu_list_pool_t *restarter_instance_pool;
 138 static restarter_instance_list_t instance_list;
 139
 140 static uu_list_pool_t *restarter_queue_pool;
 141
 142 /*
 143  * Function used to reset the restart times for an instance, when
 144  * an administrative task comes along and essentially makes the times
 145  * in this array ineffective.
 146  */
 147 static void
 148 reset_start_times(restarter_inst_t *inst)
 149 {
 150         inst->ri_start_index = 0;
 151         bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
 152 }
 153
 154 /*ARGSUSED*/
 155 static int
 156 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
 157     void *private)
 158 {
 159         int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
 160         int rc_id = *(int *)rc_arg;
 161
 162         if (lc_id > rc_id)
 163                 return (1);
 164         if (lc_id < rc_id)
 165                 return (-1);
 166         return (0);
 167 }
 168
 169 static restarter_inst_t *
 170 inst_lookup_by_name(const char *name)
 171 {
 172         int id;
 173
 174         id = dict_lookup_byname(name);
 175         if (id == -1)
 176                 return (NULL);
 177
 178         return (inst_lookup_by_id(id));
 179 }
 180
 181 restarter_inst_t *
 182 inst_lookup_by_id(int id)
 183 {
 184         restarter_inst_t *inst;
 185
 186         MUTEX_LOCK(&instance_list.ril_lock);
 187         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 188         if (inst != NULL)
 189                 MUTEX_LOCK(&inst->ri_lock);
 190         MUTEX_UNLOCK(&instance_list.ril_lock);
 191
 192         if (inst != NULL) {
 193                 while (inst->ri_method_thread != 0 &&
 194                     !pthread_equal(inst->ri_method_thread, pthread_self())) {
 195                         ++inst->ri_method_waiters;
 196                         (void) pthread_cond_wait(&inst->ri_method_cv,
 197                             &inst->ri_lock);
 198                         assert(inst->ri_method_waiters > 0);
 199                         --inst->ri_method_waiters;
 200                 }
 201         }
 202
 203         return (inst);
 204 }
 205
 206 static restarter_inst_t *
 207 inst_lookup_queue(const char *name)
 208 {
 209         int id;
 210         restarter_inst_t *inst;
 211
 212         id = dict_lookup_byname(name);
 213         if (id == -1)
 214                 return (NULL);
 215
 216         MUTEX_LOCK(&instance_list.ril_lock);
 217         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 218         if (inst != NULL)
 219                 MUTEX_LOCK(&inst->ri_queue_lock);
 220         MUTEX_UNLOCK(&instance_list.ril_lock);
 221
 222         return (inst);
 223 }
 224
 225 const char *
 226 service_style(int flags)
 227 {
 228         switch (flags & RINST_STYLE_MASK) {
 229         case RINST_CONTRACT:    return ("contract");
 230         case RINST_TRANSIENT:   return ("transient");
 231         case RINST_WAIT:        return ("wait");
 232
 233         default:
 234 #ifndef NDEBUG
 235                 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
 236 #endif
 237                 abort();
 238                 /* NOTREACHED */
 239         }
 240 }
 241
 242 /*
 243  * Fails with ECONNABORTED or ECANCELED.
 244  */
 245 static int
 246 check_contract(restarter_inst_t *inst, boolean_t primary,
 247     scf_instance_t *scf_inst)
 248 {
 249         ctid_t *ctidp;
 250         int fd, r;
 251
 252         ctidp = primary ? &inst->ri_i.i_primary_ctid :
 253             &inst->ri_i.i_transient_ctid;
 254
 255         assert(*ctidp >= 1);
 256
 257         fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
 258         if (fd >= 0) {
 259                 r = close(fd);
 260                 assert(r == 0);
 261                 return (0);
 262         }
 263
 264         r = restarter_remove_contract(scf_inst, *ctidp, primary ?
 265             RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
 266         switch (r) {
 267         case 0:
 268         case ECONNABORTED:
 269         case ECANCELED:
 270                 *ctidp = 0;
 271                 return (r);
 272
 273         case ENOMEM:
 274                 uu_die("Out of memory\n");
 275                 /* NOTREACHED */
 276
 277         case EPERM:
 278                 uu_die("Insufficient privilege.\n");
 279                 /* NOTREACHED */
 280
 281         case EACCES:
 282                 uu_die("Repository backend access denied.\n");
 283                 /* NOTREACHED */
 284
 285         case EROFS:
 286                 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
 287                     "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
 288                 return (0);
 289
 290         case EINVAL:
 291         case EBADF:
 292         default:
 293                 assert(0);
 294                 abort();
 295                 /* NOTREACHED */
 296         }
 297 }
 298
 299 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
 300
 301 /*
 302  * int restarter_insert_inst(scf_handle_t *, char *)
 303  *   If the inst is already in the restarter list, return its id.  If the inst
 304  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
 305  *   states, insert it into the list, and return 0.
 306  *
 307  *   Fails with
 308  *     ENOENT - name is not in the repository
 309  */
 310 static int
 311 restarter_insert_inst(scf_handle_t *h, const char *name)
 312 {
 313         int id, r;
 314         restarter_inst_t *inst;
 315         uu_list_index_t idx;
 316         scf_service_t *scf_svc;
 317         scf_instance_t *scf_inst;
 318         scf_snapshot_t *snap = NULL;
 319         scf_propertygroup_t *pg;
 320         char *svc_name, *inst_name;
 321         char logfilebuf[PATH_MAX];
 322         char *c;
 323         boolean_t do_commit_states;
 324         restarter_instance_state_t state, next_state;
 325         protocol_states_t *ps;
 326         pid_t start_pid;
 327         restarter_str_t reason = restarter_str_insert_in_graph;
 328
 329         MUTEX_LOCK(&instance_list.ril_lock);
 330
 331         /*
 332          * We don't use inst_lookup_by_name() here because we want the lookup
 333          * & insert to be atomic.
 334          */
 335         id = dict_lookup_byname(name);
 336         if (id != -1) {
 337                 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
 338                     &idx);
 339                 if (inst != NULL) {
 340                         MUTEX_UNLOCK(&instance_list.ril_lock);
 341                         return (0);
 342                 }
 343         }
 344
 345         /* Allocate an instance */
 346         inst = startd_zalloc(sizeof (restarter_inst_t));
 347         inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
 348         inst->ri_utmpx_prefix[0] = '\0';
 349
 350         inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
 351         (void) strcpy((char *)inst->ri_i.i_fmri, name);
 352
 353         inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
 354
 355         /*
 356          * id shouldn't be -1 since we use the same dictionary as graph.c, but
 357          * just in case.
 358          */
 359         inst->ri_id = (id != -1 ? id : dict_insert(name));
 360
 361         special_online_hooks_get(name, &inst->ri_pre_online_hook,
 362             &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
 363
 364         scf_svc = safe_scf_service_create(h);
 365         scf_inst = safe_scf_instance_create(h);
 366         pg = safe_scf_pg_create(h);
 367         svc_name = startd_alloc(max_scf_name_size);
 368         inst_name = startd_alloc(max_scf_name_size);
 369
 370 rep_retry:
 371         if (snap != NULL)
 372                 scf_snapshot_destroy(snap);
 373         if (inst->ri_logstem != NULL)
 374                 startd_free(inst->ri_logstem, PATH_MAX);
 375         if (inst->ri_common_name != NULL)
 376                 startd_free(inst->ri_common_name,
 377                     strlen(inst->ri_common_name) + 1);
 378         if (inst->ri_C_common_name != NULL)
 379                 startd_free(inst->ri_C_common_name,
 380                     strlen(inst->ri_C_common_name) + 1);
 381         snap = NULL;
 382         inst->ri_logstem = NULL;
 383         inst->ri_common_name = NULL;
 384         inst->ri_C_common_name = NULL;
 385
 386         if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
 387             NULL, SCF_DECODE_FMRI_EXACT) != 0) {
 388                 switch (scf_error()) {
 389                 case SCF_ERROR_CONNECTION_BROKEN:
 390                         libscf_handle_rebind(h);
 391                         goto rep_retry;
 392
 393                 case SCF_ERROR_NOT_FOUND:
 394                         goto deleted;
 395                 }
 396
 397                 uu_die("Can't decode FMRI %s: %s\n", name,
 398                     scf_strerror(scf_error()));
 399         }
 400
 401         /*
 402          * If there's no running snapshot, then we execute using the editing
 403          * snapshot.  Pending snapshots will be taken later.
 404          */
 405         snap = libscf_get_running_snapshot(scf_inst);
 406
 407         if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
 408             (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
 409             0)) {
 410                 switch (scf_error()) {
 411                 case SCF_ERROR_NOT_SET:
 412                         break;
 413
 414                 case SCF_ERROR_CONNECTION_BROKEN:
 415                         libscf_handle_rebind(h);
 416                         goto rep_retry;
 417
 418                 default:
 419                         assert(0);
 420                         abort();
 421                 }
 422
 423                 goto deleted;
 424         }
 425
 426         (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
 427         for (c = logfilebuf; *c != '\0'; c++)
 428                 if (*c == '/')
 429                         *c = '-';
 430
 431         inst->ri_logstem = startd_alloc(PATH_MAX);
 432         (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
 433             LOG_SUFFIX);
 434
 435         /*
 436          * If the restarter group is missing, use uninit/none.  Otherwise,
 437          * we're probably being restarted & don't want to mess up the states
 438          * that are there.
 439          */
 440         state = RESTARTER_STATE_UNINIT;
 441         next_state = RESTARTER_STATE_NONE;
 442
 443         r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
 444         if (r != 0) {
 445                 switch (scf_error()) {
 446                 case SCF_ERROR_CONNECTION_BROKEN:
 447                         libscf_handle_rebind(h);
 448                         goto rep_retry;
 449
 450                 case SCF_ERROR_NOT_SET:
 451                         goto deleted;
 452
 453                 case SCF_ERROR_NOT_FOUND:
 454                         /*
 455                          * This shouldn't happen since the graph engine should
 456                          * have initialized the state to uninitialized/none if
 457                          * there was no restarter pg.  In case somebody
 458                          * deleted it, though....
 459                          */
 460                         do_commit_states = B_TRUE;
 461                         break;
 462
 463                 default:
 464                         assert(0);
 465                         abort();
 466                 }
 467         } else {
 468                 r = libscf_read_states(pg, &state, &next_state);
 469                 if (r != 0) {
 470                         do_commit_states = B_TRUE;
 471                 } else {
 472                         if (next_state != RESTARTER_STATE_NONE) {
 473                                 /*
 474                                  * Force next_state to _NONE since we
 475                                  * don't look for method processes.
 476                                  */
 477                                 next_state = RESTARTER_STATE_NONE;
 478                                 do_commit_states = B_TRUE;
 479                         } else {
 480                                 /*
 481                                  * The reason for transition will depend on
 482                                  * state.
 483                                  */
 484                                 if (st->st_initial == 0)
 485                                         reason = restarter_str_startd_restart;
 486                                 else if (state == RESTARTER_STATE_MAINT)
 487                                         reason = restarter_str_bad_repo_state;
 488                                 /*
 489                                  * Inform the restarter of our state without
 490                                  * changing the STIME in the repository.
 491                                  */
 492                                 ps = startd_alloc(sizeof (*ps));
 493                                 inst->ri_i.i_state = ps->ps_state = state;
 494                                 inst->ri_i.i_next_state = ps->ps_state_next =
 495                                     next_state;
 496                                 ps->ps_reason = reason;
 497
 498                                 graph_protocol_send_event(inst->ri_i.i_fmri,
 499                                     GRAPH_UPDATE_STATE_CHANGE, ps);
 500
 501                                 do_commit_states = B_FALSE;
 502                         }
 503                 }
 504         }
 505
 506         switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
 507             &inst->ri_utmpx_prefix)) {
 508         case 0:
 509                 break;
 510
 511         case ECONNABORTED:
 512                 libscf_handle_rebind(h);
 513                 goto rep_retry;
 514
 515         case ECANCELED:
 516                 goto deleted;
 517
 518         case ENOENT:
 519                 /*
 520                  * This is odd, because the graph engine should have required
 521                  * the general property group.  So we'll just use default
 522                  * flags in anticipation of the graph engine sending us
 523                  * REMOVE_INSTANCE when it finds out that the general property
 524                  * group has been deleted.
 525                  */
 526                 inst->ri_flags = RINST_CONTRACT;
 527                 break;
 528
 529         default:
 530                 assert(0);
 531                 abort();
 532         }
 533
 534         r = libscf_get_template_values(scf_inst, snap,
 535             &inst->ri_common_name, &inst->ri_C_common_name);
 536
 537         /*
 538          * Copy our names to smaller buffers to reduce our memory footprint.
 539          */
 540         if (inst->ri_common_name != NULL) {
 541                 char *tmp = safe_strdup(inst->ri_common_name);
 542                 startd_free(inst->ri_common_name, max_scf_value_size);
 543                 inst->ri_common_name = tmp;
 544         }
 545
 546         if (inst->ri_C_common_name != NULL) {
 547                 char *tmp = safe_strdup(inst->ri_C_common_name);
 548                 startd_free(inst->ri_C_common_name, max_scf_value_size);
 549                 inst->ri_C_common_name = tmp;
 550         }
 551
 552         switch (r) {
 553         case 0:
 554                 break;
 555
 556         case ECONNABORTED:
 557                 libscf_handle_rebind(h);
 558                 goto rep_retry;
 559
 560         case ECANCELED:
 561                 goto deleted;
 562
 563         case ECHILD:
 564         case ENOENT:
 565                 break;
 566
 567         default:
 568                 assert(0);
 569                 abort();
 570         }
 571
 572         switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
 573             &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
 574             &start_pid)) {
 575         case 0:
 576                 break;
 577
 578         case ECONNABORTED:
 579                 libscf_handle_rebind(h);
 580                 goto rep_retry;
 581
 582         case ECANCELED:
 583                 goto deleted;
 584
 585         default:
 586                 assert(0);
 587                 abort();
 588         }
 589
 590         if (inst->ri_i.i_primary_ctid >= 1) {
 591                 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
 592
 593                 switch (check_contract(inst, B_TRUE, scf_inst)) {
 594                 case 0:
 595                         break;
 596
 597                 case ECONNABORTED:
 598                         libscf_handle_rebind(h);
 599                         goto rep_retry;
 600
 601                 case ECANCELED:
 602                         goto deleted;
 603
 604                 default:
 605                         assert(0);
 606                         abort();
 607                 }
 608         }
 609
 610         if (inst->ri_i.i_transient_ctid >= 1) {
 611                 switch (check_contract(inst, B_FALSE, scf_inst)) {
 612                 case 0:
 613                         break;
 614
 615                 case ECONNABORTED:
 616                         libscf_handle_rebind(h);
 617                         goto rep_retry;
 618
 619                 case ECANCELED:
 620                         goto deleted;
 621
 622                 default:
 623                         assert(0);
 624                         abort();
 625                 }
 626         }
 627
 628         /* No more failures we live through, so add it to the list. */
 629         (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
 630         (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
 631         MUTEX_LOCK(&inst->ri_lock);
 632         MUTEX_LOCK(&inst->ri_queue_lock);
 633
 634         (void) pthread_cond_init(&inst->ri_method_cv, NULL);
 635
 636         uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
 637         uu_list_insert(instance_list.ril_instance_list, inst, idx);
 638         MUTEX_UNLOCK(&instance_list.ril_lock);
 639
 640         if (start_pid != -1 &&
 641             (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
 642                 int ret;
 643                 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
 644                 if (ret == -1) {
 645                         /*
 646                          * Implication:  if we can't reregister the
 647                          * instance, we will start another one.  Two
 648                          * instances may or may not result in a resource
 649                          * conflict.
 650                          */
 651                         log_error(LOG_WARNING,
 652                             "%s: couldn't reregister %ld for wait\n",
 653                             inst->ri_i.i_fmri, start_pid);
 654                 } else if (ret == 1) {
 655                         /*
 656                          * Leading PID has exited.
 657                          */
 658                         (void) stop_instance(h, inst, RSTOP_EXIT);
 659                 }
 660         }
 661
 662
 663         scf_pg_destroy(pg);
 664
 665         if (do_commit_states)
 666                 (void) restarter_instance_update_states(h, inst, state,
 667                     next_state, RERR_NONE, reason);
 668
 669         log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
 670             service_style(inst->ri_flags));
 671
 672         MUTEX_UNLOCK(&inst->ri_queue_lock);
 673         MUTEX_UNLOCK(&inst->ri_lock);
 674
 675         startd_free(svc_name, max_scf_name_size);
 676         startd_free(inst_name, max_scf_name_size);
 677         scf_snapshot_destroy(snap);
 678         scf_instance_destroy(scf_inst);
 679         scf_service_destroy(scf_svc);
 680
 681         log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
 682             name);
 683
 684         return (0);
 685
 686 deleted:
 687         MUTEX_UNLOCK(&instance_list.ril_lock);
 688         startd_free(inst_name, max_scf_name_size);
 689         startd_free(svc_name, max_scf_name_size);
 690         if (snap != NULL)
 691                 scf_snapshot_destroy(snap);
 692         scf_pg_destroy(pg);
 693         scf_instance_destroy(scf_inst);
 694         scf_service_destroy(scf_svc);
 695         startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
 696         uu_list_destroy(inst->ri_queue);
 697         if (inst->ri_logstem != NULL)
 698                 startd_free(inst->ri_logstem, PATH_MAX);
 699         if (inst->ri_common_name != NULL)
 700                 startd_free(inst->ri_common_name,
 701                     strlen(inst->ri_common_name) + 1);
 702         if (inst->ri_C_common_name != NULL)
 703                 startd_free(inst->ri_C_common_name,
 704                     strlen(inst->ri_C_common_name) + 1);
 705         startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
 706         startd_free(inst, sizeof (restarter_inst_t));
 707         return (ENOENT);
 708 }
 709
 710 static void
 711 restarter_delete_inst(restarter_inst_t *ri)
 712 {
 713         int id;
 714         restarter_inst_t *rip;
 715         void *cookie = NULL;
 716         restarter_instance_qentry_t *e;
 717
 718         assert(MUTEX_HELD(&ri->ri_lock));
 719
 720         /*
 721          * Must drop the instance lock so we can pick up the instance_list
 722          * lock & remove the instance.
 723          */
 724         id = ri->ri_id;
 725         MUTEX_UNLOCK(&ri->ri_lock);
 726
 727         MUTEX_LOCK(&instance_list.ril_lock);
 728
 729         rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 730         if (rip == NULL) {
 731                 MUTEX_UNLOCK(&instance_list.ril_lock);
 732                 return;
 733         }
 734
 735         assert(ri == rip);
 736
 737         uu_list_remove(instance_list.ril_instance_list, ri);
 738
 739         log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
 740             ri->ri_i.i_fmri);
 741
 742         MUTEX_UNLOCK(&instance_list.ril_lock);
 743
 744         /*
 745          * We can lock the instance without holding the instance_list lock
 746          * since we removed the instance from the list.
 747          */
 748         MUTEX_LOCK(&ri->ri_lock);
 749         MUTEX_LOCK(&ri->ri_queue_lock);
 750
 751         if (ri->ri_i.i_primary_ctid >= 1)
 752                 contract_hash_remove(ri->ri_i.i_primary_ctid);
 753
 754         while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
 755                 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
 756
 757         while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
 758                 startd_free(e, sizeof (*e));
 759         uu_list_destroy(ri->ri_queue);
 760
 761         startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
 762         startd_free(ri->ri_logstem, PATH_MAX);
 763         if (ri->ri_common_name != NULL)
 764                 startd_free(ri->ri_common_name,
 765                     strlen(ri->ri_common_name) + 1);
 766         if (ri->ri_C_common_name != NULL)
 767                 startd_free(ri->ri_C_common_name,
 768                     strlen(ri->ri_C_common_name) + 1);
 769         startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
 770         (void) pthread_mutex_destroy(&ri->ri_lock);
 771         (void) pthread_mutex_destroy(&ri->ri_queue_lock);
 772         startd_free(ri, sizeof (restarter_inst_t));
 773 }
 774
 775 /*
 776  * instance_is_wait_style()
 777  *
 778  *   Returns 1 if the given instance is a "wait-style" service instance.
 779  */
 780 int
 781 instance_is_wait_style(restarter_inst_t *inst)
 782 {
 783         assert(MUTEX_HELD(&inst->ri_lock));
 784         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
 785 }
 786
 787 /*
 788  * instance_is_transient_style()
 789  *
 790  *   Returns 1 if the given instance is a transient service instance.
 791  */
 792 int
 793 instance_is_transient_style(restarter_inst_t *inst)
 794 {
 795         assert(MUTEX_HELD(&inst->ri_lock));
 796         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
 797 }
 798
 799 /*
 800  * instance_in_transition()
 801  * Returns 1 if instance is in transition, 0 if not
 802  */
 803 int
 804 instance_in_transition(restarter_inst_t *inst)
 805 {
 806         assert(MUTEX_HELD(&inst->ri_lock));
 807         if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
 808                 return (0);
 809         return (1);
 810 }
 811
 812 /*
 813  * returns 1 if instance is already started, 0 if not
 814  */
 815 static int
 816 instance_started(restarter_inst_t *inst)
 817 {
 818         int ret;
 819
 820         assert(MUTEX_HELD(&inst->ri_lock));
 821
 822         if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
 823             inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
 824                 ret = 1;
 825         else
 826                 ret = 0;
 827
 828         return (ret);
 829 }
 830
/*
 * Commit a state change (new_state / new_state_next) for ri through
 * _restarter_commit_states(), notify the graph engine of the change, and run
 * the instance's post-online or post-offline hook when the change crosses
 * the started (online/degraded) boundary.  The caller must hold ri->ri_lock.
 *
 * err and reason are forwarded to the graph engine in the state-change
 * event; the short string for reason is also handed to the commit routine.
 *
 * Returns
 *   0 - success
 *   ECONNRESET - success, but h was rebound
 */
int
restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
    restarter_instance_state_t new_state,
    restarter_instance_state_t new_state_next, restarter_error_t err,
    restarter_str_t reason)
{
        protocol_states_t *states;
        int e;
        uint_t retry_count = 0, msecs = ALLOC_DELAY;
        boolean_t rebound = B_FALSE;
        int prev_state_online;
        int state_online;

        assert(MUTEX_HELD(&ri->ri_lock));

        /* Remember the started-ness before the change for the hooks below. */
        prev_state_online = instance_started(ri);

retry:
        e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
            restarter_get_str_short(reason));
        switch (e) {
        case 0:
                break;

        case ENOMEM:
                /*
                 * Back off and retry a bounded number of times, lengthening
                 * the delay on each attempt, before giving up for good.
                 */
                ++retry_count;
                if (retry_count < ALLOC_RETRY) {
                        (void) poll(NULL, 0, msecs);
                        msecs *= ALLOC_DELAY_MULT;
                        goto retry;
                }

                /* Like startd_alloc(). */
                uu_die("Insufficient memory.\n");
                /* NOTREACHED */

        case ECONNABORTED:
                /* Rebind the handle and retry; the caller is told via ECONNRESET. */
                libscf_handle_rebind(h);
                rebound = B_TRUE;
                goto retry;

        case EPERM:
        case EACCES:
        case EROFS:
                log_error(LOG_NOTICE, "Could not commit state change for %s "
                    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
                /* FALLTHROUGH */

        case ENOENT:
                /*
                 * The commit did not go through, so record the new states
                 * in the in-memory instance directly.
                 */
                ri->ri_i.i_state = new_state;
                ri->ri_i.i_next_state = new_state_next;
                break;

        case EINVAL:
        default:
                bad_error("_restarter_commit_states", e);
        }

        /*
         * Tell the graph engine about the transition.
         * NOTE(review): ownership of states presumably passes to the event
         * consumer, since it is not freed here -- confirm.
         */
        states = startd_alloc(sizeof (protocol_states_t));
        states->ps_state = new_state;
        states->ps_state_next = new_state_next;
        states->ps_err = err;
        states->ps_reason = reason;
        graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
            (void *)states);

        state_online = instance_started(ri);

        /* Fire a hook only when started-ness actually flipped. */
        if (prev_state_online && !state_online)
                ri->ri_post_offline_hook();
        else if (!prev_state_online && state_online)
                ri->ri_post_online_hook();

        return (rebound ? ECONNRESET : 0);
}
 911
 912 void
 913 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
 914 {
 915         restarter_inst_t *inst;
 916
 917         assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
 918
 919         inst = inst_lookup_by_name(fmri);
 920         if (inst == NULL)
 921                 return;
 922
 923         inst->ri_flags |= flag;
 924
 925         MUTEX_UNLOCK(&inst->ri_lock);
 926 }
 927
 928 static void
 929 restarter_take_pending_snapshots(scf_handle_t *h)
 930 {
 931         restarter_inst_t *inst;
 932         int r;
 933
 934         MUTEX_LOCK(&instance_list.ril_lock);
 935
 936         for (inst = uu_list_first(instance_list.ril_instance_list);
 937             inst != NULL;
 938             inst = uu_list_next(instance_list.ril_instance_list, inst)) {
 939                 const char *fmri;
 940                 scf_instance_t *sinst = NULL;
 941
 942                 MUTEX_LOCK(&inst->ri_lock);
 943
 944                 /*
 945                  * This is where we'd check inst->ri_method_thread and if it
 946                  * were nonzero we'd wait in anticipation of another thread
 947                  * executing a method for inst.  Doing so with the instance_list
 948                  * locked, though, leads to deadlock.  Since taking a snapshot
 949                  * during that window won't hurt anything, we'll just continue.
 950                  */
 951
 952                 fmri = inst->ri_i.i_fmri;
 953
 954                 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
 955                         scf_snapshot_t *rsnap;
 956
 957                         (void) libscf_fmri_get_instance(h, fmri, &sinst);
 958
 959                         rsnap = libscf_get_or_make_running_snapshot(sinst,
 960                             fmri, B_FALSE);
 961
 962                         scf_instance_destroy(sinst);
 963
 964                         if (rsnap != NULL)
 965                                 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
 966
 967                         scf_snapshot_destroy(rsnap);
 968                 }
 969
 970                 if (inst->ri_flags & RINST_RETAKE_START) {
 971                         switch (r = libscf_snapshots_poststart(h, fmri,
 972                             B_FALSE)) {
 973                         case 0:
 974                         case ENOENT:
 975                                 inst->ri_flags &= ~RINST_RETAKE_START;
 976                                 break;
 977
 978                         case ECONNABORTED:
 979                                 break;
 980
 981                         case EACCES:
 982                         default:
 983                                 bad_error("libscf_snapshots_poststart", r);
 984                         }
 985                 }
 986
 987                 MUTEX_UNLOCK(&inst->ri_lock);
 988         }
 989
 990         MUTEX_UNLOCK(&instance_list.ril_lock);
 991 }
 992
 993 /* ARGSUSED */
 994 void *
 995 restarter_post_fsminimal_thread(void *unused)
 996 {
 997         scf_handle_t *h;
 998         int r;
 999
1000         h = libscf_handle_create_bound_loop();
1001
1002         for (;;) {
1003                 r = libscf_create_self(h);
1004                 if (r == 0)
1005                         break;
1006
1007                 assert(r == ECONNABORTED);
1008                 libscf_handle_rebind(h);
1009         }
1010
1011         restarter_take_pending_snapshots(h);
1012
1013         (void) scf_handle_unbind(h);
1014         scf_handle_destroy(h);
1015
1016         return (NULL);
1017 }
1018
/*
 * int stop_instance()
 *
 *   Stop the instance identified by the instance given as the second argument,
 *   for the cause stated.  The caller must hold inst->ri_lock, and no method
 *   thread may be outstanding for the instance.
 *
 *   Instances that are disabled, in maintenance, or not currently started
 *   are left untouched and reported as success.  For a wait-style instance
 *   whose processes have exited (RSTOP_EXIT) the instance is moved to
 *   offline directly, without running a stop method; otherwise the next
 *   state is set (offline if enabled, disabled if not) and a method thread
 *   is created to run the stop method.
 *
 *   Returns
 *     0 - success
 *     -1 - inst is in transition
 */
static int
stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
    stop_cause_t cause)
{
        fork_info_t *info;
        const char *cp;
        int err;
        restarter_error_t re;
        restarter_str_t reason;

        assert(MUTEX_HELD(&inst->ri_lock));
        assert(inst->ri_method_thread == 0);

        /*
         * Translate the cause into the restarter error class (re), the
         * reason recorded with the state change, and a log string (cp).
         */
        switch (cause) {
        case RSTOP_EXIT:
                re = RERR_RESTART;
                reason = restarter_str_ct_ev_exit;
                cp = "all processes in service exited";
                break;
        case RSTOP_CORE:
                re = RERR_FAULT;
                reason = restarter_str_ct_ev_core;
                cp = "process dumped core";
                break;
        case RSTOP_SIGNAL:
                re = RERR_FAULT;
                reason = restarter_str_ct_ev_signal;
                cp = "process received fatal signal from outside the service";
                break;
        case RSTOP_HWERR:
                re = RERR_FAULT;
                reason = restarter_str_ct_ev_hwerr;
                cp = "process killed due to uncorrectable hardware error";
                break;
        case RSTOP_DEPENDENCY:
                re = RERR_RESTART;
                reason = restarter_str_dependency_activity;
                cp = "dependency activity requires stop";
                break;
        case RSTOP_DISABLE:
                re = RERR_RESTART;
                reason = restarter_str_disable_request;
                cp = "service disabled";
                break;
        case RSTOP_RESTART:
                re = RERR_RESTART;
                reason = restarter_str_restart_request;
                cp = "service restarting";
                break;
        default:
#ifndef NDEBUG
                (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
                    cause, __FILE__, __LINE__);
#endif
                abort();
        }

        /* Services in the disabled and maintenance state are ignored */
        if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
            inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
                log_framework(LOG_DEBUG,
                    "%s: stop_instance -> is maint/disabled\n",
                    inst->ri_i.i_fmri);
                return (0);
        }

        /* Already stopped instances are left alone */
        if (instance_started(inst) == 0) {
                log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
                    inst->ri_i.i_fmri);
                return (0);
        }

        if (instance_in_transition(inst)) {
                /* requeue event by returning -1 */
                log_framework(LOG_DEBUG,
                    "Restarter: Not stopping %s, in transition.\n",
                    inst->ri_i.i_fmri);
                return (-1);
        }

        log_instance(inst, B_TRUE, "Stopping because %s.", cp);

        /* Faults are logged at a higher priority than routine stops. */
        log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
            "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);

        if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
                /*
                 * No need to stop instance, as child has exited; remove
                 * contract and move the instance to the offline state.
                 */
                switch (err = restarter_instance_update_states(local_handle,
                    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
                    reason)) {
                case 0:
                case ECONNRESET:
                        /* A rebound handle still counts as success. */
                        break;

                default:
                        bad_error("restarter_instance_update_states", err);
                }

                (void) update_fault_count(inst, FAULT_COUNT_RESET);
                reset_start_times(inst);

                if (inst->ri_i.i_primary_ctid != 0) {
                        /*
                         * A temporary repository instance is set up in
                         * ri_m_inst around the contract removal and torn
                         * down again afterwards.
                         */
                        inst->ri_m_inst =
                            safe_scf_instance_create(local_handle);
                        inst->ri_mi_deleted = B_FALSE;

                        libscf_reget_instance(inst);
                        method_remove_contract(inst, B_TRUE, B_TRUE);

                        scf_instance_destroy(inst->ri_m_inst);
                        inst->ri_m_inst = NULL;
                }

                /* Complete the transition: next state becomes the state. */
                switch (err = restarter_instance_update_states(local_handle,
                    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
                    reason)) {
                case 0:
                case ECONNRESET:
                        break;

                default:
                        bad_error("restarter_instance_update_states", err);
                }

                return (0);
        } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
                /*
                 * Stopping a wait service through means other than the pid
                 * exiting should keep wait_thread() from restarting the
                 * service, by removing it from the wait list.
                 * We cannot remove it right now otherwise the process will
                 * end up <defunct> so mark it to be ignored.
                 */
                wait_ignore_by_fmri(inst->ri_i.i_fmri);
        }

        /*
         * Record the pending transition: the instance heads for offline if
         * it is still enabled, or for disabled otherwise.
         */
        switch (err = restarter_instance_update_states(local_handle, inst,
            inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
            RESTARTER_STATE_DISABLED, RERR_NONE, reason)) {
        case 0:
        case ECONNRESET:
                break;

        default:
                bad_error("restarter_instance_update_states", err);
        }

        /*
         * Hand the stop method off to its own thread.
         * NOTE(review): info is presumably freed by method_thread(), since
         * it is not freed here -- confirm.
         */
        info = startd_zalloc(sizeof (fork_info_t));

        info->sf_id = inst->ri_id;
        info->sf_method_type = METHOD_STOP;
        info->sf_event_type = re;
        info->sf_reason = reason;
        inst->ri_method_thread = startd_thread_create(method_thread, info);

        return (0);
}
1190
1191 /*
1192  * Returns
1193  *   ENOENT - fmri is not in instance_list
1194  *   0 - success
1195  *   ECONNRESET - success, though handle was rebound
1196  *   -1 - instance is in transition
1197  */
1198 int
1199 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1200 {
1201         restarter_inst_t *rip;
1202         int r;
1203
1204         rip = inst_lookup_by_name(fmri);
1205         if (rip == NULL)
1206                 return (ENOENT);
1207
1208         r = stop_instance(h, rip, flags);
1209
1210         MUTEX_UNLOCK(&rip->ri_lock);
1211
1212         return (r);
1213 }
1214
1215 static void
1216 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1217     unmaint_cause_t cause)
1218 {
1219         ctid_t ctid;
1220         scf_instance_t *inst;
1221         int r;
1222         uint_t tries = 0, msecs = ALLOC_DELAY;
1223         const char *cp;
1224         restarter_str_t reason;
1225
1226         assert(MUTEX_HELD(&rip->ri_lock));
1227
1228         if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1229                 log_error(LOG_DEBUG, "Restarter: "
1230                     "Ignoring maintenance off command because %s is not in the "
1231                     "maintenance state.\n", rip->ri_i.i_fmri);
1232                 return;
1233         }
1234
1235         switch (cause) {
1236         case RUNMAINT_CLEAR:
1237                 cp = "clear requested";
1238                 reason = restarter_str_clear_request;
1239                 break;
1240         case RUNMAINT_DISABLE:
1241                 cp = "disable requested";
1242                 reason = restarter_str_disable_request;
1243                 break;
1244         default:
1245 #ifndef NDEBUG
1246                 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1247                     cause, __FILE__, __LINE__);
1248 #endif
1249                 abort();
1250         }
1251
1252         log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1253             cp);
1254         log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1255             "%s.\n", rip->ri_i.i_fmri, cp);
1256
1257         (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1258             RESTARTER_STATE_NONE, RERR_RESTART, reason);
1259
1260         /*
1261          * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1262          * a primary contract.
1263          */
1264         if (rip->ri_i.i_primary_ctid == 0)
1265                 return;
1266
1267         ctid = rip->ri_i.i_primary_ctid;
1268         contract_abandon(ctid);
1269         rip->ri_i.i_primary_ctid = 0;
1270
1271 rep_retry:
1272         switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1273         case 0:
1274                 break;
1275
1276         case ECONNABORTED:
1277                 libscf_handle_rebind(h);
1278                 goto rep_retry;
1279
1280         case ENOENT:
1281                 /* Must have been deleted. */
1282                 return;
1283
1284         case EINVAL:
1285         case ENOTSUP:
1286         default:
1287                 bad_error("libscf_handle_rebind", r);
1288         }
1289
1290 again:
1291         r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1292         switch (r) {
1293         case 0:
1294                 break;
1295
1296         case ENOMEM:
1297                 ++tries;
1298                 if (tries < ALLOC_RETRY) {
1299                         (void) poll(NULL, 0, msecs);
1300                         msecs *= ALLOC_DELAY_MULT;
1301                         goto again;
1302                 }
1303
1304                 uu_die("Insufficient memory.\n");
1305                 /* NOTREACHED */
1306
1307         case ECONNABORTED:
1308                 scf_instance_destroy(inst);
1309                 libscf_handle_rebind(h);
1310                 goto rep_retry;
1311
1312         case ECANCELED:
1313                 break;
1314
1315         case EPERM:
1316         case EACCES:
1317         case EROFS:
1318                 log_error(LOG_INFO,
1319                     "Could not remove contract id %lu for %s (%s).\n", ctid,
1320                     rip->ri_i.i_fmri, strerror(r));
1321                 break;
1322
1323         case EINVAL:
1324         case EBADF:
1325         default:
1326                 bad_error("restarter_remove_contract", r);
1327         }
1328
1329         scf_instance_destroy(inst);
1330 }
1331
1332 /*
1333  * enable_inst()
1334  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1335  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1336  *   disabled, move it to offline.  If the event is _DISABLE or
1337  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1338  *
1339  *   Returns
1340  *     0 - success
1341  *     ECONNRESET - h was rebound
1342  */
1343 static int
1344 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1345     restarter_instance_qentry_t *riq)
1346 {
1347         restarter_instance_state_t state;
1348         restarter_event_type_t e = riq->riq_type;
1349         restarter_str_t reason = restarter_str_per_configuration;
1350         int r;
1351
1352         assert(MUTEX_HELD(&inst->ri_lock));
1353         assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1354             e == RESTARTER_EVENT_TYPE_DISABLE ||
1355             e == RESTARTER_EVENT_TYPE_ENABLE);
1356         assert(instance_in_transition(inst) == 0);
1357
1358         state = inst->ri_i.i_state;
1359
1360         if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1361                 inst->ri_i.i_enabled = 1;
1362
1363                 if (state == RESTARTER_STATE_UNINIT ||
1364                     state == RESTARTER_STATE_DISABLED) {
1365                         /*
1366                          * B_FALSE: Don't log an error if the log_instance()
1367                          * fails because it will fail on the miniroot before
1368                          * install-discovery runs.
1369                          */
1370                         log_instance(inst, B_FALSE, "Enabled.");
1371                         log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1372                             inst->ri_i.i_fmri);
1373
1374                         /*
1375                          * If we are coming from DISABLED, it was obviously an
1376                          * enable request. If we are coming from UNINIT, it may
1377                          * have been a sevice in MAINT that was cleared.
1378                          */
1379                         if (riq->riq_reason == restarter_str_clear_request)
1380                                 reason = restarter_str_clear_request;
1381                         else if (state == RESTARTER_STATE_DISABLED)
1382                                 reason = restarter_str_enable_request;
1383                         (void) restarter_instance_update_states(h, inst,
1384                             RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1385                             RERR_NONE, reason);
1386                 } else {
1387                         log_framework(LOG_DEBUG, "Restarter: "
1388                             "Not changing state of %s for enable command.\n",
1389                             inst->ri_i.i_fmri);
1390                 }
1391         } else {
1392                 inst->ri_i.i_enabled = 0;
1393
1394                 switch (state) {
1395                 case RESTARTER_STATE_ONLINE:
1396                 case RESTARTER_STATE_DEGRADED:
1397                         r = stop_instance(h, inst, RSTOP_DISABLE);
1398                         return (r == ECONNRESET ? 0 : r);
1399
1400                 case RESTARTER_STATE_OFFLINE:
1401                 case RESTARTER_STATE_UNINIT:
1402                         if (inst->ri_i.i_primary_ctid != 0) {
1403                                 inst->ri_m_inst = safe_scf_instance_create(h);
1404                                 inst->ri_mi_deleted = B_FALSE;
1405
1406                                 libscf_reget_instance(inst);
1407                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1408
1409                                 scf_instance_destroy(inst->ri_m_inst);
1410                         }
1411                         /* B_FALSE: See log_instance(..., "Enabled."); above */
1412                         log_instance(inst, B_FALSE, "Disabled.");
1413                         log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1414                             inst->ri_i.i_fmri);
1415
1416                         /*
1417                          * If we are coming from OFFLINE, it was obviously a
1418                          * disable request. But if we are coming from
1419                          * UNINIT, it may have been a disable request for a
1420                          * service in MAINT.
1421                          */
1422                         if (riq->riq_reason == restarter_str_disable_request ||
1423                             state == RESTARTER_STATE_OFFLINE)
1424                                 reason = restarter_str_disable_request;
1425                         (void) restarter_instance_update_states(h, inst,
1426                             RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1427                             RERR_RESTART, reason);
1428                         return (0);
1429
1430                 case RESTARTER_STATE_DISABLED:
1431                         break;
1432
1433                 case RESTARTER_STATE_MAINT:
1434                         /*
1435                          * We only want to pull the instance out of maintenance
1436                          * if the disable is on adminstrative request.  The
1437                          * graph engine sends _DISABLE events whenever a
1438                          * service isn't in the disabled state, and we don't
1439                          * want to pull the service out of maintenance if,
1440                          * for example, it is there due to a dependency cycle.
1441                          */
1442                         if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1443                                 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1444                         break;
1445
1446                 default:
1447 #ifndef NDEBUG
1448                         (void) fprintf(stderr, "Restarter instance %s has "
1449                             "unknown state %d.\n", inst->ri_i.i_fmri, state);
1450 #endif
1451                         abort();
1452                 }
1453         }
1454
1455         return (0);
1456 }
1457
1458 static void
1459 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1460     int32_t reason)
1461 {
1462         fork_info_t *info;
1463         restarter_str_t new_reason;
1464
1465         assert(MUTEX_HELD(&inst->ri_lock));
1466         assert(instance_in_transition(inst) == 0);
1467         assert(inst->ri_method_thread == 0);
1468
1469         log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1470             inst->ri_i.i_fmri);
1471
1472         /*
1473          * We want to keep the original reason for restarts and clear actions
1474          */
1475         switch (reason) {
1476         case restarter_str_restart_request:
1477         case restarter_str_clear_request:
1478                 new_reason = reason;
1479                 break;
1480         default:
1481                 new_reason = restarter_str_dependencies_satisfied;
1482         }
1483
1484         /* Services in the disabled and maintenance state are ignored */
1485         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1486             inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1487             inst->ri_i.i_enabled == 0) {
1488                 log_framework(LOG_DEBUG,
1489                     "%s: start_instance -> is maint/disabled\n",
1490                     inst->ri_i.i_fmri);
1491                 return;
1492         }
1493
1494         /* Already started instances are left alone */
1495         if (instance_started(inst) == 1) {
1496                 log_framework(LOG_DEBUG,
1497                     "%s: start_instance -> is already started\n",
1498                     inst->ri_i.i_fmri);
1499                 return;
1500         }
1501
1502         log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1503
1504         (void) restarter_instance_update_states(local_handle, inst,
1505             inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1506
1507         info = startd_zalloc(sizeof (fork_info_t));
1508
1509         info->sf_id = inst->ri_id;
1510         info->sf_method_type = METHOD_START;
1511         info->sf_event_type = RERR_NONE;
1512         info->sf_reason = new_reason;
1513         inst->ri_method_thread = startd_thread_create(method_thread, info);
1514 }
1515
1516 static int
1517 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1518 {
1519         scf_instance_t *inst;
1520         int ret = 0;
1521
1522         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1523                 return (-1);
1524
1525         ret = restarter_inst_ractions_from_tty(inst);
1526
1527         scf_instance_destroy(inst);
1528         return (ret);
1529 }
1530
1531 static void
1532 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1533     restarter_str_t reason)
1534 {
1535         fork_info_t *info;
1536         scf_instance_t *scf_inst = NULL;
1537
1538         assert(MUTEX_HELD(&rip->ri_lock));
1539         assert(reason != restarter_str_none);
1540         assert(rip->ri_method_thread == 0);
1541
1542         log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1543             restarter_get_str_short(reason));
1544         log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1545             rip->ri_i.i_fmri, restarter_get_str_short(reason));
1546
1547         /* Services in the maintenance state are ignored */
1548         if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1549                 log_framework(LOG_DEBUG,
1550                     "%s: maintain_instance -> is already in maintenance\n",
1551                     rip->ri_i.i_fmri);
1552                 return;
1553         }
1554
1555         /*
1556          * If reason state is restarter_str_service_request and
1557          * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1558          * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1559          */
1560         if (reason == restarter_str_service_request &&
1561             libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1562                 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1563                         if (restarter_inst_set_aux_fmri(scf_inst))
1564                                 log_framework(LOG_DEBUG, "%s: "
1565                                     "restarter_inst_set_aux_fmri failed: ",
1566                                     rip->ri_i.i_fmri);
1567                 } else {
1568                         log_framework(LOG_DEBUG, "%s: "
1569                             "restarter_inst_validate_ractions_aux_fmri "
1570                             "failed: ", rip->ri_i.i_fmri);
1571
1572                         if (restarter_inst_reset_aux_fmri(scf_inst))
1573                                 log_framework(LOG_DEBUG, "%s: "
1574                                     "restarter_inst_reset_aux_fmri failed: ",
1575                                     rip->ri_i.i_fmri);
1576                 }
1577                 scf_instance_destroy(scf_inst);
1578         }
1579
1580         if (immediate || !instance_started(rip)) {
1581                 if (rip->ri_i.i_primary_ctid != 0) {
1582                         rip->ri_m_inst = safe_scf_instance_create(h);
1583                         rip->ri_mi_deleted = B_FALSE;
1584
1585                         libscf_reget_instance(rip);
1586                         method_remove_contract(rip, B_TRUE, B_TRUE);
1587
1588                         scf_instance_destroy(rip->ri_m_inst);
1589                 }
1590
1591                 (void) restarter_instance_update_states(h, rip,
1592                     RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1593                     reason);
1594                 return;
1595         }
1596
1597         (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1598             RESTARTER_STATE_MAINT, RERR_NONE, reason);
1599
1600         log_transition(rip, MAINT_REQUESTED);
1601
1602         info = startd_zalloc(sizeof (*info));
1603         info->sf_id = rip->ri_id;
1604         info->sf_method_type = METHOD_STOP;
1605         info->sf_event_type = RERR_RESTART;
1606         info->sf_reason = reason;
1607         rip->ri_method_thread = startd_thread_create(method_thread, info);
1608 }
1609
1610 static void
1611 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1612 {
1613         scf_instance_t *inst;
1614         scf_snapshot_t *snap;
1615         fork_info_t *info;
1616         int r;
1617
1618         assert(MUTEX_HELD(&rip->ri_lock));
1619
1620         log_instance(rip, B_TRUE, "Rereading configuration.");
1621         log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1622             rip->ri_i.i_fmri);
1623
1624 rep_retry:
1625         r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1626         switch (r) {
1627         case 0:
1628                 break;
1629
1630         case ECONNABORTED:
1631                 libscf_handle_rebind(h);
1632                 goto rep_retry;
1633
1634         case ENOENT:
1635                 /* Must have been deleted. */
1636                 return;
1637
1638         case EINVAL:
1639         case ENOTSUP:
1640         default:
1641                 bad_error("libscf_fmri_get_instance", r);
1642         }
1643
1644         snap = libscf_get_running_snapshot(inst);
1645
1646         r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1647             &rip->ri_utmpx_prefix);
1648         switch (r) {
1649         case 0:
1650                 log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1651                     rip->ri_i.i_fmri, service_style(rip->ri_flags));
1652                 break;
1653
1654         case ECONNABORTED:
1655                 scf_instance_destroy(inst);
1656                 scf_snapshot_destroy(snap);
1657                 libscf_handle_rebind(h);
1658                 goto rep_retry;
1659
1660         case ECANCELED:
1661         case ENOENT:
1662                 /* Succeed in anticipation of REMOVE_INSTANCE. */
1663                 break;
1664
1665         default:
1666                 bad_error("libscf_get_startd_properties", r);
1667         }
1668
1669         if (instance_started(rip)) {
1670                 /* Refresh does not change the state. */
1671                 (void) restarter_instance_update_states(h, rip,
1672                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1673                     restarter_str_refresh);
1674
1675                 info = startd_zalloc(sizeof (*info));
1676                 info->sf_id = rip->ri_id;
1677                 info->sf_method_type = METHOD_REFRESH;
1678                 info->sf_event_type = RERR_REFRESH;
1679                 info->sf_reason = NULL;
1680
1681                 assert(rip->ri_method_thread == 0);
1682                 rip->ri_method_thread =
1683                     startd_thread_create(method_thread, info);
1684         }
1685
1686         scf_snapshot_destroy(snap);
1687         scf_instance_destroy(inst);
1688 }
1689
/*
 * Printable names for restarter events.
 * NOTE(review): presumably indexed by the numeric restarter event type, in
 * which case the order here must match that enumeration -- confirm against
 * its definition before adding or reordering entries.
 */
const char *event_names[] = { "INVALID", "ADD_INSTANCE", "REMOVE_INSTANCE",
        "ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
        "ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
        "ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
        "INVALID_DEPENDENCY", "ADMIN_DISABLE", "STOP_RESET"
};
1696
1697 /*
1698  * void *restarter_process_events()
1699  *
1700  *   Called in a separate thread to process the events on an instance's
1701  *   queue.  Empties the queue completely, and tries to keep the thread
1702  *   around for a little while after the queue is empty to save on
1703  *   startup costs.
1704  */
1705 static void *
1706 restarter_process_events(void *arg)
1707 {
1708         scf_handle_t *h;
1709         restarter_instance_qentry_t *event;
1710         restarter_inst_t *rip;
1711         char *fmri = (char *)arg;
1712         struct timespec to;
1713
1714         assert(fmri != NULL);
1715
1716         h = libscf_handle_create_bound_loop();
1717
1718         /* grab the queue lock */
1719         rip = inst_lookup_queue(fmri);
1720         if (rip == NULL)
1721                 goto out;
1722
1723 again:
1724
1725         while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1726                 restarter_inst_t *inst;
1727
1728                 /* drop the queue lock */
1729                 MUTEX_UNLOCK(&rip->ri_queue_lock);
1730
1731                 /*
1732                  * Grab the inst lock -- this waits until any outstanding
1733                  * method finishes running.
1734                  */
1735                 inst = inst_lookup_by_name(fmri);
1736                 if (inst == NULL) {
1737                         /* Getting deleted in the middle isn't an error. */
1738                         goto cont;
1739                 }
1740
1741                 assert(instance_in_transition(inst) == 0);
1742
1743                 /* process the event */
1744                 switch (event->riq_type) {
1745                 case RESTARTER_EVENT_TYPE_ENABLE:
1746                 case RESTARTER_EVENT_TYPE_DISABLE:
1747                         (void) enable_inst(h, inst, event);
1748                         break;
1749
1750                 case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1751                         if (enable_inst(h, inst, event) == 0)
1752                                 reset_start_times(inst);
1753                         break;
1754
1755                 case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1756                         restarter_delete_inst(inst);
1757                         inst = NULL;
1758                         goto cont;
1759
1760                 case RESTARTER_EVENT_TYPE_STOP_RESET:
1761                         reset_start_times(inst);
1762                         /* FALLTHROUGH */
1763                 case RESTARTER_EVENT_TYPE_STOP:
1764                         (void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1765                         break;
1766
1767                 case RESTARTER_EVENT_TYPE_START:
1768                         start_instance(h, inst, event->riq_reason);
1769                         break;
1770
1771                 case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1772                         maintain_instance(h, inst, 0,
1773                             restarter_str_dependency_cycle);
1774                         break;
1775
1776                 case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1777                         maintain_instance(h, inst, 0,
1778                             restarter_str_invalid_dependency);
1779                         break;
1780
1781                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1782                         if (event_from_tty(h, inst) == 0)
1783                                 maintain_instance(h, inst, 0,
1784                                     restarter_str_service_request);
1785                         else
1786                                 maintain_instance(h, inst, 0,
1787                                     restarter_str_administrative_request);
1788                         break;
1789
1790                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1791                         if (event_from_tty(h, inst) == 0)
1792                                 maintain_instance(h, inst, 1,
1793                                     restarter_str_service_request);
1794                         else
1795                                 maintain_instance(h, inst, 1,
1796                                     restarter_str_administrative_request);
1797                         break;
1798
1799                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1800                         unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1801                         reset_start_times(inst);
1802                         break;
1803
1804                 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1805                         refresh_instance(h, inst);
1806                         break;
1807
1808                 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1809                         log_framework(LOG_WARNING, "Restarter: "
1810                             "%s command (for %s) unimplemented.\n",
1811                             event_names[event->riq_type], inst->ri_i.i_fmri);
1812                         break;
1813
1814                 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1815                         if (!instance_started(inst)) {
1816                                 log_framework(LOG_DEBUG, "Restarter: "
1817                                     "Not restarting %s; not running.\n",
1818                                     inst->ri_i.i_fmri);
1819                         } else {
1820                                 /*
1821                                  * Stop the instance.  If it can be restarted,
1822                                  * the graph engine will send a new event.
1823                                  */
1824                                 if (stop_instance(h, inst, RSTOP_RESTART) == 0)
1825                                         reset_start_times(inst);
1826                         }
1827                         break;
1828
1829                 case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1830                 default:
1831 #ifndef NDEBUG
1832                         uu_warn("%s:%d: Bad restarter event %d.  "
1833                             "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1834 #endif
1835                         abort();
1836                 }
1837
1838                 assert(inst != NULL);
1839                 MUTEX_UNLOCK(&inst->ri_lock);
1840
1841 cont:
1842                 /* grab the queue lock */
1843                 rip = inst_lookup_queue(fmri);
1844                 if (rip == NULL)
1845                         goto out;
1846
1847                 /* delete the event */
1848                 uu_list_remove(rip->ri_queue, event);
1849                 startd_free(event, sizeof (restarter_instance_qentry_t));
1850         }
1851
1852         assert(rip != NULL);
1853
1854         /*
1855          * Try to preserve the thread for a little while for future use.
1856          */
1857         to.tv_sec = 3;
1858         to.tv_nsec = 0;
1859         (void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1860             &rip->ri_queue_lock, &to);
1861
1862         if (uu_list_first(rip->ri_queue) != NULL)
1863                 goto again;
1864
1865         rip->ri_queue_thread = 0;
1866         MUTEX_UNLOCK(&rip->ri_queue_lock);
1867
1868 out:
1869         (void) scf_handle_unbind(h);
1870         scf_handle_destroy(h);
1871         free(fmri);
1872         return (NULL);
1873 }
1874
1875 static int
1876 is_admin_event(restarter_event_type_t t) {
1877
1878         switch (t) {
1879         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1880         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1881         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1882         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1883         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1884         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1885                 return (1);
1886         default:
1887                 return (0);
1888         }
1889 }
1890
1891 static void
1892 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1893 {
1894         restarter_instance_qentry_t *qe;
1895         int r;
1896
1897         assert(MUTEX_HELD(&ri->ri_queue_lock));
1898         assert(!MUTEX_HELD(&ri->ri_lock));
1899
1900         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1901         qe->riq_type = e->rpe_type;
1902         qe->riq_reason = e->rpe_reason;
1903
1904         uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1905         r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1906         assert(r == 0);
1907 }
1908
1909 /*
1910  * void *restarter_event_thread()
1911  *
1912  *  Handle incoming graph events by placing them on a per-instance
1913  *  queue.  We can't lock the main part of the instance structure, so
1914  *  just modify the seprarately locked event queue portion.
1915  */
1916 /*ARGSUSED*/
1917 static void *
1918 restarter_event_thread(void *unused)
1919 {
1920         scf_handle_t *h;
1921
1922         /*
1923          * This is a new thread, and thus, gets its own handle
1924          * to the repository.
1925          */
1926         h = libscf_handle_create_bound_loop();
1927
1928         MUTEX_LOCK(&ru->restarter_update_lock);
1929
1930         /*CONSTCOND*/
1931         while (1) {
1932                 restarter_protocol_event_t *e;
1933
1934                 while (ru->restarter_update_wakeup == 0)
1935                         (void) pthread_cond_wait(&ru->restarter_update_cv,
1936                             &ru->restarter_update_lock);
1937
1938                 ru->restarter_update_wakeup = 0;
1939
1940                 while ((e = restarter_event_dequeue()) != NULL) {
1941                         restarter_inst_t *rip;
1942                         char *fmri;
1943
1944                         MUTEX_UNLOCK(&ru->restarter_update_lock);
1945
1946                         /*
1947                          * ADD_INSTANCE is special: there's likely no
1948                          * instance structure yet, so we need to handle the
1949                          * addition synchronously.
1950                          */
1951                         switch (e->rpe_type) {
1952                         case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1953                                 if (restarter_insert_inst(h, e->rpe_inst) != 0)
1954                                         log_error(LOG_INFO, "Restarter: "
1955                                             "Could not add %s.\n", e->rpe_inst);
1956
1957                                 MUTEX_LOCK(&st->st_load_lock);
1958                                 if (--st->st_load_instances == 0)
1959                                         (void) pthread_cond_broadcast(
1960                                             &st->st_load_cv);
1961                                 MUTEX_UNLOCK(&st->st_load_lock);
1962
1963                                 goto nolookup;
1964                         }
1965
1966                         /*
1967                          * Lookup the instance, locking only the event queue.
1968                          * Can't grab ri_lock here because it might be held
1969                          * by a long-running method.
1970                          */
1971                         rip = inst_lookup_queue(e->rpe_inst);
1972                         if (rip == NULL) {
1973                                 log_error(LOG_INFO, "Restarter: "
1974                                     "Ignoring %s command for unknown service "
1975                                     "%s.\n", event_names[e->rpe_type],
1976                                     e->rpe_inst);
1977                                 goto nolookup;
1978                         }
1979
1980                         /* Keep ADMIN events from filling up the queue. */
1981                         if (is_admin_event(e->rpe_type) &&
1982                             uu_list_numnodes(rip->ri_queue) >
1983                             RINST_QUEUE_THRESHOLD) {
1984                                 MUTEX_UNLOCK(&rip->ri_queue_lock);
1985                                 log_instance(rip, B_TRUE, "Instance event "
1986                                     "queue overflow.  Dropping administrative "
1987                                     "request.");
1988                                 log_framework(LOG_DEBUG, "%s: Instance event "
1989                                     "queue overflow.  Dropping administrative "
1990                                     "request.\n", rip->ri_i.i_fmri);
1991                                 goto nolookup;
1992                         }
1993
1994                         /* Now add the event to the instance queue. */
1995                         restarter_queue_event(rip, e);
1996
1997                         if (rip->ri_queue_thread == 0) {
1998                                 /*
1999                                  * Start a thread if one isn't already
2000                                  * running.
2001                                  */
2002                                 fmri = safe_strdup(e->rpe_inst);
2003                                 rip->ri_queue_thread =  startd_thread_create(
2004                                     restarter_process_events, (void *)fmri);
2005                         } else {
2006                                 /*
2007                                  * Signal the existing thread that there's
2008                                  * a new event.
2009                                  */
2010                                 (void) pthread_cond_broadcast(
2011                                     &rip->ri_queue_cv);
2012                         }
2013
2014                         MUTEX_UNLOCK(&rip->ri_queue_lock);
2015 nolookup:
2016                         restarter_event_release(e);
2017
2018                         MUTEX_LOCK(&ru->restarter_update_lock);
2019                 }
2020         }
2021
2022         /*
2023          * Unreachable for now -- there's currently no graceful cleanup
2024          * called on exit().
2025          */
2026         (void) scf_handle_unbind(h);
2027         scf_handle_destroy(h);
2028         return (NULL);
2029 }
2030
2031 static restarter_inst_t *
2032 contract_to_inst(ctid_t ctid)
2033 {
2034         restarter_inst_t *inst;
2035         int id;
2036
2037         id = lookup_inst_by_contract(ctid);
2038         if (id == -1)
2039                 return (NULL);
2040
2041         inst = inst_lookup_by_id(id);
2042         if (inst != NULL) {
2043                 /*
2044                  * Since ri_lock isn't held by the contract id lookup, this
2045                  * instance may have been restarted and now be in a new
2046                  * contract, making the old contract no longer valid for this
2047                  * instance.
2048                  */
2049                 if (ctid != inst->ri_i.i_primary_ctid) {
2050                         MUTEX_UNLOCK(&inst->ri_lock);
2051                         inst = NULL;
2052                 }
2053         }
2054         return (inst);
2055 }
2056
2057 /*
2058  * void contract_action()
2059  *   Take action on contract events.
2060  */
2061 static void
2062 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2063     uint32_t type)
2064 {
2065         const char *fmri = inst->ri_i.i_fmri;
2066
2067         assert(MUTEX_HELD(&inst->ri_lock));
2068
2069         /*
2070          * If startd has stopped this contract, there is no need to
2071          * stop it again.
2072          */
2073         if (inst->ri_i.i_primary_ctid > 0 &&
2074             inst->ri_i.i_primary_ctid_stopped)
2075                 return;
2076
2077         if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2078             | CT_PR_EV_HWERR)) == 0) {
2079                 /*
2080                  * There shouldn't be other events, since that's not how we set
2081                  * the terms. Thus, just log an error and drive on.
2082                  */
2083                 log_framework(LOG_NOTICE,
2084                     "%s: contract %ld received unexpected critical event "
2085                     "(%d)\n", fmri, id, type);
2086                 return;
2087         }
2088
2089         assert(instance_in_transition(inst) == 0);
2090
2091         if (instance_is_wait_style(inst)) {
2092                 /*
2093                  * We ignore all events; if they impact the
2094                  * process we're monitoring, then the
2095                  * wait_thread will stop the instance.
2096                  */
2097                 log_framework(LOG_DEBUG,
2098                     "%s: ignoring contract event on wait-style service\n",
2099                     fmri);
2100         } else {
2101                 /*
2102                  * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2103                  */
2104                 switch (type) {
2105                 case CT_PR_EV_EMPTY:
2106                         (void) stop_instance(h, inst, RSTOP_EXIT);
2107                         break;
2108                 case CT_PR_EV_CORE:
2109                         (void) stop_instance(h, inst, RSTOP_CORE);
2110                         break;
2111                 case CT_PR_EV_SIGNAL:
2112                         (void) stop_instance(h, inst, RSTOP_SIGNAL);
2113                         break;
2114                 case CT_PR_EV_HWERR:
2115                         (void) stop_instance(h, inst, RSTOP_HWERR);
2116                         break;
2117                 }
2118         }
2119 }
2120
2121 /*
2122  * void *restarter_contract_event_thread(void *)
2123  *   Listens to the process contract bundle for critical events, taking action
2124  *   on events from contracts we know we are responsible for.
2125  */
2126 /*ARGSUSED*/
2127 static void *
2128 restarter_contracts_event_thread(void *unused)
2129 {
2130         int fd, err;
2131         scf_handle_t *local_handle;
2132
2133         /*
2134          * Await graph load completion.  That is, stop here, until we've scanned
2135          * the repository for contract - instance associations.
2136          */
2137         MUTEX_LOCK(&st->st_load_lock);
2138         while (!(st->st_load_complete && st->st_load_instances == 0))
2139                 (void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2140         MUTEX_UNLOCK(&st->st_load_lock);
2141
2142         /*
2143          * This is a new thread, and thus, gets its own handle
2144          * to the repository.
2145          */
2146         if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2147                 uu_die("Unable to bind a new repository handle: %s\n",
2148                     scf_strerror(scf_error()));
2149
2150         fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2151         if (fd == -1)
2152                 uu_die("process bundle open failed");
2153
2154         /*
2155          * Make sure we get all events (including those generated by configd
2156          * before this thread was started).
2157          */
2158         err = ct_event_reset(fd);
2159         assert(err == 0);
2160
2161         for (;;) {
2162                 int efd, sfd;
2163                 ct_evthdl_t ev;
2164                 uint32_t type;
2165                 ctevid_t evid;
2166                 ct_stathdl_t status;
2167                 ctid_t ctid;
2168                 restarter_inst_t *inst;
2169                 uint64_t cookie;
2170
2171                 if (err = ct_event_read_critical(fd, &ev)) {
2172                         log_error(LOG_WARNING,
2173                             "Error reading next contract event: %s",
2174                             strerror(err));
2175                         continue;
2176                 }
2177
2178                 evid = ct_event_get_evid(ev);
2179                 ctid = ct_event_get_ctid(ev);
2180                 type = ct_event_get_type(ev);
2181
2182                 /* Fetch cookie. */
2183                 if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2184                     < 0) {
2185                         ct_event_free(ev);
2186                         continue;
2187                 }
2188
2189                 if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2190                         log_framework(LOG_WARNING, "Could not get status for "
2191                             "contract %ld: %s\n", ctid, strerror(err));
2192
2193                         startd_close(sfd);
2194                         ct_event_free(ev);
2195                         continue;
2196                 }
2197
2198                 cookie = ct_status_get_cookie(status);
2199
2200                 log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2201                     "cookie %lld\n", type, ctid, cookie);
2202
2203                 ct_status_free(status);
2204
2205                 startd_close(sfd);
2206
2207                 /*
2208                  * svc.configd(1M) restart handling performed by the
2209                  * fork_configd_thread.  We don't acknowledge, as that thread
2210                  * will do so.
2211                  */
2212                 if (cookie == CONFIGD_COOKIE) {
2213                         ct_event_free(ev);
2214                         continue;
2215                 }
2216
2217                 inst = NULL;
2218                 if (storing_contract != 0 &&
2219                     (inst = contract_to_inst(ctid)) == NULL) {
2220                         /*
2221                          * This can happen for two reasons:
2222                          * - method_run() has not yet stored the
2223                          *    the contract into the internal hash table.
2224                          * - we receive an EMPTY event for an abandoned
2225                          *    contract.
2226                          * If there is any contract in the process of
2227                          * being stored into the hash table then re-read
2228                          * the event later.
2229                          */
2230                         log_framework(LOG_DEBUG,
2231                             "Reset event %d for unknown "
2232                             "contract id %ld\n", type, ctid);
2233
2234                         /* don't go too fast */
2235                         (void) poll(NULL, 0, 100);
2236
2237                         (void) ct_event_reset(fd);
2238                         ct_event_free(ev);
2239                         continue;
2240                 }
2241
2242                 /*
2243                  * Do not call contract_to_inst() again if first
2244                  * call succeeded.
2245                  */
2246                 if (inst == NULL)
2247                         inst = contract_to_inst(ctid);
2248                 if (inst == NULL) {
2249                         /*
2250                          * This can happen if we receive an EMPTY
2251                          * event for an abandoned contract.
2252                          */
2253                         log_framework(LOG_DEBUG,
2254                             "Received event %d for unknown contract id "
2255                             "%ld\n", type, ctid);
2256                 } else {
2257                         log_framework(LOG_DEBUG,
2258                             "Received event %d for contract id "
2259                             "%ld (%s)\n", type, ctid,
2260                             inst->ri_i.i_fmri);
2261
2262                         contract_action(local_handle, inst, ctid, type);
2263
2264                         MUTEX_UNLOCK(&inst->ri_lock);
2265                 }
2266
2267                 efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2268                     O_WRONLY);
2269                 if (efd != -1) {
2270                         (void) ct_ctl_ack(efd, evid);
2271                         startd_close(efd);
2272                 }
2273
2274                 ct_event_free(ev);
2275
2276         }
2277
2278         /*NOTREACHED*/
2279         return (NULL);
2280 }
2281
2282 /*
2283  * Timeout queue, processed by restarter_timeouts_event_thread().
2284  */
2285 timeout_queue_t *timeouts;
2286 static uu_list_pool_t *timeout_pool;
2287
2288 typedef struct timeout_update {
2289         pthread_mutex_t         tu_lock;
2290         pthread_cond_t          tu_cv;
2291         int                     tu_wakeup;
2292 } timeout_update_t;
2293
2294 timeout_update_t *tu;
2295
/*
 * FMRIs of the instances for which is_timeout_ovr() reports a timeout
 * override.  The list is terminated by a NULL entry and is never modified,
 * so both the strings and the table itself are const.
 */
static const char * const timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2310
2311 int
2312 is_timeout_ovr(restarter_inst_t *inst)
2313 {
2314         int i;
2315
2316         for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2317                 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2318                         log_instance(inst, B_TRUE, "Timeout override by "
2319                             "svc.startd.  Using infinite timeout.");
2320                         return (1);
2321                 }
2322         }
2323
2324         return (0);
2325 }
2326
2327 /*ARGSUSED*/
2328 static int
2329 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2330 {
2331         hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2332         hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2333
2334         if (t1 > t2)
2335                 return (1);
2336         else if (t1 < t2)
2337                 return (-1);
2338         return (0);
2339 }
2340
2341 void
2342 timeout_init()
2343 {
2344         timeouts = startd_zalloc(sizeof (timeout_queue_t));
2345
2346         (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2347
2348         timeout_pool = startd_list_pool_create("timeouts",
2349             sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2350             timeout_compare, UU_LIST_POOL_DEBUG);
2351         assert(timeout_pool != NULL);
2352
2353         timeouts->tq_list = startd_list_create(timeout_pool,
2354             timeouts, UU_LIST_SORTED);
2355         assert(timeouts->tq_list != NULL);
2356
2357         tu = startd_zalloc(sizeof (timeout_update_t));
2358         (void) pthread_cond_init(&tu->tu_cv, NULL);
2359         (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2360 }
2361
2362 void
2363 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2364 {
2365         hrtime_t now, timeout;
2366         timeout_entry_t *entry;
2367         uu_list_index_t idx;
2368
2369         assert(MUTEX_HELD(&inst->ri_lock));
2370
2371         now = gethrtime();
2372
2373         /*
2374          * If we overflow LLONG_MAX, we're never timing out anyways, so
2375          * just return.
2376          */
2377         if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2378                 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2379                     "treating as infinite.");
2380                 return;
2381         }
2382
2383         /* hrtime is in nanoseconds. Convert timeout_sec. */
2384         timeout = now + (timeout_sec * 1000000000LL);
2385
2386         entry = startd_alloc(sizeof (timeout_entry_t));
2387         entry->te_timeout = timeout;
2388         entry->te_ctid = cid;
2389         entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2390         entry->te_logstem = safe_strdup(inst->ri_logstem);
2391         entry->te_fired = 0;
2392         /* Insert the calculated timeout time onto the queue. */
2393         MUTEX_LOCK(&timeouts->tq_lock);
2394         (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2395         uu_list_node_init(entry, &entry->te_link, timeout_pool);
2396         uu_list_insert(timeouts->tq_list, entry, idx);
2397         MUTEX_UNLOCK(&timeouts->tq_lock);
2398
2399         assert(inst->ri_timeout == NULL);
2400         inst->ri_timeout = entry;
2401
2402         MUTEX_LOCK(&tu->tu_lock);
2403         tu->tu_wakeup = 1;
2404         (void) pthread_cond_broadcast(&tu->tu_cv);
2405         MUTEX_UNLOCK(&tu->tu_lock);
2406 }
2407
2408
2409 void
2410 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2411 {
2412         assert(MUTEX_HELD(&inst->ri_lock));
2413
2414         if (inst->ri_timeout == NULL)
2415                 return;
2416
2417         assert(inst->ri_timeout->te_ctid == cid);
2418
2419         MUTEX_LOCK(&timeouts->tq_lock);
2420         uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2421         MUTEX_UNLOCK(&timeouts->tq_lock);
2422
2423         free(inst->ri_timeout->te_fmri);
2424         free(inst->ri_timeout->te_logstem);
2425         startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2426         inst->ri_timeout = NULL;
2427 }
2428
2429 static int
2430 timeout_now()
2431 {
2432         timeout_entry_t *e;
2433         hrtime_t now;
2434         int ret;
2435
2436         now = gethrtime();
2437
2438         /*
2439          * Walk through the (sorted) timeouts list.  While the timeout
2440          * at the head of the list is <= the current time, kill the
2441          * method.
2442          */
2443         MUTEX_LOCK(&timeouts->tq_lock);
2444
2445         for (e = uu_list_first(timeouts->tq_list);
2446             e != NULL && e->te_timeout <= now;
2447             e = uu_list_next(timeouts->tq_list, e)) {
2448                 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2449                     "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2450                 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2451                     "Method or service exit timed out.  Killing contract %ld.",
2452                     e->te_ctid);
2453                 e->te_fired = 1;
2454                 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2455         }
2456
2457         if (uu_list_numnodes(timeouts->tq_list) > 0)
2458                 ret = 0;
2459         else
2460                 ret = -1;
2461
2462         MUTEX_UNLOCK(&timeouts->tq_lock);
2463
2464         return (ret);
2465 }
2466
2467 /*
2468  * void *restarter_timeouts_event_thread(void *)
2469  *   Responsible for monitoring the method timeouts.  This thread must
2470  *   be started before any methods are called.
2471  */
2472 /*ARGSUSED*/
2473 static void *
2474 restarter_timeouts_event_thread(void *unused)
2475 {
2476         /*
2477          * Timeouts are entered on a priority queue, which is processed by
2478          * this thread.  As timeouts are specified in seconds, we'll do
2479          * the necessary processing every second, as long as the queue
2480          * is not empty.
2481          */
2482
2483         /*CONSTCOND*/
2484         while (1) {
2485                 /*
2486                  * As long as the timeout list isn't empty, process it
2487                  * every second.
2488                  */
2489                 if (timeout_now() == 0) {
2490                         (void) sleep(1);
2491                         continue;
2492                 }
2493
2494                 /* The list is empty, wait until we have more timeouts. */
2495                 MUTEX_LOCK(&tu->tu_lock);
2496
2497                 while (tu->tu_wakeup == 0)
2498                         (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2499
2500                 tu->tu_wakeup = 0;
2501                 MUTEX_UNLOCK(&tu->tu_lock);
2502         }
2503
2504         return (NULL);
2505 }
2506
2507 void
2508 restarter_start()
2509 {
2510         (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2511         (void) startd_thread_create(restarter_event_thread, NULL);
2512         (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2513         (void) startd_thread_create(wait_thread, NULL);
2514 }
2515
2516
2517 void
2518 restarter_init()
2519 {
2520         restarter_instance_pool = startd_list_pool_create("restarter_instances",
2521             sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2522             ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2523         (void) memset(&instance_list, 0, sizeof (instance_list));
2524
2525         (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2526         instance_list.ril_instance_list = startd_list_create(
2527             restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2528
2529         restarter_queue_pool = startd_list_pool_create(
2530             "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2531             offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2532             UU_LIST_POOL_DEBUG);
2533
2534         contract_list_pool = startd_list_pool_create(
2535             "contract_list", sizeof (contract_entry_t),
2536             offsetof(contract_entry_t,  ce_link), NULL,
2537             UU_LIST_POOL_DEBUG);
2538         contract_hash_init();
2539
2540         log_framework(LOG_DEBUG, "Initialized restarter\n");
2541 }