usr/src/cmd/svc/startd/restarter.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  25  */
  26
  27 /*
  28  * restarter.c - service manipulation
  29  *
  30  * This component manages services whose restarter is svc.startd, the standard
  31  * restarter.  It translates restarter protocol events from the graph engine
  32  * into actions on processes, as a delegated restarter would do.
  33  *
  34  * The master restarter manages a number of always-running threads:
  35  *   - restarter event thread: events from the graph engine
  36  *   - timeout thread: thread to fire queued timeouts
  37  *   - contract thread: thread to handle contract events
  38  *   - wait thread: thread to handle wait-based services
  39  *
  40  * The other threads are created as-needed:
  41  *   - per-instance method threads
  42  *   - per-instance event processing threads
  43  *
  44  * The interaction of all threads must result in the following conditions
  45  * being satisfied (on a per-instance basis):
  46  *   - restarter events must be processed in order
  47  *   - method execution must be serialized
  48  *   - instance delete must be held until outstanding methods are complete
  49  *   - contract events shouldn't be processed while a method is running
  50  *   - timeouts should fire even when a method is running
  51  *
  52  * Service instances are represented by restarter_inst_t's and are kept in the
  53  * instance_list list.
  54  *
  55  * Service States
  56  *   The current state of a service instance is kept in
  57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
  58  *   some time, then before we effect the transition we set
  59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
  60  *   rotate i_next_state to i_state and set i_next_state to
  61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
  62  *   held.  The exception is when we launch methods, which are done with
  63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
  64  *   method_thread() does, we set ri_method_thread to the thread id of the
  65  *   method thread, and when it is nonzero any thread with a different thread id
  66  *   waits on ri_method_cv.
  67  *
  68  * Method execution is serialized by blocking on ri_method_cv in
  69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
  70  * also prevents the instance structure from being deleted until all
  71  * outstanding operations such as method_thread() have finished.
  72  *
  73  * Lock ordering:
  74  *
  75  * dgraph_lock [can be held when taking:]
  76  *   utmpx_lock
  77  *   dictionary->dict_lock
  78  *   st->st_load_lock
  79  *   wait_info_lock
  80  *   ru->restarter_update_lock
  81  *     restarter_queue->rpeq_lock
  82  *   instance_list.ril_lock
  83  *     inst->ri_lock
  84  *   st->st_configd_live_lock
  85  *
  86  * instance_list.ril_lock
  87  *   graph_queue->gpeq_lock
  88  *   gu->gu_lock
  89  *   st->st_configd_live_lock
  90  *   dictionary->dict_lock
  91  *   inst->ri_lock
  92  *     graph_queue->gpeq_lock
  93  *     gu->gu_lock
  94  *     tu->tu_lock
  95  *     tq->tq_lock
  96  *     inst->ri_queue_lock
  97  *       wait_info_lock
  98  *       bp->cb_lock
  99  *     utmpx_lock
 100  *
 101  * single_user_thread_lock
 102  *   wait_info_lock
 103  *   utmpx_lock
 104  *
 105  * gu_freeze_lock
 106  *
 107  * logbuf_mutex nests inside pretty much everything.
 108  */
 109
 110 #include <sys/contract/process.h>
 111 #include <sys/ctfs.h>
 112 #include <sys/stat.h>
 113 #include <sys/time.h>
 114 #include <sys/types.h>
 115 #include <sys/uio.h>
 116 #include <sys/wait.h>
 117 #include <assert.h>
 118 #include <errno.h>
 119 #include <fcntl.h>
 120 #include <libcontract.h>
 121 #include <libcontract_priv.h>
 122 #include <libintl.h>
 123 #include <librestart.h>
 124 #include <librestart_priv.h>
 125 #include <libuutil.h>
 126 #include <limits.h>
 127 #include <poll.h>
 128 #include <port.h>
 129 #include <pthread.h>
 130 #include <stdarg.h>
 131 #include <stdio.h>
 132 #include <strings.h>
 133 #include <unistd.h>
 134
 135 #include "startd.h"
 136 #include "protocol.h"
 137
 138 static uu_list_pool_t *restarter_instance_pool;
 139 static restarter_instance_list_t instance_list;
 140
 141 static uu_list_pool_t *restarter_queue_pool;
 142
 143 #define WT_SVC_ERR_THROTTLE     1       /* 1 sec delay for erroring wait svc */
 144
 145 /*
 146  * Function used to reset the restart times for an instance, when
 147  * an administrative task comes along and essentially makes the times
 148  * in this array ineffective.
 149  */
 150 static void
 151 reset_start_times(restarter_inst_t *inst)
 152 {
 153         inst->ri_start_index = 0;
 154         bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
 155 }
 156
 157 /*ARGSUSED*/
 158 static int
 159 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
 160     void *private)
 161 {
 162         int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
 163         int rc_id = *(int *)rc_arg;
 164
 165         if (lc_id > rc_id)
 166                 return (1);
 167         if (lc_id < rc_id)
 168                 return (-1);
 169         return (0);
 170 }
 171
 172 static restarter_inst_t *
 173 inst_lookup_by_name(const char *name)
 174 {
 175         int id;
 176
 177         id = dict_lookup_byname(name);
 178         if (id == -1)
 179                 return (NULL);
 180
 181         return (inst_lookup_by_id(id));
 182 }
 183
 184 restarter_inst_t *
 185 inst_lookup_by_id(int id)
 186 {
 187         restarter_inst_t *inst;
 188
 189         MUTEX_LOCK(&instance_list.ril_lock);
 190         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 191         if (inst != NULL)
 192                 MUTEX_LOCK(&inst->ri_lock);
 193         MUTEX_UNLOCK(&instance_list.ril_lock);
 194
 195         if (inst != NULL) {
 196                 while (inst->ri_method_thread != 0 &&
 197                     !pthread_equal(inst->ri_method_thread, pthread_self())) {
 198                         ++inst->ri_method_waiters;
 199                         (void) pthread_cond_wait(&inst->ri_method_cv,
 200                             &inst->ri_lock);
 201                         assert(inst->ri_method_waiters > 0);
 202                         --inst->ri_method_waiters;
 203                 }
 204         }
 205
 206         return (inst);
 207 }
 208
 209 static restarter_inst_t *
 210 inst_lookup_queue(const char *name)
 211 {
 212         int id;
 213         restarter_inst_t *inst;
 214
 215         id = dict_lookup_byname(name);
 216         if (id == -1)
 217                 return (NULL);
 218
 219         MUTEX_LOCK(&instance_list.ril_lock);
 220         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 221         if (inst != NULL)
 222                 MUTEX_LOCK(&inst->ri_queue_lock);
 223         MUTEX_UNLOCK(&instance_list.ril_lock);
 224
 225         return (inst);
 226 }
 227
 228 const char *
 229 service_style(int flags)
 230 {
 231         switch (flags & RINST_STYLE_MASK) {
 232         case RINST_CONTRACT:    return ("contract");
 233         case RINST_TRANSIENT:   return ("transient");
 234         case RINST_WAIT:        return ("wait");
 235
 236         default:
 237 #ifndef NDEBUG
 238                 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
 239 #endif
 240                 abort();
 241                 /* NOTREACHED */
 242         }
 243 }
 244
 245 /*
 246  * Fails with ECONNABORTED or ECANCELED.
 247  */
 248 static int
 249 check_contract(restarter_inst_t *inst, boolean_t primary,
 250     scf_instance_t *scf_inst)
 251 {
 252         ctid_t *ctidp;
 253         int fd, r;
 254
 255         ctidp = primary ? &inst->ri_i.i_primary_ctid :
 256             &inst->ri_i.i_transient_ctid;
 257
 258         assert(*ctidp >= 1);
 259
 260         fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
 261         if (fd >= 0) {
 262                 r = close(fd);
 263                 assert(r == 0);
 264                 return (0);
 265         }
 266
 267         r = restarter_remove_contract(scf_inst, *ctidp, primary ?
 268             RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
 269         switch (r) {
 270         case 0:
 271         case ECONNABORTED:
 272         case ECANCELED:
 273                 *ctidp = 0;
 274                 return (r);
 275
 276         case ENOMEM:
 277                 uu_die("Out of memory\n");
 278                 /* NOTREACHED */
 279
 280         case EPERM:
 281                 uu_die("Insufficient privilege.\n");
 282                 /* NOTREACHED */
 283
 284         case EACCES:
 285                 uu_die("Repository backend access denied.\n");
 286                 /* NOTREACHED */
 287
 288         case EROFS:
 289                 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
 290                     "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
 291                 return (0);
 292
 293         case EINVAL:
 294         case EBADF:
 295         default:
 296                 assert(0);
 297                 abort();
 298                 /* NOTREACHED */
 299         }
 300 }
 301
 302 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
 303
 304 /*
 305  * int restarter_insert_inst(scf_handle_t *, char *)
 306  *   If the inst is already in the restarter list, return its id.  If the inst
 307  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
 308  *   states, insert it into the list, and return 0.
 309  *
 310  *   Fails with
 311  *     ENOENT - name is not in the repository
 312  */
 313 static int
 314 restarter_insert_inst(scf_handle_t *h, const char *name)
 315 {
 316         int id, r;
 317         restarter_inst_t *inst;
 318         uu_list_index_t idx;
 319         scf_service_t *scf_svc;
 320         scf_instance_t *scf_inst;
 321         scf_snapshot_t *snap = NULL;
 322         scf_propertygroup_t *pg;
 323         char *svc_name, *inst_name;
 324         char logfilebuf[PATH_MAX];
 325         char *c;
 326         boolean_t do_commit_states;
 327         restarter_instance_state_t state, next_state;
 328         protocol_states_t *ps;
 329         pid_t start_pid;
 330         restarter_str_t reason = restarter_str_insert_in_graph;
 331
 332         MUTEX_LOCK(&instance_list.ril_lock);
 333
 334         /*
 335          * We don't use inst_lookup_by_name() here because we want the lookup
 336          * & insert to be atomic.
 337          */
 338         id = dict_lookup_byname(name);
 339         if (id != -1) {
 340                 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
 341                     &idx);
 342                 if (inst != NULL) {
 343                         MUTEX_UNLOCK(&instance_list.ril_lock);
 344                         return (0);
 345                 }
 346         }
 347
 348         /* Allocate an instance */
 349         inst = startd_zalloc(sizeof (restarter_inst_t));
 350         inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
 351         inst->ri_utmpx_prefix[0] = '\0';
 352
 353         inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
 354         (void) strcpy((char *)inst->ri_i.i_fmri, name);
 355
 356         inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
 357
 358         /*
 359          * id shouldn't be -1 since we use the same dictionary as graph.c, but
 360          * just in case.
 361          */
 362         inst->ri_id = (id != -1 ? id : dict_insert(name));
 363
 364         special_online_hooks_get(name, &inst->ri_pre_online_hook,
 365             &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
 366
 367         scf_svc = safe_scf_service_create(h);
 368         scf_inst = safe_scf_instance_create(h);
 369         pg = safe_scf_pg_create(h);
 370         svc_name = startd_alloc(max_scf_name_size);
 371         inst_name = startd_alloc(max_scf_name_size);
 372
 373 rep_retry:
 374         if (snap != NULL)
 375                 scf_snapshot_destroy(snap);
 376         if (inst->ri_logstem != NULL)
 377                 startd_free(inst->ri_logstem, PATH_MAX);
 378         free(inst->ri_common_name);
 379         free(inst->ri_C_common_name);
 380         snap = NULL;
 381         inst->ri_logstem = NULL;
 382         inst->ri_common_name = NULL;
 383         inst->ri_C_common_name = NULL;
 384
 385         if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
 386             NULL, SCF_DECODE_FMRI_EXACT) != 0) {
 387                 switch (scf_error()) {
 388                 case SCF_ERROR_CONNECTION_BROKEN:
 389                         libscf_handle_rebind(h);
 390                         goto rep_retry;
 391
 392                 case SCF_ERROR_NOT_FOUND:
 393                         goto deleted;
 394                 }
 395
 396                 uu_die("Can't decode FMRI %s: %s\n", name,
 397                     scf_strerror(scf_error()));
 398         }
 399
 400         /*
 401          * If there's no running snapshot, then we execute using the editing
 402          * snapshot.  Pending snapshots will be taken later.
 403          */
 404         snap = libscf_get_running_snapshot(scf_inst);
 405
 406         if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
 407             (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
 408             0)) {
 409                 switch (scf_error()) {
 410                 case SCF_ERROR_NOT_SET:
 411                         break;
 412
 413                 case SCF_ERROR_CONNECTION_BROKEN:
 414                         libscf_handle_rebind(h);
 415                         goto rep_retry;
 416
 417                 default:
 418                         assert(0);
 419                         abort();
 420                 }
 421
 422                 goto deleted;
 423         }
 424
 425         (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
 426         for (c = logfilebuf; *c != '\0'; c++)
 427                 if (*c == '/')
 428                         *c = '-';
 429
 430         inst->ri_logstem = startd_alloc(PATH_MAX);
 431         (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
 432             LOG_SUFFIX);
 433
 434         /*
 435          * If the restarter group is missing, use uninit/none.  Otherwise,
 436          * we're probably being restarted & don't want to mess up the states
 437          * that are there.
 438          */
 439         state = RESTARTER_STATE_UNINIT;
 440         next_state = RESTARTER_STATE_NONE;
 441
 442         r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
 443         if (r != 0) {
 444                 switch (scf_error()) {
 445                 case SCF_ERROR_CONNECTION_BROKEN:
 446                         libscf_handle_rebind(h);
 447                         goto rep_retry;
 448
 449                 case SCF_ERROR_NOT_SET:
 450                         goto deleted;
 451
 452                 case SCF_ERROR_NOT_FOUND:
 453                         /*
 454                          * This shouldn't happen since the graph engine should
 455                          * have initialized the state to uninitialized/none if
 456                          * there was no restarter pg.  In case somebody
 457                          * deleted it, though....
 458                          */
 459                         do_commit_states = B_TRUE;
 460                         break;
 461
 462                 default:
 463                         assert(0);
 464                         abort();
 465                 }
 466         } else {
 467                 r = libscf_read_states(pg, &state, &next_state);
 468                 if (r != 0) {
 469                         do_commit_states = B_TRUE;
 470                 } else {
 471                         if (next_state != RESTARTER_STATE_NONE) {
 472                                 /*
 473                                  * Force next_state to _NONE since we
 474                                  * don't look for method processes.
 475                                  */
 476                                 next_state = RESTARTER_STATE_NONE;
 477                                 do_commit_states = B_TRUE;
 478                         } else {
 479                                 /*
 480                                  * The reason for transition will depend on
 481                                  * state.
 482                                  */
 483                                 if (st->st_initial == 0)
 484                                         reason = restarter_str_startd_restart;
 485                                 else if (state == RESTARTER_STATE_MAINT)
 486                                         reason = restarter_str_bad_repo_state;
 487                                 /*
 488                                  * Inform the restarter of our state without
 489                                  * changing the STIME in the repository.
 490                                  */
 491                                 ps = startd_alloc(sizeof (*ps));
 492                                 inst->ri_i.i_state = ps->ps_state = state;
 493                                 inst->ri_i.i_next_state = ps->ps_state_next =
 494                                     next_state;
 495                                 ps->ps_reason = reason;
 496
 497                                 graph_protocol_send_event(inst->ri_i.i_fmri,
 498                                     GRAPH_UPDATE_STATE_CHANGE, ps);
 499
 500                                 do_commit_states = B_FALSE;
 501                         }
 502                 }
 503         }
 504
 505         switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
 506             &inst->ri_utmpx_prefix)) {
 507         case 0:
 508                 break;
 509
 510         case ECONNABORTED:
 511                 libscf_handle_rebind(h);
 512                 goto rep_retry;
 513
 514         case ECANCELED:
 515                 goto deleted;
 516
 517         case ENOENT:
 518                 /*
 519                  * This is odd, because the graph engine should have required
 520                  * the general property group.  So we'll just use default
 521                  * flags in anticipation of the graph engine sending us
 522                  * REMOVE_INSTANCE when it finds out that the general property
 523                  * group has been deleted.
 524                  */
 525                 inst->ri_flags = RINST_CONTRACT;
 526                 break;
 527
 528         default:
 529                 assert(0);
 530                 abort();
 531         }
 532
 533         r = libscf_get_template_values(scf_inst, snap,
 534             &inst->ri_common_name, &inst->ri_C_common_name);
 535
 536         /*
 537          * Copy our names to smaller buffers to reduce our memory footprint.
 538          */
 539         if (inst->ri_common_name != NULL) {
 540                 char *tmp = safe_strdup(inst->ri_common_name);
 541                 startd_free(inst->ri_common_name, max_scf_value_size);
 542                 inst->ri_common_name = tmp;
 543         }
 544
 545         if (inst->ri_C_common_name != NULL) {
 546                 char *tmp = safe_strdup(inst->ri_C_common_name);
 547                 startd_free(inst->ri_C_common_name, max_scf_value_size);
 548                 inst->ri_C_common_name = tmp;
 549         }
 550
 551         switch (r) {
 552         case 0:
 553                 break;
 554
 555         case ECONNABORTED:
 556                 libscf_handle_rebind(h);
 557                 goto rep_retry;
 558
 559         case ECANCELED:
 560                 goto deleted;
 561
 562         case ECHILD:
 563         case ENOENT:
 564                 break;
 565
 566         default:
 567                 assert(0);
 568                 abort();
 569         }
 570
 571         switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
 572             &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
 573             &start_pid)) {
 574         case 0:
 575                 break;
 576
 577         case ECONNABORTED:
 578                 libscf_handle_rebind(h);
 579                 goto rep_retry;
 580
 581         case ECANCELED:
 582                 goto deleted;
 583
 584         default:
 585                 assert(0);
 586                 abort();
 587         }
 588
 589         if (inst->ri_i.i_primary_ctid >= 1) {
 590                 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
 591
 592                 switch (check_contract(inst, B_TRUE, scf_inst)) {
 593                 case 0:
 594                         break;
 595
 596                 case ECONNABORTED:
 597                         libscf_handle_rebind(h);
 598                         goto rep_retry;
 599
 600                 case ECANCELED:
 601                         goto deleted;
 602
 603                 default:
 604                         assert(0);
 605                         abort();
 606                 }
 607         }
 608
 609         if (inst->ri_i.i_transient_ctid >= 1) {
 610                 switch (check_contract(inst, B_FALSE, scf_inst)) {
 611                 case 0:
 612                         break;
 613
 614                 case ECONNABORTED:
 615                         libscf_handle_rebind(h);
 616                         goto rep_retry;
 617
 618                 case ECANCELED:
 619                         goto deleted;
 620
 621                 default:
 622                         assert(0);
 623                         abort();
 624                 }
 625         }
 626
 627         /* No more failures we live through, so add it to the list. */
 628         (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
 629         (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
 630         MUTEX_LOCK(&inst->ri_lock);
 631         MUTEX_LOCK(&inst->ri_queue_lock);
 632
 633         (void) pthread_cond_init(&inst->ri_method_cv, NULL);
 634
 635         uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
 636         uu_list_insert(instance_list.ril_instance_list, inst, idx);
 637         MUTEX_UNLOCK(&instance_list.ril_lock);
 638
 639         if (start_pid != -1 &&
 640             (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
 641                 int ret;
 642                 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
 643                 if (ret == -1) {
 644                         /*
 645                          * Implication:  if we can't reregister the
 646                          * instance, we will start another one.  Two
 647                          * instances may or may not result in a resource
 648                          * conflict.
 649                          */
 650                         log_error(LOG_WARNING,
 651                             "%s: couldn't reregister %ld for wait\n",
 652                             inst->ri_i.i_fmri, start_pid);
 653                 } else if (ret == 1) {
 654                         /*
 655                          * Leading PID has exited.
 656                          */
 657                         (void) stop_instance(h, inst, RSTOP_EXIT);
 658                 }
 659         }
 660
 661
 662         scf_pg_destroy(pg);
 663
 664         if (do_commit_states)
 665                 (void) restarter_instance_update_states(h, inst, state,
 666                     next_state, RERR_NONE, reason);
 667
 668         log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
 669             service_style(inst->ri_flags));
 670
 671         MUTEX_UNLOCK(&inst->ri_queue_lock);
 672         MUTEX_UNLOCK(&inst->ri_lock);
 673
 674         startd_free(svc_name, max_scf_name_size);
 675         startd_free(inst_name, max_scf_name_size);
 676         scf_snapshot_destroy(snap);
 677         scf_instance_destroy(scf_inst);
 678         scf_service_destroy(scf_svc);
 679
 680         log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
 681             name);
 682
 683         return (0);
 684
 685 deleted:
 686         MUTEX_UNLOCK(&instance_list.ril_lock);
 687         startd_free(inst_name, max_scf_name_size);
 688         startd_free(svc_name, max_scf_name_size);
 689         if (snap != NULL)
 690                 scf_snapshot_destroy(snap);
 691         scf_pg_destroy(pg);
 692         scf_instance_destroy(scf_inst);
 693         scf_service_destroy(scf_svc);
 694         startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
 695         uu_list_destroy(inst->ri_queue);
 696         if (inst->ri_logstem != NULL)
 697                 startd_free(inst->ri_logstem, PATH_MAX);
 698         free(inst->ri_common_name);
 699         free(inst->ri_C_common_name);
 700         startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
 701         startd_free(inst, sizeof (restarter_inst_t));
 702         return (ENOENT);
 703 }
 704
 705 static void
 706 restarter_delete_inst(restarter_inst_t *ri)
 707 {
 708         int id;
 709         restarter_inst_t *rip;
 710         void *cookie = NULL;
 711         restarter_instance_qentry_t *e;
 712
 713         assert(MUTEX_HELD(&ri->ri_lock));
 714
 715         /*
 716          * Must drop the instance lock so we can pick up the instance_list
 717          * lock & remove the instance.
 718          */
 719         id = ri->ri_id;
 720         MUTEX_UNLOCK(&ri->ri_lock);
 721
 722         MUTEX_LOCK(&instance_list.ril_lock);
 723
 724         rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 725         if (rip == NULL) {
 726                 MUTEX_UNLOCK(&instance_list.ril_lock);
 727                 return;
 728         }
 729
 730         assert(ri == rip);
 731
 732         uu_list_remove(instance_list.ril_instance_list, ri);
 733
 734         log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
 735             ri->ri_i.i_fmri);
 736
 737         MUTEX_UNLOCK(&instance_list.ril_lock);
 738
 739         /*
 740          * We can lock the instance without holding the instance_list lock
 741          * since we removed the instance from the list.
 742          */
 743         MUTEX_LOCK(&ri->ri_lock);
 744         MUTEX_LOCK(&ri->ri_queue_lock);
 745
 746         if (ri->ri_i.i_primary_ctid >= 1)
 747                 contract_hash_remove(ri->ri_i.i_primary_ctid);
 748
 749         while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
 750                 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
 751
 752         while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
 753                 startd_free(e, sizeof (*e));
 754         uu_list_destroy(ri->ri_queue);
 755
 756         startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
 757         startd_free(ri->ri_logstem, PATH_MAX);
 758         free(ri->ri_common_name);
 759         free(ri->ri_C_common_name);
 760         startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
 761         (void) pthread_mutex_destroy(&ri->ri_lock);
 762         (void) pthread_mutex_destroy(&ri->ri_queue_lock);
 763         startd_free(ri, sizeof (restarter_inst_t));
 764 }
 765
 766 /*
 767  * instance_is_wait_style()
 768  *
 769  *   Returns 1 if the given instance is a "wait-style" service instance.
 770  */
 771 int
 772 instance_is_wait_style(restarter_inst_t *inst)
 773 {
 774         assert(MUTEX_HELD(&inst->ri_lock));
 775         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
 776 }
 777
 778 /*
 779  * instance_is_transient_style()
 780  *
 781  *   Returns 1 if the given instance is a transient service instance.
 782  */
 783 int
 784 instance_is_transient_style(restarter_inst_t *inst)
 785 {
 786         assert(MUTEX_HELD(&inst->ri_lock));
 787         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
 788 }
 789
 790 /*
 791  * instance_in_transition()
 792  * Returns 1 if instance is in transition, 0 if not
 793  */
 794 int
 795 instance_in_transition(restarter_inst_t *inst)
 796 {
 797         assert(MUTEX_HELD(&inst->ri_lock));
 798         if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
 799                 return (0);
 800         return (1);
 801 }
 802
 803 /*
 804  * returns 1 if instance is already started, 0 if not
 805  */
 806 static int
 807 instance_started(restarter_inst_t *inst)
 808 {
 809         int ret;
 810
 811         assert(MUTEX_HELD(&inst->ri_lock));
 812
 813         if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
 814             inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
 815                 ret = 1;
 816         else
 817                 ret = 0;
 818
 819         return (ret);
 820 }
 821
 822 /*
 823  * Returns
 824  *   0 - success
 825  *   ECONNRESET - success, but h was rebound
 826  */
 827 int
 828 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
 829     restarter_instance_state_t new_state,
 830     restarter_instance_state_t new_state_next, restarter_error_t err,
 831     restarter_str_t reason)
 832 {
 833         protocol_states_t *states;
 834         int e;
 835         uint_t retry_count = 0, msecs = ALLOC_DELAY;
 836         boolean_t rebound = B_FALSE;
 837         int prev_state_online;
 838         int state_online;
 839
 840         assert(MUTEX_HELD(&ri->ri_lock));
 841
 842         prev_state_online = instance_started(ri);
 843
 844 retry:
 845         e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
 846             restarter_get_str_short(reason));
 847         switch (e) {
 848         case 0:
 849                 break;
 850
 851         case ENOMEM:
 852                 ++retry_count;
 853                 if (retry_count < ALLOC_RETRY) {
 854                         (void) poll(NULL, 0, msecs);
 855                         msecs *= ALLOC_DELAY_MULT;
 856                         goto retry;
 857                 }
 858
 859                 /* Like startd_alloc(). */
 860                 uu_die("Insufficient memory.\n");
 861                 /* NOTREACHED */
 862
 863         case ECONNABORTED:
 864                 libscf_handle_rebind(h);
 865                 rebound = B_TRUE;
 866                 goto retry;
 867
 868         case EPERM:
 869         case EACCES:
 870         case EROFS:
 871                 log_error(LOG_NOTICE, "Could not commit state change for %s "
 872                     "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
 873                 /* FALLTHROUGH */
 874
 875         case ENOENT:
 876                 ri->ri_i.i_state = new_state;
 877                 ri->ri_i.i_next_state = new_state_next;
 878                 break;
 879
 880         case EINVAL:
 881         default:
 882                 bad_error("_restarter_commit_states", e);
 883         }
 884
 885         states = startd_alloc(sizeof (protocol_states_t));
 886         states->ps_state = new_state;
 887         states->ps_state_next = new_state_next;
 888         states->ps_err = err;
 889         states->ps_reason = reason;
 890         graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
 891             (void *)states);
 892
 893         state_online = instance_started(ri);
 894
 895         if (prev_state_online && !state_online)
 896                 ri->ri_post_offline_hook();
 897         else if (!prev_state_online && state_online)
 898                 ri->ri_post_online_hook();
 899
 900         return (rebound ? ECONNRESET : 0);
 901 }
 902
 903 void
 904 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
 905 {
 906         restarter_inst_t *inst;
 907
 908         assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
 909
 910         inst = inst_lookup_by_name(fmri);
 911         if (inst == NULL)
 912                 return;
 913
 914         inst->ri_flags |= flag;
 915
 916         MUTEX_UNLOCK(&inst->ri_lock);
 917 }
 918
 919 static void
 920 restarter_take_pending_snapshots(scf_handle_t *h)
 921 {
 922         restarter_inst_t *inst;
 923         int r;
 924
 925         MUTEX_LOCK(&instance_list.ril_lock);
 926
 927         for (inst = uu_list_first(instance_list.ril_instance_list);
 928             inst != NULL;
 929             inst = uu_list_next(instance_list.ril_instance_list, inst)) {
 930                 const char *fmri;
 931                 scf_instance_t *sinst = NULL;
 932
 933                 MUTEX_LOCK(&inst->ri_lock);
 934
 935                 /*
 936                  * This is where we'd check inst->ri_method_thread and if it
 937                  * were nonzero we'd wait in anticipation of another thread
 938                  * executing a method for inst.  Doing so with the instance_list
 939                  * locked, though, leads to deadlock.  Since taking a snapshot
 940                  * during that window won't hurt anything, we'll just continue.
 941                  */
 942
 943                 fmri = inst->ri_i.i_fmri;
 944
 945                 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
 946                         scf_snapshot_t *rsnap;
 947
 948                         (void) libscf_fmri_get_instance(h, fmri, &sinst);
 949
 950                         rsnap = libscf_get_or_make_running_snapshot(sinst,
 951                             fmri, B_FALSE);
 952
 953                         scf_instance_destroy(sinst);
 954
 955                         if (rsnap != NULL)
 956                                 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
 957
 958                         scf_snapshot_destroy(rsnap);
 959                 }
 960
 961                 if (inst->ri_flags & RINST_RETAKE_START) {
 962                         switch (r = libscf_snapshots_poststart(h, fmri,
 963                             B_FALSE)) {
 964                         case 0:
 965                         case ENOENT:
 966                                 inst->ri_flags &= ~RINST_RETAKE_START;
 967                                 break;
 968
 969                         case ECONNABORTED:
 970                                 break;
 971
 972                         case EACCES:
 973                         default:
 974                                 bad_error("libscf_snapshots_poststart", r);
 975                         }
 976                 }
 977
 978                 MUTEX_UNLOCK(&inst->ri_lock);
 979         }
 980
 981         MUTEX_UNLOCK(&instance_list.ril_lock);
 982 }
 983
 984 /* ARGSUSED */
 985 void *
 986 restarter_post_fsminimal_thread(void *unused)
 987 {
 988         scf_handle_t *h;
 989         int r;
 990
 991         h = libscf_handle_create_bound_loop();
 992
 993         for (;;) {
 994                 r = libscf_create_self(h);
 995                 if (r == 0)
 996                         break;
 997
 998                 assert(r == ECONNABORTED);
 999                 libscf_handle_rebind(h);
1000         }
1001
1002         restarter_take_pending_snapshots(h);
1003
1004         (void) scf_handle_unbind(h);
1005         scf_handle_destroy(h);
1006
1007         return (NULL);
1008 }
1009
1010 /*
1011  * int stop_instance()
1012  *
1013  *   Stop the instance identified by the instance given as the second argument,
1014  *   for the cause stated.
1015  *
1016  *   Returns
1017  *     0 - success
1018  *     -1 - inst is in transition
1019  */
1020 static int
1021 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1022     stop_cause_t cause)
1023 {
1024         fork_info_t *info;
1025         const char *cp;
1026         int err;
1027         restarter_error_t re;
1028         restarter_str_t reason;
1029         restarter_instance_state_t new_state;
1030
1031         assert(MUTEX_HELD(&inst->ri_lock));
1032         assert(inst->ri_method_thread == 0);
1033
1034         switch (cause) {
1035         case RSTOP_EXIT:
1036                 re = RERR_RESTART;
1037                 reason = restarter_str_ct_ev_exit;
1038                 cp = "all processes in service exited";
1039                 break;
1040         case RSTOP_ERR_CFG:
1041                 re = RERR_FAULT;
1042                 reason = restarter_str_method_failed;
1043                 cp = "service exited with a configuration error";
1044                 break;
1045         case RSTOP_ERR_EXIT:
1046                 re = RERR_RESTART;
1047                 reason = restarter_str_ct_ev_exit;
1048                 cp = "service exited with an error";
1049                 break;
1050         case RSTOP_CORE:
1051                 re = RERR_FAULT;
1052                 reason = restarter_str_ct_ev_core;
1053                 cp = "process dumped core";
1054                 break;
1055         case RSTOP_SIGNAL:
1056                 re = RERR_FAULT;
1057                 reason = restarter_str_ct_ev_signal;
1058                 cp = "process received fatal signal from outside the service";
1059                 break;
1060         case RSTOP_HWERR:
1061                 re = RERR_FAULT;
1062                 reason = restarter_str_ct_ev_hwerr;
1063                 cp = "process killed due to uncorrectable hardware error";
1064                 break;
1065         case RSTOP_DEPENDENCY:
1066                 re = RERR_RESTART;
1067                 reason = restarter_str_dependency_activity;
1068                 cp = "dependency activity requires stop";
1069                 break;
1070         case RSTOP_DISABLE:
1071                 re = RERR_RESTART;
1072                 reason = restarter_str_disable_request;
1073                 cp = "service disabled";
1074                 break;
1075         case RSTOP_RESTART:
1076                 re = RERR_RESTART;
1077                 reason = restarter_str_restart_request;
1078                 cp = "service restarting";
1079                 break;
1080         default:
1081 #ifndef NDEBUG
1082                 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1083                     cause, __FILE__, __LINE__);
1084 #endif
1085                 abort();
1086         }
1087
1088         /* Services in the disabled and maintenance state are ignored */
1089         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1090             inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1091                 log_framework(LOG_DEBUG,
1092                     "%s: stop_instance -> is maint/disabled\n",
1093                     inst->ri_i.i_fmri);
1094                 return (0);
1095         }
1096
1097         /* Already stopped instances are left alone */
1098         if (instance_started(inst) == 0) {
1099                 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1100                     inst->ri_i.i_fmri);
1101                 return (0);
1102         }
1103
1104         if (instance_in_transition(inst)) {
1105                 /* requeue event by returning -1 */
1106                 log_framework(LOG_DEBUG,
1107                     "Restarter: Not stopping %s, in transition.\n",
1108                     inst->ri_i.i_fmri);
1109                 return (-1);
1110         }
1111
1112         log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1113
1114         log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1115             "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1116
1117         if (instance_is_wait_style(inst) &&
1118             (cause == RSTOP_EXIT ||
1119             cause == RSTOP_ERR_CFG ||
1120             cause == RSTOP_ERR_EXIT)) {
1121                 /*
1122                  * No need to stop instance, as child has exited; remove
1123                  * contract and move the instance to the offline state.
1124                  */
1125                 switch (err = restarter_instance_update_states(local_handle,
1126                     inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1127                     reason)) {
1128                 case 0:
1129                 case ECONNRESET:
1130                         break;
1131
1132                 default:
1133                         bad_error("restarter_instance_update_states", err);
1134                 }
1135
1136                 if (cause == RSTOP_ERR_EXIT) {
1137                         /*
1138                          * The RSTOP_ERR_EXIT cause is set via the
1139                          * wait_thread -> wait_remove code path when we have
1140                          * a "wait" style svc that exited with an error. If
1141                          * the svc is failing too quickly, we throttle it so
1142                          * that we don't restart it more than once/second.
1143                          * Since we know we're running in the wait thread its
1144                          * ok to throttle it right here.
1145                          */
1146                         (void) update_fault_count(inst, FAULT_COUNT_INCR);
1147                         if (method_rate_critical(inst)) {
1148                                 log_instance(inst, B_TRUE, "Failing too "
1149                                     "quickly, throttling.");
1150                                 (void) sleep(WT_SVC_ERR_THROTTLE);
1151                         }
1152                 } else {
1153                         (void) update_fault_count(inst, FAULT_COUNT_RESET);
1154                         reset_start_times(inst);
1155                 }
1156
1157                 if (inst->ri_i.i_primary_ctid != 0) {
1158                         inst->ri_m_inst =
1159                             safe_scf_instance_create(local_handle);
1160                         inst->ri_mi_deleted = B_FALSE;
1161
1162                         libscf_reget_instance(inst);
1163                         method_remove_contract(inst, B_TRUE, B_TRUE);
1164
1165                         scf_instance_destroy(inst->ri_m_inst);
1166                         inst->ri_m_inst = NULL;
1167                 }
1168
1169                 switch (err = restarter_instance_update_states(local_handle,
1170                     inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1171                     reason)) {
1172                 case 0:
1173                 case ECONNRESET:
1174                         break;
1175
1176                 default:
1177                         bad_error("restarter_instance_update_states", err);
1178                 }
1179
1180                 if (cause != RSTOP_ERR_CFG)
1181                         return (0);
1182         } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1183                 /*
1184                  * Stopping a wait service through means other than the pid
1185                  * exiting should keep wait_thread() from restarting the
1186                  * service, by removing it from the wait list.
1187                  * We cannot remove it right now otherwise the process will
1188                  * end up <defunct> so mark it to be ignored.
1189                  */
1190                 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1191         }
1192
1193         /*
1194          * There are some configuration errors which we cannot detect until we
1195          * try to run the method.  For example, see exec_method() where the
1196          * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1197          * in several cases. If this happens for a "wait-style" svc,
1198          * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1199          * the configuration error and go into maintenance, even though it is
1200          * a "wait-style" svc.
1201          */
1202         if (cause == RSTOP_ERR_CFG)
1203                 new_state = RESTARTER_STATE_MAINT;
1204         else
1205                 new_state = inst->ri_i.i_enabled ?
1206                     RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1207
1208         switch (err = restarter_instance_update_states(local_handle, inst,
1209             inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1210         case 0:
1211         case ECONNRESET:
1212                 break;
1213
1214         default:
1215                 bad_error("restarter_instance_update_states", err);
1216         }
1217
1218         info = startd_zalloc(sizeof (fork_info_t));
1219
1220         info->sf_id = inst->ri_id;
1221         info->sf_method_type = METHOD_STOP;
1222         info->sf_event_type = re;
1223         info->sf_reason = reason;
1224         inst->ri_method_thread = startd_thread_create(method_thread, info);
1225
1226         return (0);
1227 }
1228
1229 /*
1230  * Returns
1231  *   ENOENT - fmri is not in instance_list
1232  *   0 - success
1233  *   ECONNRESET - success, though handle was rebound
1234  *   -1 - instance is in transition
1235  */
1236 int
1237 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1238 {
1239         restarter_inst_t *rip;
1240         int r;
1241
1242         rip = inst_lookup_by_name(fmri);
1243         if (rip == NULL)
1244                 return (ENOENT);
1245
1246         r = stop_instance(h, rip, flags);
1247
1248         MUTEX_UNLOCK(&rip->ri_lock);
1249
1250         return (r);
1251 }
1252
1253 static void
1254 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1255     unmaint_cause_t cause)
1256 {
1257         ctid_t ctid;
1258         scf_instance_t *inst;
1259         int r;
1260         uint_t tries = 0, msecs = ALLOC_DELAY;
1261         const char *cp;
1262         restarter_str_t reason;
1263
1264         assert(MUTEX_HELD(&rip->ri_lock));
1265
1266         if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1267                 log_error(LOG_DEBUG, "Restarter: "
1268                     "Ignoring maintenance off command because %s is not in the "
1269                     "maintenance state.\n", rip->ri_i.i_fmri);
1270                 return;
1271         }
1272
1273         switch (cause) {
1274         case RUNMAINT_CLEAR:
1275                 cp = "clear requested";
1276                 reason = restarter_str_clear_request;
1277                 break;
1278         case RUNMAINT_DISABLE:
1279                 cp = "disable requested";
1280                 reason = restarter_str_disable_request;
1281                 break;
1282         default:
1283 #ifndef NDEBUG
1284                 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1285                     cause, __FILE__, __LINE__);
1286 #endif
1287                 abort();
1288         }
1289
1290         log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1291             cp);
1292         log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1293             "%s.\n", rip->ri_i.i_fmri, cp);
1294
1295         (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1296             RESTARTER_STATE_NONE, RERR_RESTART, reason);
1297
1298         /*
1299          * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1300          * a primary contract.
1301          */
1302         if (rip->ri_i.i_primary_ctid == 0)
1303                 return;
1304
1305         ctid = rip->ri_i.i_primary_ctid;
1306         contract_abandon(ctid);
1307         rip->ri_i.i_primary_ctid = 0;
1308
1309 rep_retry:
1310         switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1311         case 0:
1312                 break;
1313
1314         case ECONNABORTED:
1315                 libscf_handle_rebind(h);
1316                 goto rep_retry;
1317
1318         case ENOENT:
1319                 /* Must have been deleted. */
1320                 return;
1321
1322         case EINVAL:
1323         case ENOTSUP:
1324         default:
1325                 bad_error("libscf_handle_rebind", r);
1326         }
1327
1328 again:
1329         r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1330         switch (r) {
1331         case 0:
1332                 break;
1333
1334         case ENOMEM:
1335                 ++tries;
1336                 if (tries < ALLOC_RETRY) {
1337                         (void) poll(NULL, 0, msecs);
1338                         msecs *= ALLOC_DELAY_MULT;
1339                         goto again;
1340                 }
1341
1342                 uu_die("Insufficient memory.\n");
1343                 /* NOTREACHED */
1344
1345         case ECONNABORTED:
1346                 scf_instance_destroy(inst);
1347                 libscf_handle_rebind(h);
1348                 goto rep_retry;
1349
1350         case ECANCELED:
1351                 break;
1352
1353         case EPERM:
1354         case EACCES:
1355         case EROFS:
1356                 log_error(LOG_INFO,
1357                     "Could not remove contract id %lu for %s (%s).\n", ctid,
1358                     rip->ri_i.i_fmri, strerror(r));
1359                 break;
1360
1361         case EINVAL:
1362         case EBADF:
1363         default:
1364                 bad_error("restarter_remove_contract", r);
1365         }
1366
1367         scf_instance_destroy(inst);
1368 }
1369
1370 /*
1371  * enable_inst()
1372  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1373  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1374  *   disabled, move it to offline.  If the event is _DISABLE or
1375  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1376  *
1377  *   Returns
1378  *     0 - success
1379  *     ECONNRESET - h was rebound
1380  */
1381 static int
1382 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1383     restarter_instance_qentry_t *riq)
1384 {
1385         restarter_instance_state_t state;
1386         restarter_event_type_t e = riq->riq_type;
1387         restarter_str_t reason = restarter_str_per_configuration;
1388         int r;
1389
1390         assert(MUTEX_HELD(&inst->ri_lock));
1391         assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1392             e == RESTARTER_EVENT_TYPE_DISABLE ||
1393             e == RESTARTER_EVENT_TYPE_ENABLE);
1394         assert(instance_in_transition(inst) == 0);
1395
1396         state = inst->ri_i.i_state;
1397
1398         if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1399                 inst->ri_i.i_enabled = 1;
1400
1401                 if (state == RESTARTER_STATE_UNINIT ||
1402                     state == RESTARTER_STATE_DISABLED) {
1403                         /*
1404                          * B_FALSE: Don't log an error if the log_instance()
1405                          * fails because it will fail on the miniroot before
1406                          * install-discovery runs.
1407                          */
1408                         log_instance(inst, B_FALSE, "Enabled.");
1409                         log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1410                             inst->ri_i.i_fmri);
1411
1412                         /*
1413                          * If we are coming from DISABLED, it was obviously an
1414                          * enable request. If we are coming from UNINIT, it may
1415                          * have been a sevice in MAINT that was cleared.
1416                          */
1417                         if (riq->riq_reason == restarter_str_clear_request)
1418                                 reason = restarter_str_clear_request;
1419                         else if (state == RESTARTER_STATE_DISABLED)
1420                                 reason = restarter_str_enable_request;
1421                         (void) restarter_instance_update_states(h, inst,
1422                             RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1423                             RERR_NONE, reason);
1424                 } else {
1425                         log_framework(LOG_DEBUG, "Restarter: "
1426                             "Not changing state of %s for enable command.\n",
1427                             inst->ri_i.i_fmri);
1428                 }
1429         } else {
1430                 inst->ri_i.i_enabled = 0;
1431
1432                 switch (state) {
1433                 case RESTARTER_STATE_ONLINE:
1434                 case RESTARTER_STATE_DEGRADED:
1435                         r = stop_instance(h, inst, RSTOP_DISABLE);
1436                         return (r == ECONNRESET ? 0 : r);
1437
1438                 case RESTARTER_STATE_OFFLINE:
1439                 case RESTARTER_STATE_UNINIT:
1440                         if (inst->ri_i.i_primary_ctid != 0) {
1441                                 inst->ri_m_inst = safe_scf_instance_create(h);
1442                                 inst->ri_mi_deleted = B_FALSE;
1443
1444                                 libscf_reget_instance(inst);
1445                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1446
1447                                 scf_instance_destroy(inst->ri_m_inst);
1448                         }
1449                         /* B_FALSE: See log_instance(..., "Enabled."); above */
1450                         log_instance(inst, B_FALSE, "Disabled.");
1451                         log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1452                             inst->ri_i.i_fmri);
1453
1454                         /*
1455                          * If we are coming from OFFLINE, it was obviously a
1456                          * disable request. But if we are coming from
1457                          * UNINIT, it may have been a disable request for a
1458                          * service in MAINT.
1459                          */
1460                         if (riq->riq_reason == restarter_str_disable_request ||
1461                             state == RESTARTER_STATE_OFFLINE)
1462                                 reason = restarter_str_disable_request;
1463                         (void) restarter_instance_update_states(h, inst,
1464                             RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1465                             RERR_RESTART, reason);
1466                         return (0);
1467
1468                 case RESTARTER_STATE_DISABLED:
1469                         break;
1470
1471                 case RESTARTER_STATE_MAINT:
1472                         /*
1473                          * We only want to pull the instance out of maintenance
1474                          * if the disable is on adminstrative request.  The
1475                          * graph engine sends _DISABLE events whenever a
1476                          * service isn't in the disabled state, and we don't
1477                          * want to pull the service out of maintenance if,
1478                          * for example, it is there due to a dependency cycle.
1479                          */
1480                         if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1481                                 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1482                         break;
1483
1484                 default:
1485 #ifndef NDEBUG
1486                         (void) fprintf(stderr, "Restarter instance %s has "
1487                             "unknown state %d.\n", inst->ri_i.i_fmri, state);
1488 #endif
1489                         abort();
1490                 }
1491         }
1492
1493         return (0);
1494 }
1495
1496 static void
1497 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1498     int32_t reason)
1499 {
1500         fork_info_t *info;
1501         restarter_str_t new_reason;
1502
1503         assert(MUTEX_HELD(&inst->ri_lock));
1504         assert(instance_in_transition(inst) == 0);
1505         assert(inst->ri_method_thread == 0);
1506
1507         log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1508             inst->ri_i.i_fmri);
1509
1510         /*
1511          * We want to keep the original reason for restarts and clear actions
1512          */
1513         switch (reason) {
1514         case restarter_str_restart_request:
1515         case restarter_str_clear_request:
1516                 new_reason = reason;
1517                 break;
1518         default:
1519                 new_reason = restarter_str_dependencies_satisfied;
1520         }
1521
1522         /* Services in the disabled and maintenance state are ignored */
1523         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1524             inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1525             inst->ri_i.i_enabled == 0) {
1526                 log_framework(LOG_DEBUG,
1527                     "%s: start_instance -> is maint/disabled\n",
1528                     inst->ri_i.i_fmri);
1529                 return;
1530         }
1531
1532         /* Already started instances are left alone */
1533         if (instance_started(inst) == 1) {
1534                 log_framework(LOG_DEBUG,
1535                     "%s: start_instance -> is already started\n",
1536                     inst->ri_i.i_fmri);
1537                 return;
1538         }
1539
1540         log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1541
1542         (void) restarter_instance_update_states(local_handle, inst,
1543             inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1544
1545         info = startd_zalloc(sizeof (fork_info_t));
1546
1547         info->sf_id = inst->ri_id;
1548         info->sf_method_type = METHOD_START;
1549         info->sf_event_type = RERR_NONE;
1550         info->sf_reason = new_reason;
1551         inst->ri_method_thread = startd_thread_create(method_thread, info);
1552 }
1553
1554 static int
1555 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1556 {
1557         scf_instance_t *inst;
1558         int ret = 0;
1559
1560         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1561                 return (-1);
1562
1563         ret = restarter_inst_ractions_from_tty(inst);
1564
1565         scf_instance_destroy(inst);
1566         return (ret);
1567 }
1568
1569 static boolean_t
1570 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1571 {
1572         scf_instance_t *inst;
1573         boolean_t ret = B_FALSE;
1574
1575         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1576                 return (-1);
1577
1578         if (restarter_inst_dump(inst) == 1)
1579                 ret = B_TRUE;
1580
1581         scf_instance_destroy(inst);
1582         return (ret);
1583 }
1584
1585 static void
1586 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1587     restarter_str_t reason)
1588 {
1589         fork_info_t *info;
1590         scf_instance_t *scf_inst = NULL;
1591
1592         assert(MUTEX_HELD(&rip->ri_lock));
1593         assert(reason != restarter_str_none);
1594         assert(rip->ri_method_thread == 0);
1595
1596         log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1597             restarter_get_str_short(reason));
1598         log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1599             rip->ri_i.i_fmri, restarter_get_str_short(reason));
1600
1601         /* Services in the maintenance state are ignored */
1602         if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1603                 log_framework(LOG_DEBUG,
1604                     "%s: maintain_instance -> is already in maintenance\n",
1605                     rip->ri_i.i_fmri);
1606                 return;
1607         }
1608
1609         /*
1610          * If reason state is restarter_str_service_request and
1611          * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1612          * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1613          */
1614         if (reason == restarter_str_service_request &&
1615             libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1616                 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1617                         if (restarter_inst_set_aux_fmri(scf_inst))
1618                                 log_framework(LOG_DEBUG, "%s: "
1619                                     "restarter_inst_set_aux_fmri failed: ",
1620                                     rip->ri_i.i_fmri);
1621                 } else {
1622                         log_framework(LOG_DEBUG, "%s: "
1623                             "restarter_inst_validate_ractions_aux_fmri "
1624                             "failed: ", rip->ri_i.i_fmri);
1625
1626                         if (restarter_inst_reset_aux_fmri(scf_inst))
1627                                 log_framework(LOG_DEBUG, "%s: "
1628                                     "restarter_inst_reset_aux_fmri failed: ",
1629                                     rip->ri_i.i_fmri);
1630                 }
1631                 scf_instance_destroy(scf_inst);
1632         }
1633
1634         if (immediate || !instance_started(rip)) {
1635                 if (rip->ri_i.i_primary_ctid != 0) {
1636                         rip->ri_m_inst = safe_scf_instance_create(h);
1637                         rip->ri_mi_deleted = B_FALSE;
1638
1639                         libscf_reget_instance(rip);
1640                         method_remove_contract(rip, B_TRUE, B_TRUE);
1641
1642                         scf_instance_destroy(rip->ri_m_inst);
1643                 }
1644
1645                 (void) restarter_instance_update_states(h, rip,
1646                     RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1647                     reason);
1648                 return;
1649         }
1650
1651         (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1652             RESTARTER_STATE_MAINT, RERR_NONE, reason);
1653
1654         log_transition(rip, MAINT_REQUESTED);
1655
1656         info = startd_zalloc(sizeof (*info));
1657         info->sf_id = rip->ri_id;
1658         info->sf_method_type = METHOD_STOP;
1659         info->sf_event_type = RERR_RESTART;
1660         info->sf_reason = reason;
1661         rip->ri_method_thread = startd_thread_create(method_thread, info);
1662 }
1663
1664 static void
1665 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1666 {
1667         scf_instance_t *inst;
1668         scf_snapshot_t *snap;
1669         fork_info_t *info;
1670         int r;
1671
1672         assert(MUTEX_HELD(&rip->ri_lock));
1673
1674         log_instance(rip, B_TRUE, "Rereading configuration.");
1675         log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1676             rip->ri_i.i_fmri);
1677
1678 rep_retry:
1679         r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1680         switch (r) {
1681         case 0:
1682                 break;
1683
1684         case ECONNABORTED:
1685                 libscf_handle_rebind(h);
1686                 goto rep_retry;
1687
1688         case ENOENT:
1689                 /* Must have been deleted. */
1690                 return;
1691
1692         case EINVAL:
1693         case ENOTSUP:
1694         default:
1695                 bad_error("libscf_fmri_get_instance", r);
1696         }
1697
1698         snap = libscf_get_running_snapshot(inst);
1699
1700         r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1701             &rip->ri_utmpx_prefix);
1702         switch (r) {
1703         case 0:
1704                 log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1705                     rip->ri_i.i_fmri, service_style(rip->ri_flags));
1706                 break;
1707
1708         case ECONNABORTED:
1709                 scf_instance_destroy(inst);
1710                 scf_snapshot_destroy(snap);
1711                 libscf_handle_rebind(h);
1712                 goto rep_retry;
1713
1714         case ECANCELED:
1715         case ENOENT:
1716                 /* Succeed in anticipation of REMOVE_INSTANCE. */
1717                 break;
1718
1719         default:
1720                 bad_error("libscf_get_startd_properties", r);
1721         }
1722
1723         if (instance_started(rip)) {
1724                 /* Refresh does not change the state. */
1725                 (void) restarter_instance_update_states(h, rip,
1726                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1727                     restarter_str_refresh);
1728
1729                 info = startd_zalloc(sizeof (*info));
1730                 info->sf_id = rip->ri_id;
1731                 info->sf_method_type = METHOD_REFRESH;
1732                 info->sf_event_type = RERR_REFRESH;
1733                 info->sf_reason = 0;
1734
1735                 assert(rip->ri_method_thread == 0);
1736                 rip->ri_method_thread =
1737                     startd_thread_create(method_thread, info);
1738         }
1739
1740         scf_snapshot_destroy(snap);
1741         scf_instance_destroy(inst);
1742 }
1743
/*
 * Printable names for restarter event types, used in log messages.  The
 * table is indexed directly by the event type value.
 */
const char *event_names[] = {
        "INVALID",
        "ADD_INSTANCE",
        "REMOVE_INSTANCE",
        "ENABLE",
        "DISABLE",
        "ADMIN_DEGRADED",
        "ADMIN_REFRESH",
        "ADMIN_RESTART",
        "ADMIN_MAINT_OFF",
        "ADMIN_MAINT_ON",
        "ADMIN_MAINT_ON_IMMEDIATE",
        "STOP",
        "START",
        "DEPENDENCY_CYCLE",
        "INVALID_DEPENDENCY",
        "ADMIN_DISABLE",
        "STOP_RESET"
};
1750
1751 /*
1752  * void *restarter_process_events()
1753  *
1754  *   Called in a separate thread to process the events on an instance's
1755  *   queue.  Empties the queue completely, and tries to keep the thread
1756  *   around for a little while after the queue is empty to save on
1757  *   startup costs.
1758  */
1759 static void *
1760 restarter_process_events(void *arg)
1761 {
1762         scf_handle_t *h;
1763         restarter_instance_qentry_t *event;
1764         restarter_inst_t *rip;
1765         char *fmri = (char *)arg;
1766         struct timespec to;
1767
1768         assert(fmri != NULL);
1769
1770         h = libscf_handle_create_bound_loop();
1771
1772         /* grab the queue lock */
1773         rip = inst_lookup_queue(fmri);
1774         if (rip == NULL)
1775                 goto out;
1776
1777 again:
1778
1779         while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1780                 restarter_inst_t *inst;
1781
1782                 /* drop the queue lock */
1783                 MUTEX_UNLOCK(&rip->ri_queue_lock);
1784
1785                 /*
1786                  * Grab the inst lock -- this waits until any outstanding
1787                  * method finishes running.
1788                  */
1789                 inst = inst_lookup_by_name(fmri);
1790                 if (inst == NULL) {
1791                         /* Getting deleted in the middle isn't an error. */
1792                         goto cont;
1793                 }
1794
1795                 assert(instance_in_transition(inst) == 0);
1796
1797                 /* process the event */
1798                 switch (event->riq_type) {
1799                 case RESTARTER_EVENT_TYPE_ENABLE:
1800                 case RESTARTER_EVENT_TYPE_DISABLE:
1801                         (void) enable_inst(h, inst, event);
1802                         break;
1803
1804                 case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1805                         if (enable_inst(h, inst, event) == 0)
1806                                 reset_start_times(inst);
1807                         break;
1808
1809                 case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1810                         restarter_delete_inst(inst);
1811                         inst = NULL;
1812                         goto cont;
1813
1814                 case RESTARTER_EVENT_TYPE_STOP_RESET:
1815                         reset_start_times(inst);
1816                         /* FALLTHROUGH */
1817                 case RESTARTER_EVENT_TYPE_STOP:
1818                         (void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1819                         break;
1820
1821                 case RESTARTER_EVENT_TYPE_START:
1822                         start_instance(h, inst, event->riq_reason);
1823                         break;
1824
1825                 case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1826                         maintain_instance(h, inst, 0,
1827                             restarter_str_dependency_cycle);
1828                         break;
1829
1830                 case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1831                         maintain_instance(h, inst, 0,
1832                             restarter_str_invalid_dependency);
1833                         break;
1834
1835                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1836                         if (event_from_tty(h, inst) == 0)
1837                                 maintain_instance(h, inst, 0,
1838                                     restarter_str_service_request);
1839                         else
1840                                 maintain_instance(h, inst, 0,
1841                                     restarter_str_administrative_request);
1842                         break;
1843
1844                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1845                         if (event_from_tty(h, inst) == 0)
1846                                 maintain_instance(h, inst, 1,
1847                                     restarter_str_service_request);
1848                         else
1849                                 maintain_instance(h, inst, 1,
1850                                     restarter_str_administrative_request);
1851                         break;
1852
1853                 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1854                         unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1855                         reset_start_times(inst);
1856                         break;
1857
1858                 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1859                         refresh_instance(h, inst);
1860                         break;
1861
1862                 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1863                         log_framework(LOG_WARNING, "Restarter: "
1864                             "%s command (for %s) unimplemented.\n",
1865                             event_names[event->riq_type], inst->ri_i.i_fmri);
1866                         break;
1867
1868                 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1869                         if (!instance_started(inst)) {
1870                                 log_framework(LOG_DEBUG, "Restarter: "
1871                                     "Not restarting %s; not running.\n",
1872                                     inst->ri_i.i_fmri);
1873                         } else {
1874                                 /*
1875                                  * Stop the instance.  If it can be restarted,
1876                                  * the graph engine will send a new event.
1877                                  */
1878                                 if (restart_dump(h, inst)) {
1879                                         (void) contract_kill(
1880                                             inst->ri_i.i_primary_ctid, SIGABRT,
1881                                             inst->ri_i.i_fmri);
1882                                 } else if (stop_instance(h, inst,
1883                                     RSTOP_RESTART) == 0) {
1884                                         reset_start_times(inst);
1885                                 }
1886                         }
1887                         break;
1888
1889                 case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1890                 default:
1891 #ifndef NDEBUG
1892                         uu_warn("%s:%d: Bad restarter event %d.  "
1893                             "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1894 #endif
1895                         abort();
1896                 }
1897
1898                 assert(inst != NULL);
1899                 MUTEX_UNLOCK(&inst->ri_lock);
1900
1901 cont:
1902                 /* grab the queue lock */
1903                 rip = inst_lookup_queue(fmri);
1904                 if (rip == NULL)
1905                         goto out;
1906
1907                 /* delete the event */
1908                 uu_list_remove(rip->ri_queue, event);
1909                 startd_free(event, sizeof (restarter_instance_qentry_t));
1910         }
1911
1912         assert(rip != NULL);
1913
1914         /*
1915          * Try to preserve the thread for a little while for future use.
1916          */
1917         to.tv_sec = 3;
1918         to.tv_nsec = 0;
1919         (void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1920             &rip->ri_queue_lock, &to);
1921
1922         if (uu_list_first(rip->ri_queue) != NULL)
1923                 goto again;
1924
1925         rip->ri_queue_thread = 0;
1926         MUTEX_UNLOCK(&rip->ri_queue_lock);
1927
1928 out:
1929         (void) scf_handle_unbind(h);
1930         scf_handle_destroy(h);
1931         free(fmri);
1932         return (NULL);
1933 }
1934
1935 static int
1936 is_admin_event(restarter_event_type_t t) {
1937
1938         switch (t) {
1939         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1940         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1941         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1942         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1943         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1944         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1945                 return (1);
1946         default:
1947                 return (0);
1948         }
1949 }
1950
1951 static void
1952 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1953 {
1954         restarter_instance_qentry_t *qe;
1955         int r;
1956
1957         assert(MUTEX_HELD(&ri->ri_queue_lock));
1958         assert(!MUTEX_HELD(&ri->ri_lock));
1959
1960         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1961         qe->riq_type = e->rpe_type;
1962         qe->riq_reason = e->rpe_reason;
1963
1964         uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1965         r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1966         assert(r == 0);
1967 }
1968
1969 /*
1970  * void *restarter_event_thread()
1971  *
1972  *  Handle incoming graph events by placing them on a per-instance
1973  *  queue.  We can't lock the main part of the instance structure, so
1974  *  just modify the seprarately locked event queue portion.
1975  */
1976 /*ARGSUSED*/
1977 static void *
1978 restarter_event_thread(void *unused)
1979 {
1980         scf_handle_t *h;
1981
1982         /*
1983          * This is a new thread, and thus, gets its own handle
1984          * to the repository.
1985          */
1986         h = libscf_handle_create_bound_loop();
1987
1988         MUTEX_LOCK(&ru->restarter_update_lock);
1989
1990         /*CONSTCOND*/
1991         while (1) {
1992                 restarter_protocol_event_t *e;
1993
1994                 while (ru->restarter_update_wakeup == 0)
1995                         (void) pthread_cond_wait(&ru->restarter_update_cv,
1996                             &ru->restarter_update_lock);
1997
1998                 ru->restarter_update_wakeup = 0;
1999
2000                 while ((e = restarter_event_dequeue()) != NULL) {
2001                         restarter_inst_t *rip;
2002                         char *fmri;
2003
2004                         MUTEX_UNLOCK(&ru->restarter_update_lock);
2005
2006                         /*
2007                          * ADD_INSTANCE is special: there's likely no
2008                          * instance structure yet, so we need to handle the
2009                          * addition synchronously.
2010                          */
2011                         switch (e->rpe_type) {
2012                         case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
2013                                 if (restarter_insert_inst(h, e->rpe_inst) != 0)
2014                                         log_error(LOG_INFO, "Restarter: "
2015                                             "Could not add %s.\n", e->rpe_inst);
2016
2017                                 MUTEX_LOCK(&st->st_load_lock);
2018                                 if (--st->st_load_instances == 0)
2019                                         (void) pthread_cond_broadcast(
2020                                             &st->st_load_cv);
2021                                 MUTEX_UNLOCK(&st->st_load_lock);
2022
2023                                 goto nolookup;
2024                         }
2025
2026                         /*
2027                          * Lookup the instance, locking only the event queue.
2028                          * Can't grab ri_lock here because it might be held
2029                          * by a long-running method.
2030                          */
2031                         rip = inst_lookup_queue(e->rpe_inst);
2032                         if (rip == NULL) {
2033                                 log_error(LOG_INFO, "Restarter: "
2034                                     "Ignoring %s command for unknown service "
2035                                     "%s.\n", event_names[e->rpe_type],
2036                                     e->rpe_inst);
2037                                 goto nolookup;
2038                         }
2039
2040                         /* Keep ADMIN events from filling up the queue. */
2041                         if (is_admin_event(e->rpe_type) &&
2042                             uu_list_numnodes(rip->ri_queue) >
2043                             RINST_QUEUE_THRESHOLD) {
2044                                 MUTEX_UNLOCK(&rip->ri_queue_lock);
2045                                 log_instance(rip, B_TRUE, "Instance event "
2046                                     "queue overflow.  Dropping administrative "
2047                                     "request.");
2048                                 log_framework(LOG_DEBUG, "%s: Instance event "
2049                                     "queue overflow.  Dropping administrative "
2050                                     "request.\n", rip->ri_i.i_fmri);
2051                                 goto nolookup;
2052                         }
2053
2054                         /* Now add the event to the instance queue. */
2055                         restarter_queue_event(rip, e);
2056
2057                         if (rip->ri_queue_thread == 0) {
2058                                 /*
2059                                  * Start a thread if one isn't already
2060                                  * running.
2061                                  */
2062                                 fmri = safe_strdup(e->rpe_inst);
2063                                 rip->ri_queue_thread =  startd_thread_create(
2064                                     restarter_process_events, (void *)fmri);
2065                         } else {
2066                                 /*
2067                                  * Signal the existing thread that there's
2068                                  * a new event.
2069                                  */
2070                                 (void) pthread_cond_broadcast(
2071                                     &rip->ri_queue_cv);
2072                         }
2073
2074                         MUTEX_UNLOCK(&rip->ri_queue_lock);
2075 nolookup:
2076                         restarter_event_release(e);
2077
2078                         MUTEX_LOCK(&ru->restarter_update_lock);
2079                 }
2080         }
2081
2082         /*
2083          * Unreachable for now -- there's currently no graceful cleanup
2084          * called on exit().
2085          */
2086         (void) scf_handle_unbind(h);
2087         scf_handle_destroy(h);
2088         return (NULL);
2089 }
2090
2091 static restarter_inst_t *
2092 contract_to_inst(ctid_t ctid)
2093 {
2094         restarter_inst_t *inst;
2095         int id;
2096
2097         id = lookup_inst_by_contract(ctid);
2098         if (id == -1)
2099                 return (NULL);
2100
2101         inst = inst_lookup_by_id(id);
2102         if (inst != NULL) {
2103                 /*
2104                  * Since ri_lock isn't held by the contract id lookup, this
2105                  * instance may have been restarted and now be in a new
2106                  * contract, making the old contract no longer valid for this
2107                  * instance.
2108                  */
2109                 if (ctid != inst->ri_i.i_primary_ctid) {
2110                         MUTEX_UNLOCK(&inst->ri_lock);
2111                         inst = NULL;
2112                 }
2113         }
2114         return (inst);
2115 }
2116
2117 /*
2118  * void contract_action()
2119  *   Take action on contract events.
2120  */
2121 static void
2122 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2123     uint32_t type)
2124 {
2125         const char *fmri = inst->ri_i.i_fmri;
2126
2127         assert(MUTEX_HELD(&inst->ri_lock));
2128
2129         /*
2130          * If startd has stopped this contract, there is no need to
2131          * stop it again.
2132          */
2133         if (inst->ri_i.i_primary_ctid > 0 &&
2134             inst->ri_i.i_primary_ctid_stopped)
2135                 return;
2136
2137         if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2138             | CT_PR_EV_HWERR)) == 0) {
2139                 /*
2140                  * There shouldn't be other events, since that's not how we set
2141                  * the terms. Thus, just log an error and drive on.
2142                  */
2143                 log_framework(LOG_NOTICE,
2144                     "%s: contract %ld received unexpected critical event "
2145                     "(%d)\n", fmri, id, type);
2146                 return;
2147         }
2148
2149         assert(instance_in_transition(inst) == 0);
2150
2151         if (instance_is_wait_style(inst)) {
2152                 /*
2153                  * We ignore all events; if they impact the
2154                  * process we're monitoring, then the
2155                  * wait_thread will stop the instance.
2156                  */
2157                 log_framework(LOG_DEBUG,
2158                     "%s: ignoring contract event on wait-style service\n",
2159                     fmri);
2160         } else {
2161                 /*
2162                  * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2163                  */
2164                 switch (type) {
2165                 case CT_PR_EV_EMPTY:
2166                         (void) stop_instance(h, inst, RSTOP_EXIT);
2167                         break;
2168                 case CT_PR_EV_CORE:
2169                         (void) stop_instance(h, inst, RSTOP_CORE);
2170                         break;
2171                 case CT_PR_EV_SIGNAL:
2172                         (void) stop_instance(h, inst, RSTOP_SIGNAL);
2173                         break;
2174                 case CT_PR_EV_HWERR:
2175                         (void) stop_instance(h, inst, RSTOP_HWERR);
2176                         break;
2177                 }
2178         }
2179 }
2180
2181 /*
2182  * void *restarter_contract_event_thread(void *)
2183  *   Listens to the process contract bundle for critical events, taking action
2184  *   on events from contracts we know we are responsible for.
2185  */
2186 /*ARGSUSED*/
2187 static void *
2188 restarter_contracts_event_thread(void *unused)
2189 {
2190         int fd, err;
2191         scf_handle_t *local_handle;
2192
2193         /*
2194          * Await graph load completion.  That is, stop here, until we've scanned
2195          * the repository for contract - instance associations.
2196          */
2197         MUTEX_LOCK(&st->st_load_lock);
2198         while (!(st->st_load_complete && st->st_load_instances == 0))
2199                 (void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2200         MUTEX_UNLOCK(&st->st_load_lock);
2201
2202         /*
2203          * This is a new thread, and thus, gets its own handle
2204          * to the repository.
2205          */
2206         if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2207                 uu_die("Unable to bind a new repository handle: %s\n",
2208                     scf_strerror(scf_error()));
2209
2210         fd = open(CTFS_ROOT "/process/pbundle", O_RDONLY);
2211         if (fd == -1)
2212                 uu_die("process bundle open failed");
2213
2214         /*
2215          * Make sure we get all events (including those generated by configd
2216          * before this thread was started).
2217          */
2218         err = ct_event_reset(fd);
2219         assert(err == 0);
2220
2221         for (;;) {
2222                 int efd, sfd;
2223                 ct_evthdl_t ev;
2224                 uint32_t type;
2225                 ctevid_t evid;
2226                 ct_stathdl_t status;
2227                 ctid_t ctid;
2228                 restarter_inst_t *inst;
2229                 uint64_t cookie;
2230
2231                 if (err = ct_event_read_critical(fd, &ev)) {
2232                         log_error(LOG_WARNING,
2233                             "Error reading next contract event: %s",
2234                             strerror(err));
2235                         continue;
2236                 }
2237
2238                 evid = ct_event_get_evid(ev);
2239                 ctid = ct_event_get_ctid(ev);
2240                 type = ct_event_get_type(ev);
2241
2242                 /* Fetch cookie. */
2243                 if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2244                     < 0) {
2245                         ct_event_free(ev);
2246                         continue;
2247                 }
2248
2249                 if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2250                         log_framework(LOG_WARNING, "Could not get status for "
2251                             "contract %ld: %s\n", ctid, strerror(err));
2252
2253                         startd_close(sfd);
2254                         ct_event_free(ev);
2255                         continue;
2256                 }
2257
2258                 cookie = ct_status_get_cookie(status);
2259
2260                 log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2261                     "cookie %lld\n", type, ctid, cookie);
2262
2263                 ct_status_free(status);
2264
2265                 startd_close(sfd);
2266
2267                 /*
2268                  * svc.configd(8) restart handling performed by the
2269                  * fork_configd_thread.  We don't acknowledge, as that thread
2270                  * will do so.
2271                  */
2272                 if (cookie == CONFIGD_COOKIE) {
2273                         ct_event_free(ev);
2274                         continue;
2275                 }
2276
2277                 inst = NULL;
2278                 if (storing_contract != 0 &&
2279                     (inst = contract_to_inst(ctid)) == NULL) {
2280                         /*
2281                          * This can happen for two reasons:
2282                          * - method_run() has not yet stored the
2283                          *    the contract into the internal hash table.
2284                          * - we receive an EMPTY event for an abandoned
2285                          *    contract.
2286                          * If there is any contract in the process of
2287                          * being stored into the hash table then re-read
2288                          * the event later.
2289                          */
2290                         log_framework(LOG_DEBUG,
2291                             "Reset event %d for unknown "
2292                             "contract id %ld\n", type, ctid);
2293
2294                         /* don't go too fast */
2295                         (void) poll(NULL, 0, 100);
2296
2297                         (void) ct_event_reset(fd);
2298                         ct_event_free(ev);
2299                         continue;
2300                 }
2301
2302                 /*
2303                  * Do not call contract_to_inst() again if first
2304                  * call succeeded.
2305                  */
2306                 if (inst == NULL)
2307                         inst = contract_to_inst(ctid);
2308                 if (inst == NULL) {
2309                         /*
2310                          * This can happen if we receive an EMPTY
2311                          * event for an abandoned contract.
2312                          */
2313                         log_framework(LOG_DEBUG,
2314                             "Received event %d for unknown contract id "
2315                             "%ld\n", type, ctid);
2316                 } else {
2317                         log_framework(LOG_DEBUG,
2318                             "Received event %d for contract id "
2319                             "%ld (%s)\n", type, ctid,
2320                             inst->ri_i.i_fmri);
2321
2322                         contract_action(local_handle, inst, ctid, type);
2323
2324                         MUTEX_UNLOCK(&inst->ri_lock);
2325                 }
2326
2327                 efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2328                     O_WRONLY);
2329                 if (efd != -1) {
2330                         (void) ct_ctl_ack(efd, evid);
2331                         startd_close(efd);
2332                 }
2333
2334                 ct_event_free(ev);
2335
2336         }
2337
2338         /*NOTREACHED*/
2339         return (NULL);
2340 }
2341
2342 /*
2343  * Timeout queue, processed by restarter_timeouts_event_thread().
2344  */
2345 timeout_queue_t *timeouts;
2346 static uu_list_pool_t *timeout_pool;
2347
2348 typedef struct timeout_update {
2349         pthread_mutex_t         tu_lock;
2350         pthread_cond_t          tu_cv;
2351         int                     tu_wakeup;
2352 } timeout_update_t;
2353
2354 timeout_update_t *tu;
2355
2356 static const char *timeout_ovr_svcs[] = {
2357         "svc:/system/manifest-import:default",
2358         "svc:/network/initial:default",
2359         "svc:/network/service:default",
2360         "svc:/system/rmtmpfiles:default",
2361         "svc:/network/loopback:default",
2362         "svc:/network/physical:default",
2363         "svc:/system/device/local:default",
2364         "svc:/system/filesystem/usr:default",
2365         "svc:/system/filesystem/minimal:default",
2366         "svc:/system/filesystem/local:default",
2367         NULL
2368 };
2369
2370 int
2371 is_timeout_ovr(restarter_inst_t *inst)
2372 {
2373         int i;
2374
2375         for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2376                 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2377                         log_instance(inst, B_TRUE, "Timeout override by "
2378                             "svc.startd.  Using infinite timeout.");
2379                         return (1);
2380                 }
2381         }
2382
2383         return (0);
2384 }
2385
2386 /*ARGSUSED*/
2387 static int
2388 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2389 {
2390         hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2391         hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2392
2393         if (t1 > t2)
2394                 return (1);
2395         else if (t1 < t2)
2396                 return (-1);
2397         return (0);
2398 }
2399
2400 void
2401 timeout_init()
2402 {
2403         timeouts = startd_zalloc(sizeof (timeout_queue_t));
2404
2405         (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2406
2407         timeout_pool = startd_list_pool_create("timeouts",
2408             sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2409             timeout_compare, UU_LIST_POOL_DEBUG);
2410         assert(timeout_pool != NULL);
2411
2412         timeouts->tq_list = startd_list_create(timeout_pool,
2413             timeouts, UU_LIST_SORTED);
2414         assert(timeouts->tq_list != NULL);
2415
2416         tu = startd_zalloc(sizeof (timeout_update_t));
2417         (void) pthread_cond_init(&tu->tu_cv, NULL);
2418         (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2419 }
2420
2421 void
2422 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2423 {
2424         hrtime_t now, timeout;
2425         timeout_entry_t *entry;
2426         uu_list_index_t idx;
2427
2428         assert(MUTEX_HELD(&inst->ri_lock));
2429
2430         now = gethrtime();
2431
2432         /*
2433          * If we overflow LLONG_MAX, we're never timing out anyways, so
2434          * just return.
2435          */
2436         if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2437                 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2438                     "treating as infinite.");
2439                 return;
2440         }
2441
2442         /* hrtime is in nanoseconds. Convert timeout_sec. */
2443         timeout = now + (timeout_sec * 1000000000LL);
2444
2445         entry = startd_alloc(sizeof (timeout_entry_t));
2446         entry->te_timeout = timeout;
2447         entry->te_ctid = cid;
2448         entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2449         entry->te_logstem = safe_strdup(inst->ri_logstem);
2450         entry->te_fired = 0;
2451         /* Insert the calculated timeout time onto the queue. */
2452         MUTEX_LOCK(&timeouts->tq_lock);
2453         (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2454         uu_list_node_init(entry, &entry->te_link, timeout_pool);
2455         uu_list_insert(timeouts->tq_list, entry, idx);
2456         MUTEX_UNLOCK(&timeouts->tq_lock);
2457
2458         assert(inst->ri_timeout == NULL);
2459         inst->ri_timeout = entry;
2460
2461         MUTEX_LOCK(&tu->tu_lock);
2462         tu->tu_wakeup = 1;
2463         (void) pthread_cond_broadcast(&tu->tu_cv);
2464         MUTEX_UNLOCK(&tu->tu_lock);
2465 }
2466
2467
2468 void
2469 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2470 {
2471         assert(MUTEX_HELD(&inst->ri_lock));
2472
2473         if (inst->ri_timeout == NULL)
2474                 return;
2475
2476         assert(inst->ri_timeout->te_ctid == cid);
2477
2478         MUTEX_LOCK(&timeouts->tq_lock);
2479         uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2480         MUTEX_UNLOCK(&timeouts->tq_lock);
2481
2482         free(inst->ri_timeout->te_fmri);
2483         free(inst->ri_timeout->te_logstem);
2484         startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2485         inst->ri_timeout = NULL;
2486 }
2487
2488 static int
2489 timeout_now()
2490 {
2491         timeout_entry_t *e;
2492         hrtime_t now;
2493         int ret;
2494
2495         now = gethrtime();
2496
2497         /*
2498          * Walk through the (sorted) timeouts list.  While the timeout
2499          * at the head of the list is <= the current time, kill the
2500          * method.
2501          */
2502         MUTEX_LOCK(&timeouts->tq_lock);
2503
2504         for (e = uu_list_first(timeouts->tq_list);
2505             e != NULL && e->te_timeout <= now;
2506             e = uu_list_next(timeouts->tq_list, e)) {
2507                 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2508                     "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2509                 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2510                     "Method or service exit timed out.  Killing contract %ld.",
2511                     e->te_ctid);
2512                 e->te_fired = 1;
2513                 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2514         }
2515
2516         if (uu_list_numnodes(timeouts->tq_list) > 0)
2517                 ret = 0;
2518         else
2519                 ret = -1;
2520
2521         MUTEX_UNLOCK(&timeouts->tq_lock);
2522
2523         return (ret);
2524 }
2525
2526 /*
2527  * void *restarter_timeouts_event_thread(void *)
2528  *   Responsible for monitoring the method timeouts.  This thread must
2529  *   be started before any methods are called.
2530  */
2531 /*ARGSUSED*/
2532 static void *
2533 restarter_timeouts_event_thread(void *unused)
2534 {
2535         /*
2536          * Timeouts are entered on a priority queue, which is processed by
2537          * this thread.  As timeouts are specified in seconds, we'll do
2538          * the necessary processing every second, as long as the queue
2539          * is not empty.
2540          */
2541
2542         /*CONSTCOND*/
2543         while (1) {
2544                 /*
2545                  * As long as the timeout list isn't empty, process it
2546                  * every second.
2547                  */
2548                 if (timeout_now() == 0) {
2549                         (void) sleep(1);
2550                         continue;
2551                 }
2552
2553                 /* The list is empty, wait until we have more timeouts. */
2554                 MUTEX_LOCK(&tu->tu_lock);
2555
2556                 while (tu->tu_wakeup == 0)
2557                         (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2558
2559                 tu->tu_wakeup = 0;
2560                 MUTEX_UNLOCK(&tu->tu_lock);
2561         }
2562
2563         return (NULL);
2564 }
2565
2566 void
2567 restarter_start()
2568 {
2569         (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2570         (void) startd_thread_create(restarter_event_thread, NULL);
2571         (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2572         (void) startd_thread_create(wait_thread, NULL);
2573 }
2574
2575
2576 void
2577 restarter_init()
2578 {
2579         restarter_instance_pool = startd_list_pool_create("restarter_instances",
2580             sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2581             ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2582         (void) memset(&instance_list, 0, sizeof (instance_list));
2583
2584         (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2585         instance_list.ril_instance_list = startd_list_create(
2586             restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2587
2588         restarter_queue_pool = startd_list_pool_create(
2589             "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2590             offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2591             UU_LIST_POOL_DEBUG);
2592
2593         contract_list_pool = startd_list_pool_create(
2594             "contract_list", sizeof (contract_entry_t),
2595             offsetof(contract_entry_t,  ce_link), NULL,
2596             UU_LIST_POOL_DEBUG);
2597         contract_hash_init();
2598
2599         log_framework(LOG_DEBUG, "Initialized restarter\n");
2600 }