usr/src/cmd/svc/startd/restarter.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2018 Joyent, Inc.
  25  */
  26
  27 /*
  28  * restarter.c - service manipulation
  29  *
  30  * This component manages services whose restarter is svc.startd, the standard
  31  * restarter.  It translates restarter protocol events from the graph engine
  32  * into actions on processes, as a delegated restarter would do.
  33  *
  34  * The master restarter manages a number of always-running threads:
  35  *   - restarter event thread: events from the graph engine
  36  *   - timeout thread: thread to fire queued timeouts
  37  *   - contract thread: thread to handle contract events
  38  *   - wait thread: thread to handle wait-based services
  39  *
  40  * The other threads are created as-needed:
  41  *   - per-instance method threads
  42  *   - per-instance event processing threads
  43  *
  44  * The interaction of all threads must result in the following conditions
  45  * being satisfied (on a per-instance basis):
  46  *   - restarter events must be processed in order
  47  *   - method execution must be serialized
  48  *   - instance delete must be held until outstanding methods are complete
  49  *   - contract events shouldn't be processed while a method is running
  50  *   - timeouts should fire even when a method is running
  51  *
  52  * Service instances are represented by restarter_inst_t's and are kept in the
  53  * instance_list list.
  54  *
  55  * Service States
  56  *   The current state of a service instance is kept in
  57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
  58  *   some time, then before we effect the transition we set
  59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
  60  *   rotate i_next_state to i_state and set i_next_state to
  61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
  62  *   held.  The exception is when we launch methods, which are done with
  63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
  64  *   method_thread() does, we set ri_method_thread to the thread id of the
  65  *   method thread, and when it is nonzero any thread with a different thread id
  66  *   waits on ri_method_cv.
  67  *
  68  * Method execution is serialized by blocking on ri_method_cv in
  69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
  70  * also prevents the instance structure from being deleted until all
  71  * outstanding operations such as method_thread() have finished.
  72  *
  73  * Lock ordering:
  74  *
  75  * dgraph_lock [can be held when taking:]
  76  *   utmpx_lock
  77  *   dictionary->dict_lock
  78  *   st->st_load_lock
  79  *   wait_info_lock
  80  *   ru->restarter_update_lock
  81  *     restarter_queue->rpeq_lock
  82  *   instance_list.ril_lock
  83  *     inst->ri_lock
  84  *   st->st_configd_live_lock
  85  *
  86  * instance_list.ril_lock
  87  *   graph_queue->gpeq_lock
  88  *   gu->gu_lock
  89  *   st->st_configd_live_lock
  90  *   dictionary->dict_lock
  91  *   inst->ri_lock
  92  *     graph_queue->gpeq_lock
  93  *     gu->gu_lock
  94  *     tu->tu_lock
  95  *     tq->tq_lock
  96  *     inst->ri_queue_lock
  97  *       wait_info_lock
  98  *       bp->cb_lock
  99  *     utmpx_lock
 100  *
 101  * single_user_thread_lock
 102  *   wait_info_lock
 103  *   utmpx_lock
 104  *
 105  * gu_freeze_lock
 106  *
 107  * logbuf_mutex nests inside pretty much everything.
 108  */
 109
 110 #include <sys/contract/process.h>
 111 #include <sys/ctfs.h>
 112 #include <sys/stat.h>
 113 #include <sys/time.h>
 114 #include <sys/types.h>
 115 #include <sys/uio.h>
 116 #include <sys/wait.h>
 117 #include <assert.h>
 118 #include <errno.h>
 119 #include <fcntl.h>
 120 #include <libcontract.h>
 121 #include <libcontract_priv.h>
 122 #include <libintl.h>
 123 #include <librestart.h>
 124 #include <librestart_priv.h>
 125 #include <libuutil.h>
 126 #include <limits.h>
 127 #include <poll.h>
 128 #include <port.h>
 129 #include <pthread.h>
 130 #include <stdarg.h>
 131 #include <stdio.h>
 132 #include <strings.h>
 133 #include <unistd.h>
 134
 135 #include "startd.h"
 136 #include "protocol.h"
 137
/* Pool used to initialize the ri_link node of each restarter_inst_t. */
static uu_list_pool_t *restarter_instance_pool;
/*
 * The list of instances known to this restarter.  The functions in this file
 * take instance_list.ril_lock before searching or modifying
 * instance_list.ril_instance_list.
 */
static restarter_instance_list_t instance_list;

/* Pool from which each instance's event queue (ri_queue) is created. */
static uu_list_pool_t *restarter_queue_pool;

#define WT_SVC_ERR_THROTTLE     1       /* 1 sec delay for erroring wait svc */
 144
 145 /*
 146  * Function used to reset the restart times for an instance, when
 147  * an administrative task comes along and essentially makes the times
 148  * in this array ineffective.
 149  */
 150 static void
 151 reset_start_times(restarter_inst_t *inst)
 152 {
 153         inst->ri_start_index = 0;
 154         bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
 155 }
 156
 157 /*ARGSUSED*/
 158 static int
 159 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
 160     void *private)
 161 {
 162         int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
 163         int rc_id = *(int *)rc_arg;
 164
 165         if (lc_id > rc_id)
 166                 return (1);
 167         if (lc_id < rc_id)
 168                 return (-1);
 169         return (0);
 170 }
 171
 172 static restarter_inst_t *
 173 inst_lookup_by_name(const char *name)
 174 {
 175         int id;
 176
 177         id = dict_lookup_byname(name);
 178         if (id == -1)
 179                 return (NULL);
 180
 181         return (inst_lookup_by_id(id));
 182 }
 183
 184 restarter_inst_t *
 185 inst_lookup_by_id(int id)
 186 {
 187         restarter_inst_t *inst;
 188
 189         MUTEX_LOCK(&instance_list.ril_lock);
 190         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 191         if (inst != NULL)
 192                 MUTEX_LOCK(&inst->ri_lock);
 193         MUTEX_UNLOCK(&instance_list.ril_lock);
 194
 195         if (inst != NULL) {
 196                 while (inst->ri_method_thread != 0 &&
 197                     !pthread_equal(inst->ri_method_thread, pthread_self())) {
 198                         ++inst->ri_method_waiters;
 199                         (void) pthread_cond_wait(&inst->ri_method_cv,
 200                             &inst->ri_lock);
 201                         assert(inst->ri_method_waiters > 0);
 202                         --inst->ri_method_waiters;
 203                 }
 204         }
 205
 206         return (inst);
 207 }
 208
 209 static restarter_inst_t *
 210 inst_lookup_queue(const char *name)
 211 {
 212         int id;
 213         restarter_inst_t *inst;
 214
 215         id = dict_lookup_byname(name);
 216         if (id == -1)
 217                 return (NULL);
 218
 219         MUTEX_LOCK(&instance_list.ril_lock);
 220         inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 221         if (inst != NULL)
 222                 MUTEX_LOCK(&inst->ri_queue_lock);
 223         MUTEX_UNLOCK(&instance_list.ril_lock);
 224
 225         return (inst);
 226 }
 227
 228 const char *
 229 service_style(int flags)
 230 {
 231         switch (flags & RINST_STYLE_MASK) {
 232         case RINST_CONTRACT:    return ("contract");
 233         case RINST_TRANSIENT:   return ("transient");
 234         case RINST_WAIT:        return ("wait");
 235
 236         default:
 237 #ifndef NDEBUG
 238                 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
 239 #endif
 240                 abort();
 241                 /* NOTREACHED */
 242         }
 243 }
 244
 245 /*
 246  * Fails with ECONNABORTED or ECANCELED.
 247  */
 248 static int
 249 check_contract(restarter_inst_t *inst, boolean_t primary,
 250     scf_instance_t *scf_inst)
 251 {
 252         ctid_t *ctidp;
 253         int fd, r;
 254
 255         ctidp = primary ? &inst->ri_i.i_primary_ctid :
 256             &inst->ri_i.i_transient_ctid;
 257
 258         assert(*ctidp >= 1);
 259
 260         fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
 261         if (fd >= 0) {
 262                 r = close(fd);
 263                 assert(r == 0);
 264                 return (0);
 265         }
 266
 267         r = restarter_remove_contract(scf_inst, *ctidp, primary ?
 268             RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
 269         switch (r) {
 270         case 0:
 271         case ECONNABORTED:
 272         case ECANCELED:
 273                 *ctidp = 0;
 274                 return (r);
 275
 276         case ENOMEM:
 277                 uu_die("Out of memory\n");
 278                 /* NOTREACHED */
 279
 280         case EPERM:
 281                 uu_die("Insufficient privilege.\n");
 282                 /* NOTREACHED */
 283
 284         case EACCES:
 285                 uu_die("Repository backend access denied.\n");
 286                 /* NOTREACHED */
 287
 288         case EROFS:
 289                 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
 290                     "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
 291                 return (0);
 292
 293         case EINVAL:
 294         case EBADF:
 295         default:
 296                 assert(0);
 297                 abort();
 298                 /* NOTREACHED */
 299         }
 300 }
 301
 302 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
 303
 304 /*
 305  * int restarter_insert_inst(scf_handle_t *, char *)
 306  *   If the inst is already in the restarter list, return its id.  If the inst
 307  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
 308  *   states, insert it into the list, and return 0.
 309  *
 310  *   Fails with
 311  *     ENOENT - name is not in the repository
 312  */
 313 static int
 314 restarter_insert_inst(scf_handle_t *h, const char *name)
 315 {
 316         int id, r;
 317         restarter_inst_t *inst;
 318         uu_list_index_t idx;
 319         scf_service_t *scf_svc;
 320         scf_instance_t *scf_inst;
 321         scf_snapshot_t *snap = NULL;
 322         scf_propertygroup_t *pg;
 323         char *svc_name, *inst_name;
 324         char logfilebuf[PATH_MAX];
 325         char *c;
 326         boolean_t do_commit_states;
 327         restarter_instance_state_t state, next_state;
 328         protocol_states_t *ps;
 329         pid_t start_pid;
 330         restarter_str_t reason = restarter_str_insert_in_graph;
 331
 332         MUTEX_LOCK(&instance_list.ril_lock);
 333
 334         /*
 335          * We don't use inst_lookup_by_name() here because we want the lookup
 336          * & insert to be atomic.
 337          */
 338         id = dict_lookup_byname(name);
 339         if (id != -1) {
 340                 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
 341                     &idx);
 342                 if (inst != NULL) {
 343                         MUTEX_UNLOCK(&instance_list.ril_lock);
 344                         return (0);
 345                 }
 346         }
 347
 348         /* Allocate an instance */
 349         inst = startd_zalloc(sizeof (restarter_inst_t));
 350         inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
 351         inst->ri_utmpx_prefix[0] = '\0';
 352
 353         inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
 354         (void) strcpy((char *)inst->ri_i.i_fmri, name);
 355
 356         inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
 357
 358         /*
 359          * id shouldn't be -1 since we use the same dictionary as graph.c, but
 360          * just in case.
 361          */
 362         inst->ri_id = (id != -1 ? id : dict_insert(name));
 363
 364         special_online_hooks_get(name, &inst->ri_pre_online_hook,
 365             &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
 366
 367         scf_svc = safe_scf_service_create(h);
 368         scf_inst = safe_scf_instance_create(h);
 369         pg = safe_scf_pg_create(h);
 370         svc_name = startd_alloc(max_scf_name_size);
 371         inst_name = startd_alloc(max_scf_name_size);
 372
 373 rep_retry:
 374         if (snap != NULL)
 375                 scf_snapshot_destroy(snap);
 376         if (inst->ri_logstem != NULL)
 377                 startd_free(inst->ri_logstem, PATH_MAX);
 378         if (inst->ri_common_name != NULL)
 379                 free(inst->ri_common_name);
 380         if (inst->ri_C_common_name != NULL)
 381                 free(inst->ri_C_common_name);
 382         snap = NULL;
 383         inst->ri_logstem = NULL;
 384         inst->ri_common_name = NULL;
 385         inst->ri_C_common_name = NULL;
 386
 387         if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
 388             NULL, SCF_DECODE_FMRI_EXACT) != 0) {
 389                 switch (scf_error()) {
 390                 case SCF_ERROR_CONNECTION_BROKEN:
 391                         libscf_handle_rebind(h);
 392                         goto rep_retry;
 393
 394                 case SCF_ERROR_NOT_FOUND:
 395                         goto deleted;
 396                 }
 397
 398                 uu_die("Can't decode FMRI %s: %s\n", name,
 399                     scf_strerror(scf_error()));
 400         }
 401
 402         /*
 403          * If there's no running snapshot, then we execute using the editing
 404          * snapshot.  Pending snapshots will be taken later.
 405          */
 406         snap = libscf_get_running_snapshot(scf_inst);
 407
 408         if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
 409             (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
 410             0)) {
 411                 switch (scf_error()) {
 412                 case SCF_ERROR_NOT_SET:
 413                         break;
 414
 415                 case SCF_ERROR_CONNECTION_BROKEN:
 416                         libscf_handle_rebind(h);
 417                         goto rep_retry;
 418
 419                 default:
 420                         assert(0);
 421                         abort();
 422                 }
 423
 424                 goto deleted;
 425         }
 426
 427         (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
 428         for (c = logfilebuf; *c != '\0'; c++)
 429                 if (*c == '/')
 430                         *c = '-';
 431
 432         inst->ri_logstem = startd_alloc(PATH_MAX);
 433         (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
 434             LOG_SUFFIX);
 435
 436         /*
 437          * If the restarter group is missing, use uninit/none.  Otherwise,
 438          * we're probably being restarted & don't want to mess up the states
 439          * that are there.
 440          */
 441         state = RESTARTER_STATE_UNINIT;
 442         next_state = RESTARTER_STATE_NONE;
 443
 444         r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
 445         if (r != 0) {
 446                 switch (scf_error()) {
 447                 case SCF_ERROR_CONNECTION_BROKEN:
 448                         libscf_handle_rebind(h);
 449                         goto rep_retry;
 450
 451                 case SCF_ERROR_NOT_SET:
 452                         goto deleted;
 453
 454                 case SCF_ERROR_NOT_FOUND:
 455                         /*
 456                          * This shouldn't happen since the graph engine should
 457                          * have initialized the state to uninitialized/none if
 458                          * there was no restarter pg.  In case somebody
 459                          * deleted it, though....
 460                          */
 461                         do_commit_states = B_TRUE;
 462                         break;
 463
 464                 default:
 465                         assert(0);
 466                         abort();
 467                 }
 468         } else {
 469                 r = libscf_read_states(pg, &state, &next_state);
 470                 if (r != 0) {
 471                         do_commit_states = B_TRUE;
 472                 } else {
 473                         if (next_state != RESTARTER_STATE_NONE) {
 474                                 /*
 475                                  * Force next_state to _NONE since we
 476                                  * don't look for method processes.
 477                                  */
 478                                 next_state = RESTARTER_STATE_NONE;
 479                                 do_commit_states = B_TRUE;
 480                         } else {
 481                                 /*
 482                                  * The reason for transition will depend on
 483                                  * state.
 484                                  */
 485                                 if (st->st_initial == 0)
 486                                         reason = restarter_str_startd_restart;
 487                                 else if (state == RESTARTER_STATE_MAINT)
 488                                         reason = restarter_str_bad_repo_state;
 489                                 /*
 490                                  * Inform the restarter of our state without
 491                                  * changing the STIME in the repository.
 492                                  */
 493                                 ps = startd_alloc(sizeof (*ps));
 494                                 inst->ri_i.i_state = ps->ps_state = state;
 495                                 inst->ri_i.i_next_state = ps->ps_state_next =
 496                                     next_state;
 497                                 ps->ps_reason = reason;
 498
 499                                 graph_protocol_send_event(inst->ri_i.i_fmri,
 500                                     GRAPH_UPDATE_STATE_CHANGE, ps);
 501
 502                                 do_commit_states = B_FALSE;
 503                         }
 504                 }
 505         }
 506
 507         switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
 508             &inst->ri_utmpx_prefix)) {
 509         case 0:
 510                 break;
 511
 512         case ECONNABORTED:
 513                 libscf_handle_rebind(h);
 514                 goto rep_retry;
 515
 516         case ECANCELED:
 517                 goto deleted;
 518
 519         case ENOENT:
 520                 /*
 521                  * This is odd, because the graph engine should have required
 522                  * the general property group.  So we'll just use default
 523                  * flags in anticipation of the graph engine sending us
 524                  * REMOVE_INSTANCE when it finds out that the general property
 525                  * group has been deleted.
 526                  */
 527                 inst->ri_flags = RINST_CONTRACT;
 528                 break;
 529
 530         default:
 531                 assert(0);
 532                 abort();
 533         }
 534
 535         r = libscf_get_template_values(scf_inst, snap,
 536             &inst->ri_common_name, &inst->ri_C_common_name);
 537
 538         /*
 539          * Copy our names to smaller buffers to reduce our memory footprint.
 540          */
 541         if (inst->ri_common_name != NULL) {
 542                 char *tmp = safe_strdup(inst->ri_common_name);
 543                 startd_free(inst->ri_common_name, max_scf_value_size);
 544                 inst->ri_common_name = tmp;
 545         }
 546
 547         if (inst->ri_C_common_name != NULL) {
 548                 char *tmp = safe_strdup(inst->ri_C_common_name);
 549                 startd_free(inst->ri_C_common_name, max_scf_value_size);
 550                 inst->ri_C_common_name = tmp;
 551         }
 552
 553         switch (r) {
 554         case 0:
 555                 break;
 556
 557         case ECONNABORTED:
 558                 libscf_handle_rebind(h);
 559                 goto rep_retry;
 560
 561         case ECANCELED:
 562                 goto deleted;
 563
 564         case ECHILD:
 565         case ENOENT:
 566                 break;
 567
 568         default:
 569                 assert(0);
 570                 abort();
 571         }
 572
 573         switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
 574             &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
 575             &start_pid)) {
 576         case 0:
 577                 break;
 578
 579         case ECONNABORTED:
 580                 libscf_handle_rebind(h);
 581                 goto rep_retry;
 582
 583         case ECANCELED:
 584                 goto deleted;
 585
 586         default:
 587                 assert(0);
 588                 abort();
 589         }
 590
 591         if (inst->ri_i.i_primary_ctid >= 1) {
 592                 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
 593
 594                 switch (check_contract(inst, B_TRUE, scf_inst)) {
 595                 case 0:
 596                         break;
 597
 598                 case ECONNABORTED:
 599                         libscf_handle_rebind(h);
 600                         goto rep_retry;
 601
 602                 case ECANCELED:
 603                         goto deleted;
 604
 605                 default:
 606                         assert(0);
 607                         abort();
 608                 }
 609         }
 610
 611         if (inst->ri_i.i_transient_ctid >= 1) {
 612                 switch (check_contract(inst, B_FALSE, scf_inst)) {
 613                 case 0:
 614                         break;
 615
 616                 case ECONNABORTED:
 617                         libscf_handle_rebind(h);
 618                         goto rep_retry;
 619
 620                 case ECANCELED:
 621                         goto deleted;
 622
 623                 default:
 624                         assert(0);
 625                         abort();
 626                 }
 627         }
 628
 629         /* No more failures we live through, so add it to the list. */
 630         (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
 631         (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
 632         MUTEX_LOCK(&inst->ri_lock);
 633         MUTEX_LOCK(&inst->ri_queue_lock);
 634
 635         (void) pthread_cond_init(&inst->ri_method_cv, NULL);
 636
 637         uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
 638         uu_list_insert(instance_list.ril_instance_list, inst, idx);
 639         MUTEX_UNLOCK(&instance_list.ril_lock);
 640
 641         if (start_pid != -1 &&
 642             (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
 643                 int ret;
 644                 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
 645                 if (ret == -1) {
 646                         /*
 647                          * Implication:  if we can't reregister the
 648                          * instance, we will start another one.  Two
 649                          * instances may or may not result in a resource
 650                          * conflict.
 651                          */
 652                         log_error(LOG_WARNING,
 653                             "%s: couldn't reregister %ld for wait\n",
 654                             inst->ri_i.i_fmri, start_pid);
 655                 } else if (ret == 1) {
 656                         /*
 657                          * Leading PID has exited.
 658                          */
 659                         (void) stop_instance(h, inst, RSTOP_EXIT);
 660                 }
 661         }
 662
 663
 664         scf_pg_destroy(pg);
 665
 666         if (do_commit_states)
 667                 (void) restarter_instance_update_states(h, inst, state,
 668                     next_state, RERR_NONE, reason);
 669
 670         log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
 671             service_style(inst->ri_flags));
 672
 673         MUTEX_UNLOCK(&inst->ri_queue_lock);
 674         MUTEX_UNLOCK(&inst->ri_lock);
 675
 676         startd_free(svc_name, max_scf_name_size);
 677         startd_free(inst_name, max_scf_name_size);
 678         scf_snapshot_destroy(snap);
 679         scf_instance_destroy(scf_inst);
 680         scf_service_destroy(scf_svc);
 681
 682         log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
 683             name);
 684
 685         return (0);
 686
 687 deleted:
 688         MUTEX_UNLOCK(&instance_list.ril_lock);
 689         startd_free(inst_name, max_scf_name_size);
 690         startd_free(svc_name, max_scf_name_size);
 691         if (snap != NULL)
 692                 scf_snapshot_destroy(snap);
 693         scf_pg_destroy(pg);
 694         scf_instance_destroy(scf_inst);
 695         scf_service_destroy(scf_svc);
 696         startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
 697         uu_list_destroy(inst->ri_queue);
 698         if (inst->ri_logstem != NULL)
 699                 startd_free(inst->ri_logstem, PATH_MAX);
 700         if (inst->ri_common_name != NULL)
 701                 free(inst->ri_common_name);
 702         if (inst->ri_C_common_name != NULL)
 703                 free(inst->ri_C_common_name);
 704         startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
 705         startd_free(inst, sizeof (restarter_inst_t));
 706         return (ENOENT);
 707 }
 708
 709 static void
 710 restarter_delete_inst(restarter_inst_t *ri)
 711 {
 712         int id;
 713         restarter_inst_t *rip;
 714         void *cookie = NULL;
 715         restarter_instance_qentry_t *e;
 716
 717         assert(MUTEX_HELD(&ri->ri_lock));
 718
 719         /*
 720          * Must drop the instance lock so we can pick up the instance_list
 721          * lock & remove the instance.
 722          */
 723         id = ri->ri_id;
 724         MUTEX_UNLOCK(&ri->ri_lock);
 725
 726         MUTEX_LOCK(&instance_list.ril_lock);
 727
 728         rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
 729         if (rip == NULL) {
 730                 MUTEX_UNLOCK(&instance_list.ril_lock);
 731                 return;
 732         }
 733
 734         assert(ri == rip);
 735
 736         uu_list_remove(instance_list.ril_instance_list, ri);
 737
 738         log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
 739             ri->ri_i.i_fmri);
 740
 741         MUTEX_UNLOCK(&instance_list.ril_lock);
 742
 743         /*
 744          * We can lock the instance without holding the instance_list lock
 745          * since we removed the instance from the list.
 746          */
 747         MUTEX_LOCK(&ri->ri_lock);
 748         MUTEX_LOCK(&ri->ri_queue_lock);
 749
 750         if (ri->ri_i.i_primary_ctid >= 1)
 751                 contract_hash_remove(ri->ri_i.i_primary_ctid);
 752
 753         while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
 754                 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
 755
 756         while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
 757                 startd_free(e, sizeof (*e));
 758         uu_list_destroy(ri->ri_queue);
 759
 760         startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
 761         startd_free(ri->ri_logstem, PATH_MAX);
 762         if (ri->ri_common_name != NULL)
 763                 free(ri->ri_common_name);
 764         if (ri->ri_C_common_name != NULL)
 765                 free(ri->ri_C_common_name);
 766         startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
 767         (void) pthread_mutex_destroy(&ri->ri_lock);
 768         (void) pthread_mutex_destroy(&ri->ri_queue_lock);
 769         startd_free(ri, sizeof (restarter_inst_t));
 770 }
 771
 772 /*
 773  * instance_is_wait_style()
 774  *
 775  *   Returns 1 if the given instance is a "wait-style" service instance.
 776  */
 777 int
 778 instance_is_wait_style(restarter_inst_t *inst)
 779 {
 780         assert(MUTEX_HELD(&inst->ri_lock));
 781         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
 782 }
 783
 784 /*
 785  * instance_is_transient_style()
 786  *
 787  *   Returns 1 if the given instance is a transient service instance.
 788  */
 789 int
 790 instance_is_transient_style(restarter_inst_t *inst)
 791 {
 792         assert(MUTEX_HELD(&inst->ri_lock));
 793         return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
 794 }
 795
 796 /*
 797  * instance_in_transition()
 798  * Returns 1 if instance is in transition, 0 if not
 799  */
 800 int
 801 instance_in_transition(restarter_inst_t *inst)
 802 {
 803         assert(MUTEX_HELD(&inst->ri_lock));
 804         if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
 805                 return (0);
 806         return (1);
 807 }
 808
 809 /*
 810  * returns 1 if instance is already started, 0 if not
 811  */
 812 static int
 813 instance_started(restarter_inst_t *inst)
 814 {
 815         int ret;
 816
 817         assert(MUTEX_HELD(&inst->ri_lock));
 818
 819         if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
 820             inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
 821                 ret = 1;
 822         else
 823                 ret = 0;
 824
 825         return (ret);
 826 }
 827
 828 /*
 829  * Returns
 830  *   0 - success
 831  *   ECONNRESET - success, but h was rebound
 832  */
 833 int
 834 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
 835     restarter_instance_state_t new_state,
 836     restarter_instance_state_t new_state_next, restarter_error_t err,
 837     restarter_str_t reason)
 838 {
 839         protocol_states_t *states;
 840         int e;
 841         uint_t retry_count = 0, msecs = ALLOC_DELAY;
 842         boolean_t rebound = B_FALSE;
 843         int prev_state_online;
 844         int state_online;
 845
 846         assert(MUTEX_HELD(&ri->ri_lock));
 847
 848         prev_state_online = instance_started(ri);
 849
 850 retry:
 851         e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
 852             restarter_get_str_short(reason));
 853         switch (e) {
 854         case 0:
 855                 break;
 856
 857         case ENOMEM:
 858                 ++retry_count;
 859                 if (retry_count < ALLOC_RETRY) {
 860                         (void) poll(NULL, 0, msecs);
 861                         msecs *= ALLOC_DELAY_MULT;
 862                         goto retry;
 863                 }
 864
 865                 /* Like startd_alloc(). */
 866                 uu_die("Insufficient memory.\n");
 867                 /* NOTREACHED */
 868
 869         case ECONNABORTED:
 870                 libscf_handle_rebind(h);
 871                 rebound = B_TRUE;
 872                 goto retry;
 873
 874         case EPERM:
 875         case EACCES:
 876         case EROFS:
 877                 log_error(LOG_NOTICE, "Could not commit state change for %s "
 878                     "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
 879                 /* FALLTHROUGH */
 880
 881         case ENOENT:
 882                 ri->ri_i.i_state = new_state;
 883                 ri->ri_i.i_next_state = new_state_next;
 884                 break;
 885
 886         case EINVAL:
 887         default:
 888                 bad_error("_restarter_commit_states", e);
 889         }
 890
 891         states = startd_alloc(sizeof (protocol_states_t));
 892         states->ps_state = new_state;
 893         states->ps_state_next = new_state_next;
 894         states->ps_err = err;
 895         states->ps_reason = reason;
 896         graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
 897             (void *)states);
 898
 899         state_online = instance_started(ri);
 900
 901         if (prev_state_online && !state_online)
 902                 ri->ri_post_offline_hook();
 903         else if (!prev_state_online && state_online)
 904                 ri->ri_post_online_hook();
 905
 906         return (rebound ? ECONNRESET : 0);
 907 }
 908
 909 void
 910 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
 911 {
 912         restarter_inst_t *inst;
 913
 914         assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
 915
 916         inst = inst_lookup_by_name(fmri);
 917         if (inst == NULL)
 918                 return;
 919
 920         inst->ri_flags |= flag;
 921
 922         MUTEX_UNLOCK(&inst->ri_lock);
 923 }
 924
 925 static void
 926 restarter_take_pending_snapshots(scf_handle_t *h)
 927 {
 928         restarter_inst_t *inst;
 929         int r;
 930
 931         MUTEX_LOCK(&instance_list.ril_lock);
 932
 933         for (inst = uu_list_first(instance_list.ril_instance_list);
 934             inst != NULL;
 935             inst = uu_list_next(instance_list.ril_instance_list, inst)) {
 936                 const char *fmri;
 937                 scf_instance_t *sinst = NULL;
 938
 939                 MUTEX_LOCK(&inst->ri_lock);
 940
 941                 /*
 942                  * This is where we'd check inst->ri_method_thread and if it
 943                  * were nonzero we'd wait in anticipation of another thread
 944                  * executing a method for inst.  Doing so with the instance_list
 945                  * locked, though, leads to deadlock.  Since taking a snapshot
 946                  * during that window won't hurt anything, we'll just continue.
 947                  */
 948
 949                 fmri = inst->ri_i.i_fmri;
 950
 951                 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
 952                         scf_snapshot_t *rsnap;
 953
 954                         (void) libscf_fmri_get_instance(h, fmri, &sinst);
 955
 956                         rsnap = libscf_get_or_make_running_snapshot(sinst,
 957                             fmri, B_FALSE);
 958
 959                         scf_instance_destroy(sinst);
 960
 961                         if (rsnap != NULL)
 962                                 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
 963
 964                         scf_snapshot_destroy(rsnap);
 965                 }
 966
 967                 if (inst->ri_flags & RINST_RETAKE_START) {
 968                         switch (r = libscf_snapshots_poststart(h, fmri,
 969                             B_FALSE)) {
 970                         case 0:
 971                         case ENOENT:
 972                                 inst->ri_flags &= ~RINST_RETAKE_START;
 973                                 break;
 974
 975                         case ECONNABORTED:
 976                                 break;
 977
 978                         case EACCES:
 979                         default:
 980                                 bad_error("libscf_snapshots_poststart", r);
 981                         }
 982                 }
 983
 984                 MUTEX_UNLOCK(&inst->ri_lock);
 985         }
 986
 987         MUTEX_UNLOCK(&instance_list.ril_lock);
 988 }
 989
 990 /* ARGSUSED */
 991 void *
 992 restarter_post_fsminimal_thread(void *unused)
 993 {
 994         scf_handle_t *h;
 995         int r;
 996
 997         (void) pthread_setname_np(pthread_self(), "restarter_post_fsmin");
 998
 999         h = libscf_handle_create_bound_loop();
1000
1001         for (;;) {
1002                 r = libscf_create_self(h);
1003                 if (r == 0)
1004                         break;
1005
1006                 assert(r == ECONNABORTED);
1007                 libscf_handle_rebind(h);
1008         }
1009
1010         restarter_take_pending_snapshots(h);
1011
1012         (void) scf_handle_unbind(h);
1013         scf_handle_destroy(h);
1014
1015         return (NULL);
1016 }
1017
1018 /*
1019  * int stop_instance()
1020  *
1021  *   Stop the instance identified by the instance given as the second argument,
1022  *   for the cause stated.
1023  *
1024  *   Returns
1025  *     0 - success
1026  *     -1 - inst is in transition
1027  */
1028 static int
1029 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1030     stop_cause_t cause)
1031 {
1032         fork_info_t *info;
1033         const char *cp;
1034         int err;
1035         restarter_error_t re;
1036         restarter_str_t reason;
1037         restarter_instance_state_t new_state;
1038
1039         assert(MUTEX_HELD(&inst->ri_lock));
1040         assert(inst->ri_method_thread == 0);
1041
1042         switch (cause) {
1043         case RSTOP_EXIT:
1044                 re = RERR_RESTART;
1045                 reason = restarter_str_ct_ev_exit;
1046                 cp = "all processes in service exited";
1047                 break;
1048         case RSTOP_ERR_CFG:
1049                 re = RERR_FAULT;
1050                 reason = restarter_str_method_failed;
1051                 cp = "service exited with a configuration error";
1052                 break;
1053         case RSTOP_ERR_EXIT:
1054                 re = RERR_RESTART;
1055                 reason = restarter_str_ct_ev_exit;
1056                 cp = "service exited with an error";
1057                 break;
1058         case RSTOP_CORE:
1059                 re = RERR_FAULT;
1060                 reason = restarter_str_ct_ev_core;
1061                 cp = "process dumped core";
1062                 break;
1063         case RSTOP_SIGNAL:
1064                 re = RERR_FAULT;
1065                 reason = restarter_str_ct_ev_signal;
1066                 cp = "process received fatal signal from outside the service";
1067                 break;
1068         case RSTOP_HWERR:
1069                 re = RERR_FAULT;
1070                 reason = restarter_str_ct_ev_hwerr;
1071                 cp = "process killed due to uncorrectable hardware error";
1072                 break;
1073         case RSTOP_DEPENDENCY:
1074                 re = RERR_RESTART;
1075                 reason = restarter_str_dependency_activity;
1076                 cp = "dependency activity requires stop";
1077                 break;
1078         case RSTOP_DISABLE:
1079                 re = RERR_RESTART;
1080                 reason = restarter_str_disable_request;
1081                 cp = "service disabled";
1082                 break;
1083         case RSTOP_RESTART:
1084                 re = RERR_RESTART;
1085                 reason = restarter_str_restart_request;
1086                 cp = "service restarting";
1087                 break;
1088         default:
1089 #ifndef NDEBUG
1090                 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1091                     cause, __FILE__, __LINE__);
1092 #endif
1093                 abort();
1094         }
1095
1096         /* Services in the disabled and maintenance state are ignored */
1097         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1098             inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1099                 log_framework(LOG_DEBUG,
1100                     "%s: stop_instance -> is maint/disabled\n",
1101                     inst->ri_i.i_fmri);
1102                 return (0);
1103         }
1104
1105         /* Already stopped instances are left alone */
1106         if (instance_started(inst) == 0) {
1107                 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1108                     inst->ri_i.i_fmri);
1109                 return (0);
1110         }
1111
1112         if (instance_in_transition(inst)) {
1113                 /* requeue event by returning -1 */
1114                 log_framework(LOG_DEBUG,
1115                     "Restarter: Not stopping %s, in transition.\n",
1116                     inst->ri_i.i_fmri);
1117                 return (-1);
1118         }
1119
1120         log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1121
1122         log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1123             "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1124
1125         if (instance_is_wait_style(inst) &&
1126             (cause == RSTOP_EXIT ||
1127             cause == RSTOP_ERR_CFG ||
1128             cause == RSTOP_ERR_EXIT)) {
1129                 /*
1130                  * No need to stop instance, as child has exited; remove
1131                  * contract and move the instance to the offline state.
1132                  */
1133                 switch (err = restarter_instance_update_states(local_handle,
1134                     inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1135                     reason)) {
1136                 case 0:
1137                 case ECONNRESET:
1138                         break;
1139
1140                 default:
1141                         bad_error("restarter_instance_update_states", err);
1142                 }
1143
1144                 if (cause == RSTOP_ERR_EXIT) {
1145                         /*
1146                          * The RSTOP_ERR_EXIT cause is set via the
1147                          * wait_thread -> wait_remove code path when we have
1148                          * a "wait" style svc that exited with an error. If
1149                          * the svc is failing too quickly, we throttle it so
1150                          * that we don't restart it more than once/second.
1151                          * Since we know we're running in the wait thread its
1152                          * ok to throttle it right here.
1153                          */
1154                         (void) update_fault_count(inst, FAULT_COUNT_INCR);
1155                         if (method_rate_critical(inst)) {
1156                                 log_instance(inst, B_TRUE, "Failing too "
1157                                     "quickly, throttling.");
1158                                 (void) sleep(WT_SVC_ERR_THROTTLE);
1159                         }
1160                 } else {
1161                         (void) update_fault_count(inst, FAULT_COUNT_RESET);
1162                         reset_start_times(inst);
1163                 }
1164
1165                 if (inst->ri_i.i_primary_ctid != 0) {
1166                         inst->ri_m_inst =
1167                             safe_scf_instance_create(local_handle);
1168                         inst->ri_mi_deleted = B_FALSE;
1169
1170                         libscf_reget_instance(inst);
1171                         method_remove_contract(inst, B_TRUE, B_TRUE);
1172
1173                         scf_instance_destroy(inst->ri_m_inst);
1174                         inst->ri_m_inst = NULL;
1175                 }
1176
1177                 switch (err = restarter_instance_update_states(local_handle,
1178                     inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1179                     reason)) {
1180                 case 0:
1181                 case ECONNRESET:
1182                         break;
1183
1184                 default:
1185                         bad_error("restarter_instance_update_states", err);
1186                 }
1187
1188                 if (cause != RSTOP_ERR_CFG)
1189                         return (0);
1190         } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1191                 /*
1192                  * Stopping a wait service through means other than the pid
1193                  * exiting should keep wait_thread() from restarting the
1194                  * service, by removing it from the wait list.
1195                  * We cannot remove it right now otherwise the process will
1196                  * end up <defunct> so mark it to be ignored.
1197                  */
1198                 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1199         }
1200
1201         /*
1202          * There are some configuration errors which we cannot detect until we
1203          * try to run the method.  For example, see exec_method() where the
1204          * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1205          * in several cases. If this happens for a "wait-style" svc,
1206          * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1207          * the configuration error and go into maintenance, even though it is
1208          * a "wait-style" svc.
1209          */
1210         if (cause == RSTOP_ERR_CFG)
1211                 new_state = RESTARTER_STATE_MAINT;
1212         else
1213                 new_state = inst->ri_i.i_enabled ?
1214                     RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1215
1216         switch (err = restarter_instance_update_states(local_handle, inst,
1217             inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1218         case 0:
1219         case ECONNRESET:
1220                 break;
1221
1222         default:
1223                 bad_error("restarter_instance_update_states", err);
1224         }
1225
1226         info = startd_zalloc(sizeof (fork_info_t));
1227
1228         info->sf_id = inst->ri_id;
1229         info->sf_method_type = METHOD_STOP;
1230         info->sf_event_type = re;
1231         info->sf_reason = reason;
1232         inst->ri_method_thread = startd_thread_create(method_thread, info);
1233
1234         return (0);
1235 }
1236
1237 /*
1238  * Returns
1239  *   ENOENT - fmri is not in instance_list
1240  *   0 - success
1241  *   ECONNRESET - success, though handle was rebound
1242  *   -1 - instance is in transition
1243  */
1244 int
1245 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1246 {
1247         restarter_inst_t *rip;
1248         int r;
1249
1250         rip = inst_lookup_by_name(fmri);
1251         if (rip == NULL)
1252                 return (ENOENT);
1253
1254         r = stop_instance(h, rip, flags);
1255
1256         MUTEX_UNLOCK(&rip->ri_lock);
1257
1258         return (r);
1259 }
1260
1261 static void
1262 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1263     unmaint_cause_t cause)
1264 {
1265         ctid_t ctid;
1266         scf_instance_t *inst;
1267         int r;
1268         uint_t tries = 0, msecs = ALLOC_DELAY;
1269         const char *cp;
1270         restarter_str_t reason;
1271
1272         assert(MUTEX_HELD(&rip->ri_lock));
1273
1274         if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1275                 log_error(LOG_DEBUG, "Restarter: "
1276                     "Ignoring maintenance off command because %s is not in the "
1277                     "maintenance state.\n", rip->ri_i.i_fmri);
1278                 return;
1279         }
1280
1281         switch (cause) {
1282         case RUNMAINT_CLEAR:
1283                 cp = "clear requested";
1284                 reason = restarter_str_clear_request;
1285                 break;
1286         case RUNMAINT_DISABLE:
1287                 cp = "disable requested";
1288                 reason = restarter_str_disable_request;
1289                 break;
1290         default:
1291 #ifndef NDEBUG
1292                 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1293                     cause, __FILE__, __LINE__);
1294 #endif
1295                 abort();
1296         }
1297
1298         log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1299             cp);
1300         log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1301             "%s.\n", rip->ri_i.i_fmri, cp);
1302
1303         (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1304             RESTARTER_STATE_NONE, RERR_RESTART, reason);
1305
1306         /*
1307          * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1308          * a primary contract.
1309          */
1310         if (rip->ri_i.i_primary_ctid == 0)
1311                 return;
1312
1313         ctid = rip->ri_i.i_primary_ctid;
1314         contract_abandon(ctid);
1315         rip->ri_i.i_primary_ctid = 0;
1316
1317 rep_retry:
1318         switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1319         case 0:
1320                 break;
1321
1322         case ECONNABORTED:
1323                 libscf_handle_rebind(h);
1324                 goto rep_retry;
1325
1326         case ENOENT:
1327                 /* Must have been deleted. */
1328                 return;
1329
1330         case EINVAL:
1331         case ENOTSUP:
1332         default:
1333                 bad_error("libscf_handle_rebind", r);
1334         }
1335
1336 again:
1337         r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1338         switch (r) {
1339         case 0:
1340                 break;
1341
1342         case ENOMEM:
1343                 ++tries;
1344                 if (tries < ALLOC_RETRY) {
1345                         (void) poll(NULL, 0, msecs);
1346                         msecs *= ALLOC_DELAY_MULT;
1347                         goto again;
1348                 }
1349
1350                 uu_die("Insufficient memory.\n");
1351                 /* NOTREACHED */
1352
1353         case ECONNABORTED:
1354                 scf_instance_destroy(inst);
1355                 libscf_handle_rebind(h);
1356                 goto rep_retry;
1357
1358         case ECANCELED:
1359                 break;
1360
1361         case EPERM:
1362         case EACCES:
1363         case EROFS:
1364                 log_error(LOG_INFO,
1365                     "Could not remove contract id %lu for %s (%s).\n", ctid,
1366                     rip->ri_i.i_fmri, strerror(r));
1367                 break;
1368
1369         case EINVAL:
1370         case EBADF:
1371         default:
1372                 bad_error("restarter_remove_contract", r);
1373         }
1374
1375         scf_instance_destroy(inst);
1376 }
1377
1378 /*
1379  * enable_inst()
1380  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1381  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1382  *   disabled, move it to offline.  If the event is _DISABLE or
1383  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1384  *
1385  *   Returns
1386  *     0 - success
1387  *     ECONNRESET - h was rebound
1388  */
1389 static int
1390 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1391     restarter_instance_qentry_t *riq)
1392 {
1393         restarter_instance_state_t state;
1394         restarter_event_type_t e = riq->riq_type;
1395         restarter_str_t reason = restarter_str_per_configuration;
1396         int r;
1397
1398         assert(MUTEX_HELD(&inst->ri_lock));
1399         assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1400             e == RESTARTER_EVENT_TYPE_DISABLE ||
1401             e == RESTARTER_EVENT_TYPE_ENABLE);
1402         assert(instance_in_transition(inst) == 0);
1403
1404         state = inst->ri_i.i_state;
1405
1406         if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1407                 inst->ri_i.i_enabled = 1;
1408
1409                 if (state == RESTARTER_STATE_UNINIT ||
1410                     state == RESTARTER_STATE_DISABLED) {
1411                         /*
1412                          * B_FALSE: Don't log an error if the log_instance()
1413                          * fails because it will fail on the miniroot before
1414                          * install-discovery runs.
1415                          */
1416                         log_instance(inst, B_FALSE, "Enabled.");
1417                         log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1418                             inst->ri_i.i_fmri);
1419
1420                         /*
1421                          * If we are coming from DISABLED, it was obviously an
1422                          * enable request. If we are coming from UNINIT, it may
1423                          * have been a sevice in MAINT that was cleared.
1424                          */
1425                         if (riq->riq_reason == restarter_str_clear_request)
1426                                 reason = restarter_str_clear_request;
1427                         else if (state == RESTARTER_STATE_DISABLED)
1428                                 reason = restarter_str_enable_request;
1429                         (void) restarter_instance_update_states(h, inst,
1430                             RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1431                             RERR_NONE, reason);
1432                 } else {
1433                         log_framework(LOG_DEBUG, "Restarter: "
1434                             "Not changing state of %s for enable command.\n",
1435                             inst->ri_i.i_fmri);
1436                 }
1437         } else {
1438                 inst->ri_i.i_enabled = 0;
1439
1440                 switch (state) {
1441                 case RESTARTER_STATE_ONLINE:
1442                 case RESTARTER_STATE_DEGRADED:
1443                         r = stop_instance(h, inst, RSTOP_DISABLE);
1444                         return (r == ECONNRESET ? 0 : r);
1445
1446                 case RESTARTER_STATE_OFFLINE:
1447                 case RESTARTER_STATE_UNINIT:
1448                         if (inst->ri_i.i_primary_ctid != 0) {
1449                                 inst->ri_m_inst = safe_scf_instance_create(h);
1450                                 inst->ri_mi_deleted = B_FALSE;
1451
1452                                 libscf_reget_instance(inst);
1453                                 method_remove_contract(inst, B_TRUE, B_TRUE);
1454
1455                                 scf_instance_destroy(inst->ri_m_inst);
1456                         }
1457                         /* B_FALSE: See log_instance(..., "Enabled."); above */
1458                         log_instance(inst, B_FALSE, "Disabled.");
1459                         log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1460                             inst->ri_i.i_fmri);
1461
1462                         /*
1463                          * If we are coming from OFFLINE, it was obviously a
1464                          * disable request. But if we are coming from
1465                          * UNINIT, it may have been a disable request for a
1466                          * service in MAINT.
1467                          */
1468                         if (riq->riq_reason == restarter_str_disable_request ||
1469                             state == RESTARTER_STATE_OFFLINE)
1470                                 reason = restarter_str_disable_request;
1471                         (void) restarter_instance_update_states(h, inst,
1472                             RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1473                             RERR_RESTART, reason);
1474                         return (0);
1475
1476                 case RESTARTER_STATE_DISABLED:
1477                         break;
1478
1479                 case RESTARTER_STATE_MAINT:
1480                         /*
1481                          * We only want to pull the instance out of maintenance
1482                          * if the disable is on adminstrative request.  The
1483                          * graph engine sends _DISABLE events whenever a
1484                          * service isn't in the disabled state, and we don't
1485                          * want to pull the service out of maintenance if,
1486                          * for example, it is there due to a dependency cycle.
1487                          */
1488                         if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1489                                 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1490                         break;
1491
1492                 default:
1493 #ifndef NDEBUG
1494                         (void) fprintf(stderr, "Restarter instance %s has "
1495                             "unknown state %d.\n", inst->ri_i.i_fmri, state);
1496 #endif
1497                         abort();
1498                 }
1499         }
1500
1501         return (0);
1502 }
1503
1504 static void
1505 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1506     int32_t reason)
1507 {
1508         fork_info_t *info;
1509         restarter_str_t new_reason;
1510
1511         assert(MUTEX_HELD(&inst->ri_lock));
1512         assert(instance_in_transition(inst) == 0);
1513         assert(inst->ri_method_thread == 0);
1514
1515         log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1516             inst->ri_i.i_fmri);
1517
1518         /*
1519          * We want to keep the original reason for restarts and clear actions
1520          */
1521         switch (reason) {
1522         case restarter_str_restart_request:
1523         case restarter_str_clear_request:
1524                 new_reason = reason;
1525                 break;
1526         default:
1527                 new_reason = restarter_str_dependencies_satisfied;
1528         }
1529
1530         /* Services in the disabled and maintenance state are ignored */
1531         if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1532             inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1533             inst->ri_i.i_enabled == 0) {
1534                 log_framework(LOG_DEBUG,
1535                     "%s: start_instance -> is maint/disabled\n",
1536                     inst->ri_i.i_fmri);
1537                 return;
1538         }
1539
1540         /* Already started instances are left alone */
1541         if (instance_started(inst) == 1) {
1542                 log_framework(LOG_DEBUG,
1543                     "%s: start_instance -> is already started\n",
1544                     inst->ri_i.i_fmri);
1545                 return;
1546         }
1547
1548         log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1549
1550         (void) restarter_instance_update_states(local_handle, inst,
1551             inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1552
1553         info = startd_zalloc(sizeof (fork_info_t));
1554
1555         info->sf_id = inst->ri_id;
1556         info->sf_method_type = METHOD_START;
1557         info->sf_event_type = RERR_NONE;
1558         info->sf_reason = new_reason;
1559         inst->ri_method_thread = startd_thread_create(method_thread, info);
1560 }
1561
1562 static int
1563 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1564 {
1565         scf_instance_t *inst;
1566         int ret = 0;
1567
1568         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1569                 return (-1);
1570
1571         ret = restarter_inst_ractions_from_tty(inst);
1572
1573         scf_instance_destroy(inst);
1574         return (ret);
1575 }
1576
1577 static boolean_t
1578 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1579 {
1580         scf_instance_t *inst;
1581         boolean_t ret = B_FALSE;
1582
1583         if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1584                 return (-1);
1585
1586         if (restarter_inst_dump(inst) == 1)
1587                 ret = B_TRUE;
1588
1589         scf_instance_destroy(inst);
1590         return (ret);
1591 }
1592
1593 static void
1594 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1595     restarter_str_t reason)
1596 {
1597         fork_info_t *info;
1598         scf_instance_t *scf_inst = NULL;
1599
1600         assert(MUTEX_HELD(&rip->ri_lock));
1601         assert(reason != restarter_str_none);
1602         assert(rip->ri_method_thread == 0);
1603
1604         log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1605             restarter_get_str_short(reason));
1606         log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1607             rip->ri_i.i_fmri, restarter_get_str_short(reason));
1608
1609         /* Services in the maintenance state are ignored */
1610         if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1611                 log_framework(LOG_DEBUG,
1612                     "%s: maintain_instance -> is already in maintenance\n",
1613                     rip->ri_i.i_fmri);
1614                 return;
1615         }
1616
1617         /*
1618          * If reason state is restarter_str_service_request and
1619          * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1620          * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1621          */
1622         if (reason == restarter_str_service_request &&
1623             libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1624                 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1625                         if (restarter_inst_set_aux_fmri(scf_inst))
1626                                 log_framework(LOG_DEBUG, "%s: "
1627                                     "restarter_inst_set_aux_fmri failed: ",
1628                                     rip->ri_i.i_fmri);
1629                 } else {
1630                         log_framework(LOG_DEBUG, "%s: "
1631                             "restarter_inst_validate_ractions_aux_fmri "
1632                             "failed: ", rip->ri_i.i_fmri);
1633
1634                         if (restarter_inst_reset_aux_fmri(scf_inst))
1635                                 log_framework(LOG_DEBUG, "%s: "
1636                                     "restarter_inst_reset_aux_fmri failed: ",
1637                                     rip->ri_i.i_fmri);
1638                 }
1639                 scf_instance_destroy(scf_inst);
1640         }
1641
1642         if (immediate || !instance_started(rip)) {
1643                 if (rip->ri_i.i_primary_ctid != 0) {
1644                         rip->ri_m_inst = safe_scf_instance_create(h);
1645                         rip->ri_mi_deleted = B_FALSE;
1646
1647                         libscf_reget_instance(rip);
1648                         method_remove_contract(rip, B_TRUE, B_TRUE);
1649
1650                         scf_instance_destroy(rip->ri_m_inst);
1651                 }
1652
1653                 (void) restarter_instance_update_states(h, rip,
1654                     RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1655                     reason);
1656                 return;
1657         }
1658
1659         (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1660             RESTARTER_STATE_MAINT, RERR_NONE, reason);
1661
1662         log_transition(rip, MAINT_REQUESTED);
1663
1664         info = startd_zalloc(sizeof (*info));
1665         info->sf_id = rip->ri_id;
1666         info->sf_method_type = METHOD_STOP;
1667         info->sf_event_type = RERR_RESTART;
1668         info->sf_reason = reason;
1669         rip->ri_method_thread = startd_thread_create(method_thread, info);
1670 }
1671
1672 static void
1673 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1674 {
1675         scf_instance_t *inst;
1676         scf_snapshot_t *snap;
1677         fork_info_t *info;
1678         int r;
1679
1680         assert(MUTEX_HELD(&rip->ri_lock));
1681
1682         log_instance(rip, B_TRUE, "Rereading configuration.");
1683         log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1684             rip->ri_i.i_fmri);
1685
1686 rep_retry:
1687         r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1688         switch (r) {
1689         case 0:
1690                 break;
1691
1692         case ECONNABORTED:
1693                 libscf_handle_rebind(h);
1694                 goto rep_retry;
1695
1696         case ENOENT:
1697                 /* Must have been deleted. */
1698                 return;
1699
1700         case EINVAL:
1701         case ENOTSUP:
1702         default:
1703                 bad_error("libscf_fmri_get_instance", r);
1704         }
1705
1706         snap = libscf_get_running_snapshot(inst);
1707
1708         r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1709             &rip->ri_utmpx_prefix);
1710         switch (r) {
1711         case 0:
1712                 log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1713                     rip->ri_i.i_fmri, service_style(rip->ri_flags));
1714                 break;
1715
1716         case ECONNABORTED:
1717                 scf_instance_destroy(inst);
1718                 scf_snapshot_destroy(snap);
1719                 libscf_handle_rebind(h);
1720                 goto rep_retry;
1721
1722         case ECANCELED:
1723         case ENOENT:
1724                 /* Succeed in anticipation of REMOVE_INSTANCE. */
1725                 break;
1726
1727         default:
1728                 bad_error("libscf_get_startd_properties", r);
1729         }
1730
1731         if (instance_started(rip)) {
1732                 /* Refresh does not change the state. */
1733                 (void) restarter_instance_update_states(h, rip,
1734                     rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1735                     restarter_str_refresh);
1736
1737                 info = startd_zalloc(sizeof (*info));
1738                 info->sf_id = rip->ri_id;
1739                 info->sf_method_type = METHOD_REFRESH;
1740                 info->sf_event_type = RERR_REFRESH;
1741                 info->sf_reason = NULL;
1742
1743                 assert(rip->ri_method_thread == 0);
1744                 rip->ri_method_thread =
1745                     startd_thread_create(method_thread, info);
1746         }
1747
1748         scf_snapshot_destroy(snap);
1749         scf_instance_destroy(inst);
1750 }
1751
/*
 * Printable names for restarter events.  Log messages in this file look a
 * name up by indexing the table with the event type.
 */
const char *event_names[] = {
        "INVALID",                      /*  0 */
        "ADD_INSTANCE",                 /*  1 */
        "REMOVE_INSTANCE",              /*  2 */
        "ENABLE",                       /*  3 */
        "DISABLE",                      /*  4 */
        "ADMIN_DEGRADED",               /*  5 */
        "ADMIN_REFRESH",                /*  6 */
        "ADMIN_RESTART",                /*  7 */
        "ADMIN_MAINT_OFF",              /*  8 */
        "ADMIN_MAINT_ON",               /*  9 */
        "ADMIN_MAINT_ON_IMMEDIATE",     /* 10 */
        "STOP",                         /* 11 */
        "START",                        /* 12 */
        "DEPENDENCY_CYCLE",             /* 13 */
        "INVALID_DEPENDENCY",           /* 14 */
        "ADMIN_DISABLE",                /* 15 */
        "STOP_RESET"                    /* 16 */
};
1758
/*
 * void *restarter_process_events()
 *
 *   Called in a separate thread to process the events on an instance's
 *   queue.  Empties the queue completely, and tries to keep the thread
 *   around for a little while after the queue is empty to save on
 *   startup costs.
 *
 *   arg is the instance FMRI as a heap-allocated string; this thread owns
 *   it and frees it before returning.  The thread uses its own repository
 *   handle, which is unbound and destroyed on exit.  Always returns NULL.
 *
 *   Locking: the queue lock (ri_queue_lock) is held while the queue is
 *   inspected or modified and is dropped while an event is handled; the
 *   instance lock (ri_lock) is held only while an event is handled.
 */
static void *
restarter_process_events(void *arg)
{
        scf_handle_t *h;
        restarter_instance_qentry_t *event;
        restarter_inst_t *rip;
        char *fmri = (char *)arg;
        struct timespec to;

        (void) pthread_setname_np(pthread_self(), "restarter_process_events");

        assert(fmri != NULL);

        h = libscf_handle_create_bound_loop();

        /* grab the queue lock */
        rip = inst_lookup_queue(fmri);
        if (rip == NULL)
                goto out;

again:

        /*
         * The event stays at the head of the queue while it is handled and
         * is only unlinked and freed afterwards, at the bottom of the loop.
         */
        while ((event = uu_list_first(rip->ri_queue)) != NULL) {
                restarter_inst_t *inst;

                /* drop the queue lock */
                MUTEX_UNLOCK(&rip->ri_queue_lock);

                /*
                 * Grab the inst lock -- this waits until any outstanding
                 * method finishes running.
                 */
                inst = inst_lookup_by_name(fmri);
                if (inst == NULL) {
                        /* Getting deleted in the middle isn't an error. */
                        goto cont;
                }

                assert(instance_in_transition(inst) == 0);

                /* process the event */
                switch (event->riq_type) {
                case RESTARTER_EVENT_TYPE_ENABLE:
                case RESTARTER_EVENT_TYPE_DISABLE:
                        (void) enable_inst(h, inst, event);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
                        /* Start times are reset only if enable_inst() is 0. */
                        if (enable_inst(h, inst, event) == 0)
                                reset_start_times(inst);
                        break;

                case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
                        /*
                         * inst is not touched again after this call; the
                         * unlock below the switch is skipped by jumping
                         * straight to cont.
                         */
                        restarter_delete_inst(inst);
                        inst = NULL;
                        goto cont;

                case RESTARTER_EVENT_TYPE_STOP_RESET:
                        reset_start_times(inst);
                        /* FALLTHROUGH */
                case RESTARTER_EVENT_TYPE_STOP:
                        (void) stop_instance(h, inst, RSTOP_DEPENDENCY);
                        break;

                case RESTARTER_EVENT_TYPE_START:
                        start_instance(h, inst, event->riq_reason);
                        break;

                case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
                        maintain_instance(h, inst, 0,
                            restarter_str_dependency_cycle);
                        break;

                case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
                        maintain_instance(h, inst, 0,
                            restarter_str_invalid_dependency);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
                        /*
                         * The result of event_from_tty() only selects the
                         * reason string recorded for the transition.
                         */
                        if (event_from_tty(h, inst) == 0)
                                maintain_instance(h, inst, 0,
                                    restarter_str_service_request);
                        else
                                maintain_instance(h, inst, 0,
                                    restarter_str_administrative_request);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
                        /* As above, but with the "immediate" flag set. */
                        if (event_from_tty(h, inst) == 0)
                                maintain_instance(h, inst, 1,
                                    restarter_str_service_request);
                        else
                                maintain_instance(h, inst, 1,
                                    restarter_str_administrative_request);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
                        unmaintain_instance(h, inst, RUNMAINT_CLEAR);
                        reset_start_times(inst);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
                        refresh_instance(h, inst);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
                        log_framework(LOG_WARNING, "Restarter: "
                            "%s command (for %s) unimplemented.\n",
                            event_names[event->riq_type], inst->ri_i.i_fmri);
                        break;

                case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
                        if (!instance_started(inst)) {
                                log_framework(LOG_DEBUG, "Restarter: "
                                    "Not restarting %s; not running.\n",
                                    inst->ri_i.i_fmri);
                        } else {
                                /*
                                 * Stop the instance.  If it can be restarted,
                                 * the graph engine will send a new event.
                                 */
                                if (restart_dump(h, inst)) {
                                        /*
                                         * SIGABRT the primary contract
                                         * instead of running the stop method.
                                         */
                                        (void) contract_kill(
                                            inst->ri_i.i_primary_ctid, SIGABRT,
                                            inst->ri_i.i_fmri);
                                } else if (stop_instance(h, inst,
                                    RSTOP_RESTART) == 0) {
                                        reset_start_times(inst);
                                }
                        }
                        break;

                case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
                default:
                        /*
                         * ADD_INSTANCE is handled synchronously by
                         * restarter_event_thread() and must never be queued;
                         * anything else is an unknown event type.
                         */
#ifndef NDEBUG
                        uu_warn("%s:%d: Bad restarter event %d.  "
                            "Aborting.\n", __FILE__, __LINE__, event->riq_type);
#endif
                        abort();
                }

                assert(inst != NULL);
                MUTEX_UNLOCK(&inst->ri_lock);

cont:
                /* grab the queue lock */
                rip = inst_lookup_queue(fmri);
                if (rip == NULL)
                        goto out;

                /* delete the event */
                uu_list_remove(rip->ri_queue, event);
                startd_free(event, sizeof (restarter_instance_qentry_t));
        }

        assert(rip != NULL);

        /*
         * Try to preserve the thread for a little while for future use.
         * The wait is bounded to 3 seconds; restarter_event_thread()
         * broadcasts ri_queue_cv when it adds an event for a running thread.
         */
        to.tv_sec = 3;
        to.tv_nsec = 0;
        (void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
            &rip->ri_queue_lock, &to);

        if (uu_list_first(rip->ri_queue) != NULL)
                goto again;

        /*
         * Queue still empty: retire.  Clearing ri_queue_thread while the
         * queue lock is held lets the next queued event start a new thread.
         */
        rip->ri_queue_thread = 0;
        MUTEX_UNLOCK(&rip->ri_queue_lock);

out:
        (void) scf_handle_unbind(h);
        scf_handle_destroy(h);
        free(fmri);
        return (NULL);
}
1944
1945 static int
1946 is_admin_event(restarter_event_type_t t)
1947 {
1948         switch (t) {
1949         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1950         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1951         case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1952         case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1953         case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1954         case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1955                 return (1);
1956         default:
1957                 return (0);
1958         }
1959 }
1960
1961 static void
1962 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1963 {
1964         restarter_instance_qentry_t *qe;
1965         int r;
1966
1967         assert(MUTEX_HELD(&ri->ri_queue_lock));
1968         assert(!MUTEX_HELD(&ri->ri_lock));
1969
1970         qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1971         qe->riq_type = e->rpe_type;
1972         qe->riq_reason = e->rpe_reason;
1973
1974         uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1975         r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1976         assert(r == 0);
1977 }
1978
/*
 * void *restarter_event_thread()
 *
 *  Handle incoming graph events by placing them on a per-instance
 *  queue.  We can't lock the main part of the instance structure, so
 *  just modify the separately locked event queue portion.
 *
 *  The thread sleeps on ru->restarter_update_cv until
 *  ru->restarter_update_wakeup is set, then drains every pending protocol
 *  event.  ru->restarter_update_lock is held while dequeueing and dropped
 *  while an individual event is being handled.  The loop never terminates.
 */
/*ARGSUSED*/
static void *
restarter_event_thread(void *unused)
{
        scf_handle_t *h;

        (void) pthread_setname_np(pthread_self(), "restarter_event");

        /*
         * This is a new thread, and thus, gets its own handle
         * to the repository.
         */
        h = libscf_handle_create_bound_loop();

        MUTEX_LOCK(&ru->restarter_update_lock);

        /*CONSTCOND*/
        while (1) {
                restarter_protocol_event_t *e;

                /* Loop on the predicate to cope with spurious wakeups. */
                while (ru->restarter_update_wakeup == 0)
                        (void) pthread_cond_wait(&ru->restarter_update_cv,
                            &ru->restarter_update_lock);

                ru->restarter_update_wakeup = 0;

                while ((e = restarter_event_dequeue()) != NULL) {
                        restarter_inst_t *rip;
                        char *fmri;

                        MUTEX_UNLOCK(&ru->restarter_update_lock);

                        /*
                         * ADD_INSTANCE is special: there's likely no
                         * instance structure yet, so we need to handle the
                         * addition synchronously.
                         */
                        switch (e->rpe_type) {
                        case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
                                if (restarter_insert_inst(h, e->rpe_inst) != 0)
                                        log_error(LOG_INFO, "Restarter: "
                                            "Could not add %s.\n", e->rpe_inst);

                                /*
                                 * One fewer instance left to load; wake
                                 * anyone waiting on st_load_cv when the
                                 * count reaches zero.
                                 */
                                MUTEX_LOCK(&st->st_load_lock);
                                if (--st->st_load_instances == 0)
                                        (void) pthread_cond_broadcast(
                                            &st->st_load_cv);
                                MUTEX_UNLOCK(&st->st_load_lock);

                                goto nolookup;
                        }

                        /*
                         * Lookup the instance, locking only the event queue.
                         * Can't grab ri_lock here because it might be held
                         * by a long-running method.
                         */
                        rip = inst_lookup_queue(e->rpe_inst);
                        if (rip == NULL) {
                                log_error(LOG_INFO, "Restarter: "
                                    "Ignoring %s command for unknown service "
                                    "%s.\n", event_names[e->rpe_type],
                                    e->rpe_inst);
                                goto nolookup;
                        }

                        /* Keep ADMIN events from filling up the queue. */
                        if (is_admin_event(e->rpe_type) &&
                            uu_list_numnodes(rip->ri_queue) >
                            RINST_QUEUE_THRESHOLD) {
                                /*
                                 * NOTE(review): rip is still dereferenced by
                                 * the two log calls below after the queue
                                 * lock has been dropped -- confirm that the
                                 * instance cannot be freed in this window.
                                 */
                                MUTEX_UNLOCK(&rip->ri_queue_lock);
                                log_instance(rip, B_TRUE, "Instance event "
                                    "queue overflow.  Dropping administrative "
                                    "request.");
                                log_framework(LOG_DEBUG, "%s: Instance event "
                                    "queue overflow.  Dropping administrative "
                                    "request.\n", rip->ri_i.i_fmri);
                                goto nolookup;
                        }

                        /* Now add the event to the instance queue. */
                        restarter_queue_event(rip, e);

                        if (rip->ri_queue_thread == 0) {
                                /*
                                 * Start a thread if one isn't already
                                 * running.  The new thread receives its own
                                 * copy of the FMRI and frees it on exit.
                                 */
                                fmri = safe_strdup(e->rpe_inst);
                                rip->ri_queue_thread =  startd_thread_create(
                                    restarter_process_events, (void *)fmri);
                        } else {
                                /*
                                 * Signal the existing thread that there's
                                 * a new event.
                                 */
                                (void) pthread_cond_broadcast(
                                    &rip->ri_queue_cv);
                        }

                        MUTEX_UNLOCK(&rip->ri_queue_lock);
nolookup:
                        /* Every path releases the event and re-takes the lock. */
                        restarter_event_release(e);

                        MUTEX_LOCK(&ru->restarter_update_lock);
                }
        }

        /*
         * Unreachable for now -- there's currently no graceful cleanup
         * called on exit().
         */
        (void) scf_handle_unbind(h);
        scf_handle_destroy(h);
        return (NULL);
}
2102
2103 static restarter_inst_t *
2104 contract_to_inst(ctid_t ctid)
2105 {
2106         restarter_inst_t *inst;
2107         int id;
2108
2109         id = lookup_inst_by_contract(ctid);
2110         if (id == -1)
2111                 return (NULL);
2112
2113         inst = inst_lookup_by_id(id);
2114         if (inst != NULL) {
2115                 /*
2116                  * Since ri_lock isn't held by the contract id lookup, this
2117                  * instance may have been restarted and now be in a new
2118                  * contract, making the old contract no longer valid for this
2119                  * instance.
2120                  */
2121                 if (ctid != inst->ri_i.i_primary_ctid) {
2122                         MUTEX_UNLOCK(&inst->ri_lock);
2123                         inst = NULL;
2124                 }
2125         }
2126         return (inst);
2127 }
2128
2129 /*
2130  * void contract_action()
2131  *   Take action on contract events.
2132  */
2133 static void
2134 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2135     uint32_t type)
2136 {
2137         const char *fmri = inst->ri_i.i_fmri;
2138
2139         assert(MUTEX_HELD(&inst->ri_lock));
2140
2141         /*
2142          * If startd has stopped this contract, there is no need to
2143          * stop it again.
2144          */
2145         if (inst->ri_i.i_primary_ctid > 0 &&
2146             inst->ri_i.i_primary_ctid_stopped)
2147                 return;
2148
2149         if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2150             | CT_PR_EV_HWERR)) == 0) {
2151                 /*
2152                  * There shouldn't be other events, since that's not how we set
2153                  * the terms. Thus, just log an error and drive on.
2154                  */
2155                 log_framework(LOG_NOTICE,
2156                     "%s: contract %ld received unexpected critical event "
2157                     "(%d)\n", fmri, id, type);
2158                 return;
2159         }
2160
2161         assert(instance_in_transition(inst) == 0);
2162
2163         if (instance_is_wait_style(inst)) {
2164                 /*
2165                  * We ignore all events; if they impact the
2166                  * process we're monitoring, then the
2167                  * wait_thread will stop the instance.
2168                  */
2169                 log_framework(LOG_DEBUG,
2170                     "%s: ignoring contract event on wait-style service\n",
2171                     fmri);
2172         } else {
2173                 /*
2174                  * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2175                  */
2176                 switch (type) {
2177                 case CT_PR_EV_EMPTY:
2178                         (void) stop_instance(h, inst, RSTOP_EXIT);
2179                         break;
2180                 case CT_PR_EV_CORE:
2181                         (void) stop_instance(h, inst, RSTOP_CORE);
2182                         break;
2183                 case CT_PR_EV_SIGNAL:
2184                         (void) stop_instance(h, inst, RSTOP_SIGNAL);
2185                         break;
2186                 case CT_PR_EV_HWERR:
2187                         (void) stop_instance(h, inst, RSTOP_HWERR);
2188                         break;
2189                 }
2190         }
2191 }
2192
/*
 * void *restarter_contracts_event_thread(void *)
 *   Listens to the process contract bundle for critical events, taking action
 *   on events from contracts we know we are responsible for.
 *
 *   The thread first waits for the graph load to finish, then opens the
 *   process contract bundle and loops forever: read a critical event, look
 *   up the owning instance, hand the event to contract_action(), and
 *   acknowledge it.  Events whose contract status cannot be read, and
 *   events carrying the configd cookie, are skipped without being
 *   acknowledged here.
 */
/*ARGSUSED*/
static void *
restarter_contracts_event_thread(void *unused)
{
        int fd, err;
        scf_handle_t *local_handle;

        (void) pthread_setname_np(pthread_self(), "restarter_contracts_event");

        /*
         * Await graph load completion.  That is, stop here, until we've scanned
         * the repository for contract - instance associations.
         */
        MUTEX_LOCK(&st->st_load_lock);
        while (!(st->st_load_complete && st->st_load_instances == 0))
                (void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
        MUTEX_UNLOCK(&st->st_load_lock);

        /*
         * This is a new thread, and thus, gets its own handle
         * to the repository.
         */
        if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
                uu_die("Unable to bind a new repository handle: %s\n",
                    scf_strerror(scf_error()));

        fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
        if (fd == -1)
                uu_die("process bundle open failed");

        /*
         * Make sure we get all events (including those generated by configd
         * before this thread was started).
         */
        err = ct_event_reset(fd);
        assert(err == 0);

        for (;;) {
                int efd, sfd;
                ct_evthdl_t ev;
                uint32_t type;
                ctevid_t evid;
                ct_stathdl_t status;
                ctid_t ctid;
                restarter_inst_t *inst;
                uint64_t cookie;

                /*
                 * NOTE(review): a read error is logged and the read is
                 * retried immediately, with no delay -- confirm that a
                 * persistent failure cannot make this loop spin.
                 */
                if (err = ct_event_read_critical(fd, &ev)) {
                        log_error(LOG_WARNING,
                            "Error reading next contract event: %s",
                            strerror(err));
                        continue;
                }

                evid = ct_event_get_evid(ev);
                ctid = ct_event_get_ctid(ev);
                type = ct_event_get_type(ev);

                /* Fetch cookie. */
                if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
                    < 0) {
                        ct_event_free(ev);
                        continue;
                }

                if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
                        log_framework(LOG_WARNING, "Could not get status for "
                            "contract %ld: %s\n", ctid, strerror(err));

                        startd_close(sfd);
                        ct_event_free(ev);
                        continue;
                }

                cookie = ct_status_get_cookie(status);

                log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
                    "cookie %lld\n", type, ctid, cookie);

                /* The status handle and its fd are only needed for the cookie. */
                ct_status_free(status);

                startd_close(sfd);

                /*
                 * svc.configd(1M) restart handling performed by the
                 * fork_configd_thread.  We don't acknowledge, as that thread
                 * will do so.
                 */
                if (cookie == CONFIGD_COOKIE) {
                        ct_event_free(ev);
                        continue;
                }

                inst = NULL;
                if (storing_contract != 0 &&
                    (inst = contract_to_inst(ctid)) == NULL) {
                        /*
                         * This can happen for two reasons:
                         * - method_run() has not yet stored the
                         *    the contract into the internal hash table.
                         * - we receive an EMPTY event for an abandoned
                         *    contract.
                         * If there is any contract in the process of
                         * being stored into the hash table then re-read
                         * the event later.
                         */
                        log_framework(LOG_DEBUG,
                            "Reset event %d for unknown "
                            "contract id %ld\n", type, ctid);

                        /* don't go too fast */
                        (void) poll(NULL, 0, 100);

                        /* Reset the bundle so the unacknowledged event is read again. */
                        (void) ct_event_reset(fd);
                        ct_event_free(ev);
                        continue;
                }

                /*
                 * Do not call contract_to_inst() again if first
                 * call succeeded.
                 */
                if (inst == NULL)
                        inst = contract_to_inst(ctid);
                if (inst == NULL) {
                        /*
                         * This can happen if we receive an EMPTY
                         * event for an abandoned contract.
                         */
                        log_framework(LOG_DEBUG,
                            "Received event %d for unknown contract id "
                            "%ld\n", type, ctid);
                } else {
                        log_framework(LOG_DEBUG,
                            "Received event %d for contract id "
                            "%ld (%s)\n", type, ctid,
                            inst->ri_i.i_fmri);

                        /* contract_to_inst() handed back inst with ri_lock held. */
                        contract_action(local_handle, inst, ctid, type);

                        MUTEX_UNLOCK(&inst->ri_lock);
                }

                /*
                 * Acknowledge the event whether or not an instance was
                 * found.  Failure to open the ctl file is silently ignored.
                 */
                efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
                    O_WRONLY);
                if (efd != -1) {
                        (void) ct_ctl_ack(efd, evid);
                        startd_close(efd);
                }

                ct_event_free(ev);

        }

        /*NOTREACHED*/
        return (NULL);
}
2355
/*
 * Timeout queue, processed by restarter_timeouts_event_thread().
 *
 * Both objects are set up by timeout_init().  timeouts->tq_list is a list
 * of timeout_entry_t kept sorted by timeout_compare() and protected by
 * timeouts->tq_lock; timeout_pool is the list pool its nodes belong to.
 */
timeout_queue_t *timeouts;
static uu_list_pool_t *timeout_pool;

/*
 * Notification channel for the timeout queue.  timeout_insert() sets
 * tu_wakeup and broadcasts tu_cv, both under tu_lock, each time it queues
 * a new entry.
 */
typedef struct timeout_update {
        pthread_mutex_t         tu_lock;        /* protects tu_wakeup */
        pthread_cond_t          tu_cv;          /* broadcast on insertion */
        int                     tu_wakeup;      /* set to 1 on insertion */
} timeout_update_t;

/* The single timeout_update_t instance, allocated by timeout_init(). */
timeout_update_t *tu;
2369
/*
 * NULL-terminated list of instance FMRIs for which is_timeout_ovr()
 * reports a timeout override (logged as "Using infinite timeout").
 */
static const char *timeout_ovr_svcs[] = {
        "svc:/system/manifest-import:default",
        "svc:/network/initial:default",
        "svc:/network/service:default",
        "svc:/system/rmtmpfiles:default",
        "svc:/network/loopback:default",
        "svc:/network/physical:default",
        "svc:/system/device/local:default",
        "svc:/system/filesystem/usr:default",
        "svc:/system/filesystem/minimal:default",
        "svc:/system/filesystem/local:default",
        NULL
};
2383
2384 int
2385 is_timeout_ovr(restarter_inst_t *inst)
2386 {
2387         int i;
2388
2389         for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2390                 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2391                         log_instance(inst, B_TRUE, "Timeout override by "
2392                             "svc.startd.  Using infinite timeout.");
2393                         return (1);
2394                 }
2395         }
2396
2397         return (0);
2398 }
2399
2400 /*ARGSUSED*/
2401 static int
2402 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2403 {
2404         hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2405         hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2406
2407         if (t1 > t2)
2408                 return (1);
2409         else if (t1 < t2)
2410                 return (-1);
2411         return (0);
2412 }
2413
2414 void
2415 timeout_init()
2416 {
2417         timeouts = startd_zalloc(sizeof (timeout_queue_t));
2418
2419         (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2420
2421         timeout_pool = startd_list_pool_create("timeouts",
2422             sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2423             timeout_compare, UU_LIST_POOL_DEBUG);
2424         assert(timeout_pool != NULL);
2425
2426         timeouts->tq_list = startd_list_create(timeout_pool,
2427             timeouts, UU_LIST_SORTED);
2428         assert(timeouts->tq_list != NULL);
2429
2430         tu = startd_zalloc(sizeof (timeout_update_t));
2431         (void) pthread_cond_init(&tu->tu_cv, NULL);
2432         (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2433 }
2434
2435 void
2436 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2437 {
2438         hrtime_t now, timeout;
2439         timeout_entry_t *entry;
2440         uu_list_index_t idx;
2441
2442         assert(MUTEX_HELD(&inst->ri_lock));
2443
2444         now = gethrtime();
2445
2446         /*
2447          * If we overflow LLONG_MAX, we're never timing out anyways, so
2448          * just return.
2449          */
2450         if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2451                 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2452                     "treating as infinite.");
2453                 return;
2454         }
2455
2456         /* hrtime is in nanoseconds. Convert timeout_sec. */
2457         timeout = now + (timeout_sec * 1000000000LL);
2458
2459         entry = startd_alloc(sizeof (timeout_entry_t));
2460         entry->te_timeout = timeout;
2461         entry->te_ctid = cid;
2462         entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2463         entry->te_logstem = safe_strdup(inst->ri_logstem);
2464         entry->te_fired = 0;
2465         /* Insert the calculated timeout time onto the queue. */
2466         MUTEX_LOCK(&timeouts->tq_lock);
2467         (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2468         uu_list_node_init(entry, &entry->te_link, timeout_pool);
2469         uu_list_insert(timeouts->tq_list, entry, idx);
2470         MUTEX_UNLOCK(&timeouts->tq_lock);
2471
2472         assert(inst->ri_timeout == NULL);
2473         inst->ri_timeout = entry;
2474
2475         MUTEX_LOCK(&tu->tu_lock);
2476         tu->tu_wakeup = 1;
2477         (void) pthread_cond_broadcast(&tu->tu_cv);
2478         MUTEX_UNLOCK(&tu->tu_lock);
2479 }
2480
2481
2482 void
2483 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2484 {
2485         assert(MUTEX_HELD(&inst->ri_lock));
2486
2487         if (inst->ri_timeout == NULL)
2488                 return;
2489
2490         assert(inst->ri_timeout->te_ctid == cid);
2491
2492         MUTEX_LOCK(&timeouts->tq_lock);
2493         uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2494         MUTEX_UNLOCK(&timeouts->tq_lock);
2495
2496         free(inst->ri_timeout->te_fmri);
2497         free(inst->ri_timeout->te_logstem);
2498         startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2499         inst->ri_timeout = NULL;
2500 }
2501
2502 static int
2503 timeout_now()
2504 {
2505         timeout_entry_t *e;
2506         hrtime_t now;
2507         int ret;
2508
2509         now = gethrtime();
2510
2511         /*
2512          * Walk through the (sorted) timeouts list.  While the timeout
2513          * at the head of the list is <= the current time, kill the
2514          * method.
2515          */
2516         MUTEX_LOCK(&timeouts->tq_lock);
2517
2518         for (e = uu_list_first(timeouts->tq_list);
2519             e != NULL && e->te_timeout <= now;
2520             e = uu_list_next(timeouts->tq_list, e)) {
2521                 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2522                     "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2523                 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2524                     "Method or service exit timed out.  Killing contract %ld.",
2525                     e->te_ctid);
2526                 e->te_fired = 1;
2527                 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2528         }
2529
2530         if (uu_list_numnodes(timeouts->tq_list) > 0)
2531                 ret = 0;
2532         else
2533                 ret = -1;
2534
2535         MUTEX_UNLOCK(&timeouts->tq_lock);
2536
2537         return (ret);
2538 }
2539
2540 /*
2541  * void *restarter_timeouts_event_thread(void *)
2542  *   Responsible for monitoring the method timeouts.  This thread must
2543  *   be started before any methods are called.
2544  */
2545 /*ARGSUSED*/
2546 static void *
2547 restarter_timeouts_event_thread(void *unused)
2548 {
2549         /*
2550          * Timeouts are entered on a priority queue, which is processed by
2551          * this thread.  As timeouts are specified in seconds, we'll do
2552          * the necessary processing every second, as long as the queue
2553          * is not empty.
2554          */
2555
2556         (void) pthread_setname_np(pthread_self(), "restarter_timeouts_event");
2557
2558         /*CONSTCOND*/
2559         while (1) {
2560                 /*
2561                  * As long as the timeout list isn't empty, process it
2562                  * every second.
2563                  */
2564                 if (timeout_now() == 0) {
2565                         (void) sleep(1);
2566                         continue;
2567                 }
2568
2569                 /* The list is empty, wait until we have more timeouts. */
2570                 MUTEX_LOCK(&tu->tu_lock);
2571
2572                 while (tu->tu_wakeup == 0)
2573                         (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2574
2575                 tu->tu_wakeup = 0;
2576                 MUTEX_UNLOCK(&tu->tu_lock);
2577         }
2578
2579         return (NULL);
2580 }
2581
2582 void
2583 restarter_start()
2584 {
2585         (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2586         (void) startd_thread_create(restarter_event_thread, NULL);
2587         (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2588         (void) startd_thread_create(wait_thread, NULL);
2589 }
2590
2591
2592 void
2593 restarter_init()
2594 {
2595         restarter_instance_pool = startd_list_pool_create("restarter_instances",
2596             sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2597             ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2598         (void) memset(&instance_list, 0, sizeof (instance_list));
2599
2600         (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2601         instance_list.ril_instance_list = startd_list_create(
2602             restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2603
2604         restarter_queue_pool = startd_list_pool_create(
2605             "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2606             offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2607             UU_LIST_POOL_DEBUG);
2608
2609         contract_list_pool = startd_list_pool_create(
2610             "contract_list", sizeof (contract_entry_t),
2611             offsetof(contract_entry_t,  ce_link), NULL,
2612             UU_LIST_POOL_DEBUG);
2613         contract_hash_init();
2614
2615         log_framework(LOG_DEBUG, "Initialized restarter\n");
2616 }