763 FMD msg URLs should refer to something visible
[illumos-gate.git] / usr / src / cmd / svc / configd / backend.c
blobb7ed400cfd3975799e6087469816f1fcdef23fad
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * sqlite is not compatible with _FILE_OFFSET_BITS=64, but we need to
29 * be able to statvfs(2) possibly large systems. This define gives us
30 * access to the transitional interfaces. See lfcompile64(5) for how
31 * _LARGEFILE64_SOURCE works.
33 #define _LARGEFILE64_SOURCE
35 #include <assert.h>
36 #include <atomic.h>
37 #include <door.h>
38 #include <dirent.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <limits.h>
42 #include <pthread.h>
43 #include <stdarg.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <sys/stat.h>
49 #include <sys/statvfs.h>
50 #include <time.h>
51 #include <unistd.h>
52 #include <zone.h>
53 #include <libscf_priv.h>
55 #include "configd.h"
56 #include "repcache_protocol.h"
58 #include <sqlite.h>
59 #include <sqlite-misc.h>
62 * This file has two purposes:
64 * 1. It contains the database schema, and the code for setting up our backend
65 * databases, including installing said schema.
67 * 2. It provides a simplified interface to the SQL database library, and
68 * synchronizes MT access to the database.
/*
 * A backend is on volatile storage exactly when it has a saved persistent
 * path (be_ppath) to go back to.
 */
#define	IS_VOLATILE(be)		((be)->be_ppath != NULL)

/* Number of slots in the flight recorder's event array. */
#define	MAX_FLIGHT_RECORDER_EVENTS	100
/*
 * Result codes for a repository switch.
 * NOTE(review): presumably produced by backend_switch(), which is not
 * visible here -- confirm against its definition.
 */
typedef enum backend_switch_results {
	BACKEND_SWITCH_FATAL = -1,
	BACKEND_SWITCH_OK = 0,
	BACKEND_SWITCH_RO
} backend_switch_results_t;
80 typedef struct backend_spent {
81 uint64_t bs_count;
82 hrtime_t bs_time;
83 hrtime_t bs_vtime;
84 } backend_spent_t;
86 typedef struct backend_totals {
87 backend_spent_t bt_lock; /* waiting for lock */
88 backend_spent_t bt_exec; /* time spent executing SQL */
89 } backend_totals_t;
92 * There are times when svcadm asks configd to move the BACKEND_TYPE_NORMAL
93 * repository to volatile storage. See backend_switch(). When the
94 * repository is on volatile storage, we save the location of the permanent
95 * repository in be_ppath. We use the saved path when the time comes to
96 * move the repository back. When the repository is on permanent storage,
97 * be_ppath is set to NULL. Also see the definition of IS_VOLATILE() above
98 * for testing if the repository is on volatile storage.
100 typedef struct sqlite_backend {
101 pthread_mutex_t be_lock;
102 pthread_t be_thread; /* thread holding lock */
103 struct sqlite *be_db;
104 const char *be_path; /* path to db */
105 const char *be_ppath; /* saved path to persistent db when */
106 /* backend is volatile */
107 const char *be_checkpoint; /* path to repository checkpoint */
108 int be_readonly; /* readonly at start, and still is */
109 int be_writing; /* held for writing */
110 backend_type_t be_type; /* type of db */
111 hrtime_t be_lastcheck; /* time of last read-only check */
112 backend_totals_t be_totals[2]; /* one for reading, one for writing */
113 } sqlite_backend_t;
115 struct backend_tx {
116 sqlite_backend_t *bt_be;
117 int bt_readonly;
118 int bt_type;
119 int bt_full; /* SQLITE_FULL during tx */
/*
 * Fold one sample into (sb)->be_totals[].field: bump the count and add the
 * wall-clock and virtual time elapsed since the (ts, vts) starting stamps.
 * "writing" selects the slot: 0 for reading, non-zero for writing.
 */
#define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) { \
	backend_spent_t *__bsp = &(sb)->be_totals[!!(writing)].field; \
	__bsp->bs_count++; \
	__bsp->bs_time += (gethrtime() - ts); \
	__bsp->bs_vtime += (gethrvtime() - vts); \
}

/* As UPDATE_TOTALS_WR(), with the slot chosen by the backend's be_writing. */
#define	UPDATE_TOTALS(sb, field, ts, vts) \
	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
/*
 * A query under construction.
 * NOTE(review): presumably bq_buf is the SQL text buffer and bq_size its
 * size; the code using it is not visible here -- confirm.
 */
struct backend_query {
	char *bq_buf;
	size_t bq_size;
};
/* One table of the schema: its name and its SQL column definitions. */
struct backend_tbl_info {
	const char *bti_name;
	const char *bti_cols;
};
/*
 * One index of the schema: the table it belongs to, the index's name, and
 * the comma-separated columns it covers.
 */
struct backend_idx_info {
	const char *bxi_tbl;
	const char *bxi_idx;
	const char *bxi_cols;
};
/* Definitions for the flight recorder: */

/* What happened. */
typedef enum be_flight_type {
	/* No event yet recorded. */
	BE_FLIGHT_EV_NOEVENT = 0,
	/* Information about repo. backup */
	BE_FLIGHT_EV_BACKUP,
	/* Enter backend_create_backup_locked() */
	BE_FLIGHT_EV_BACKUP_ENTER,
	/* Request to checkpoint repository for boot time backup */
	BE_FLIGHT_EV_CHECKPOINT,
	/* Existing checkpoint detected on restart */
	BE_FLIGHT_EV_CHECKPOINT_EXISTS,
	/* Use lingering fast repository */
	BE_FLIGHT_EV_LINGERING_FAST,
	/* Requested backup not made */
	BE_FLIGHT_EV_NO_BACKUP,
	/* Main repository created */
	BE_FLIGHT_EV_REPO_CREATE,
	/* This is a restart of configd */
	BE_FLIGHT_EV_RESTART,
	/* Switch repositories */
	BE_FLIGHT_EV_SWITCH,
	/* Root transitioned to read/write */
	BE_FLIGHT_EV_TRANS_RW
} be_flight_type_t;
/* Status or qualifier attached to a flight recorder event. */
typedef enum be_flight_status {
	/* No status.  Event is informative */
	BE_FLIGHT_ST_INFO = 0,
	/* Boot time backup */
	BE_FLIGHT_ST_BOOT_BACKUP,
	/* Backup from checkpoint */
	BE_FLIGHT_ST_CHECKPOINT_BACKUP,
	/* Request from client as opposed to internal call */
	BE_FLIGHT_ST_CLIENT,
	/* Backup duplicates existing one */
	BE_FLIGHT_ST_DUPLICATE,
	/* Operation failed. */
	BE_FLIGHT_ST_FAIL,
	/* Fast repository (tmpfs) */
	BE_FLIGHT_ST_FAST,
	/* Manifest-import backup */
	BE_FLIGHT_ST_MI_BACKUP,
	/* Don't switch repositories */
	BE_FLIGHT_ST_NO_SWITCH,
	/* Other type of backup */
	BE_FLIGHT_ST_OTHER_BACKUP,
	/* Repository on permanent storage */
	BE_FLIGHT_ST_PERMANENT,
	/* Backup from repository */
	BE_FLIGHT_ST_REPO_BACKUP,
	/* Main repository is read-only */
	BE_FLIGHT_ST_RO,
	/* Main repository is read/write */
	BE_FLIGHT_ST_RW,
	/* Operation was successful */
	BE_FLIGHT_ST_SUCCESS,
	/* Switch repository */
	BE_FLIGHT_ST_SWITCH
} be_flight_status_t;
187 typedef struct be_flight_event {
188 be_flight_type_t bfe_type; /* Type of event. */
189 be_flight_status_t bfe_status; /* Result of the event. */
190 time_t bfe_time; /* Time of the event. */
191 uint_t bfe_sequence; /* Sequence number. */
192 } be_flight_event_t;
194 static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
195 static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
196 pthread_t backend_panic_thread = 0;
198 int backend_do_trace = 0; /* invoke tracing callback */
199 int backend_print_trace = 0; /* tracing callback prints SQL */
200 int backend_panic_abort = 0; /* abort when panicking */
202 /* Data for the flight_recorder. */
204 static pthread_mutex_t backend_flight_recorder_lock = PTHREAD_MUTEX_INITIALIZER;
205 static be_flight_event_t flight_recorder[MAX_FLIGHT_RECORDER_EVENTS];
206 static uint_t flight_recorder_next = 0;
207 static uint_t flight_recorder_missed = 0;
208 static uint_t flight_recorder_sequence = 0;
210 /* interval between read-only checks while starting up */
211 #define BACKEND_READONLY_CHECK_INTERVAL (2 * (hrtime_t)NANOSEC)
/*
 * Any incompatible change to the below schema should bump the version number.
 * The schema has been changed to support value ordering, but this change
 * is backwards-compatible - i.e. a previous svc.configd can use a
 * repository database with the new schema perfectly well.  As a result,
 * the schema version has not been updated, allowing downgrade of systems
 * without losing repository data.
 */
#define	BACKEND_SCHEMA_VERSION	5
223 static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
225 * service_tbl holds all services. svc_id is the identifier of the
226 * service.
229 "service_tbl",
230 "svc_id INTEGER PRIMARY KEY,"
231 "svc_name CHAR(256) NOT NULL"
235 * instance_tbl holds all of the instances. The parent service id
236 * is instance_svc.
239 "instance_tbl",
240 "instance_id INTEGER PRIMARY KEY,"
241 "instance_name CHAR(256) NOT NULL,"
242 "instance_svc INTEGER NOT NULL"
246 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
249 "snapshot_lnk_tbl",
250 "lnk_id INTEGER PRIMARY KEY,"
251 "lnk_inst_id INTEGER NOT NULL,"
252 "lnk_snap_name CHAR(256) NOT NULL,"
253 "lnk_snap_id INTEGER NOT NULL"
257 * snaplevel_tbl maps a snapshot id to a set of named, ordered
258 * snaplevels.
261 "snaplevel_tbl",
262 "snap_id INTEGER NOT NULL,"
263 "snap_level_num INTEGER NOT NULL,"
264 "snap_level_id INTEGER NOT NULL,"
265 "snap_level_service_id INTEGER NOT NULL,"
266 "snap_level_service CHAR(256) NOT NULL,"
267 "snap_level_instance_id INTEGER NULL,"
268 "snap_level_instance CHAR(256) NULL"
272 * snaplevel_lnk_tbl links snaplevels to property groups.
273 * snaplvl_pg_* is identical to the original property group,
274 * and snaplvl_gen_id overrides the generation number.
275 * The service/instance ids are as in the snaplevel.
278 "snaplevel_lnk_tbl",
279 "snaplvl_level_id INTEGER NOT NULL,"
280 "snaplvl_pg_id INTEGER NOT NULL,"
281 "snaplvl_pg_name CHAR(256) NOT NULL,"
282 "snaplvl_pg_type CHAR(256) NOT NULL,"
283 "snaplvl_pg_flags INTEGER NOT NULL,"
284 "snaplvl_gen_id INTEGER NOT NULL"
287 { NULL, NULL }
290 static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
291 { "service_tbl", "name", "svc_name" },
292 { "instance_tbl", "name", "instance_svc, instance_name" },
293 { "snapshot_lnk_tbl", "name", "lnk_inst_id, lnk_snap_name" },
294 { "snapshot_lnk_tbl", "snapid", "lnk_snap_id" },
295 { "snaplevel_tbl", "id", "snap_id" },
296 { "snaplevel_lnk_tbl", "id", "snaplvl_pg_id" },
297 { "snaplevel_lnk_tbl", "level", "snaplvl_level_id" },
298 { NULL, NULL, NULL }
301 static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
302 { NULL, NULL }
305 static struct backend_idx_info idxs_np[] = { /* BACKEND_TYPE_NONPERSIST */
306 { NULL, NULL, NULL }
309 static struct backend_tbl_info tbls_common[] = { /* all backend types */
311 * pg_tbl defines property groups. They are associated with a single
312 * service or instance. The pg_gen_id links them with the latest
313 * "edited" version of its properties.
316 "pg_tbl",
317 "pg_id INTEGER PRIMARY KEY,"
318 "pg_parent_id INTEGER NOT NULL,"
319 "pg_name CHAR(256) NOT NULL,"
320 "pg_type CHAR(256) NOT NULL,"
321 "pg_flags INTEGER NOT NULL,"
322 "pg_gen_id INTEGER NOT NULL"
326 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
327 * (prop_name, prop_type, val_id) trios.
330 "prop_lnk_tbl",
331 "lnk_prop_id INTEGER PRIMARY KEY,"
332 "lnk_pg_id INTEGER NOT NULL,"
333 "lnk_gen_id INTEGER NOT NULL,"
334 "lnk_prop_name CHAR(256) NOT NULL,"
335 "lnk_prop_type CHAR(2) NOT NULL,"
336 "lnk_val_id INTEGER"
340 * value_tbl maps a value_id to a set of values. For any given
341 * value_id, value_type is constant. The table definition here
342 * is repeated in backend_check_upgrade(), and must be kept in-sync.
345 "value_tbl",
346 "value_id INTEGER NOT NULL,"
347 "value_type CHAR(1) NOT NULL,"
348 "value_value VARCHAR NOT NULL,"
349 "value_order INTEGER DEFAULT 0"
353 * id_tbl has one row per id space
356 "id_tbl",
357 "id_name STRING NOT NULL,"
358 "id_next INTEGER NOT NULL"
362 * schema_version has a single row, which contains
363 * BACKEND_SCHEMA_VERSION at the time of creation.
366 "schema_version",
367 "schema_version INTEGER"
369 { NULL, NULL }
373 * The indexing of value_tbl is repeated in backend_check_upgrade() and
374 * must be kept in sync with the indexing specification here.
376 static struct backend_idx_info idxs_common[] = { /* all backend types */
377 { "pg_tbl", "parent", "pg_parent_id" },
378 { "pg_tbl", "name", "pg_parent_id, pg_name" },
379 { "pg_tbl", "type", "pg_parent_id, pg_type" },
380 { "prop_lnk_tbl", "base", "lnk_pg_id, lnk_gen_id" },
381 { "prop_lnk_tbl", "val", "lnk_val_id" },
382 { "value_tbl", "id", "value_id" },
383 { "id_tbl", "id", "id_name" },
384 { NULL, NULL, NULL }
/* In/out state shared with run_single_int_callback(). */
struct run_single_int_info {
	/* where the parsed integer is stored */
	uint32_t *rs_out;
	/* set to REP_PROTOCOL_SUCCESS once a value has been stored */
	int rs_result;
};
392 static rep_protocol_responseid_t backend_copy_repository(const char *,
393 const char *, int);
394 static rep_protocol_responseid_t backend_do_copy(const char *, int,
395 const char *, int, size_t *);
398 * The flight recorder keeps track of events that happen primarily while
399 * the system is booting. Once the system is up an running, one can take a
400 * gcore(1) of configd and examine the events with mdb. Since we're most
401 * interested in early boot events, we stop recording events when the
402 * recorder is full.
404 static void
405 flight_recorder_event(be_flight_type_t type, be_flight_status_t res)
407 be_flight_event_t *data;
408 uint_t item;
409 uint_t sequence;
411 if (pthread_mutex_lock(&backend_flight_recorder_lock) != 0) {
412 atomic_inc_uint(&flight_recorder_missed);
413 return;
415 if (flight_recorder_next >= MAX_FLIGHT_RECORDER_EVENTS) {
416 /* Hit end of the array. No more event recording. */
417 item = flight_recorder_next;
418 } else {
419 item = flight_recorder_next++;
420 sequence = flight_recorder_sequence++;
422 (void) pthread_mutex_unlock(&backend_flight_recorder_lock);
424 if (item >= MAX_FLIGHT_RECORDER_EVENTS) {
425 /* Array is filled. Stop recording events */
426 atomic_inc_uint(&flight_recorder_missed);
427 return;
429 data = &flight_recorder[item];
430 (void) memset(data, 0, sizeof (*data));
431 data->bfe_type = type;
432 data->bfe_status = res;
433 data->bfe_sequence = sequence;
434 data->bfe_time = time(NULL);
437 /*ARGSUSED*/
438 static int
439 run_single_int_callback(void *arg, int columns, char **vals, char **names)
441 struct run_single_int_info *info = arg;
442 uint32_t val;
444 char *endptr = vals[0];
446 assert(info->rs_result != REP_PROTOCOL_SUCCESS);
447 assert(columns == 1);
449 if (vals[0] == NULL)
450 return (BACKEND_CALLBACK_CONTINUE);
452 errno = 0;
453 val = strtoul(vals[0], &endptr, 10);
454 if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
455 backend_panic("malformed integer \"%20s\"", vals[0]);
457 *info->rs_out = val;
458 info->rs_result = REP_PROTOCOL_SUCCESS;
459 return (BACKEND_CALLBACK_CONTINUE);
462 /*ARGSUSED*/
464 backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
466 return (BACKEND_CALLBACK_ABORT);
470 * check to see if we can successfully start a transaction; if not, the
471 * filesystem is mounted read-only.
473 static int
474 backend_is_readonly(struct sqlite *db, const char *path)
476 int r;
477 statvfs64_t stat;
479 if (statvfs64(path, &stat) == 0 && (stat.f_flag & ST_RDONLY))
480 return (SQLITE_READONLY);
482 r = sqlite_exec(db,
483 "BEGIN TRANSACTION; "
484 "UPDATE schema_version SET schema_version = schema_version; ",
485 NULL, NULL, NULL);
486 (void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
487 return (r);
490 static void
491 backend_trace_sql(void *arg, const char *sql)
493 sqlite_backend_t *be = arg;
495 if (backend_print_trace) {
496 (void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
500 static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
501 static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];
504 * For a native build, repositories are created from scratch, so upgrade
505 * is not an issue. This variable is implicitly protected by
506 * bes[BACKEND_TYPE_NORMAL]->be_lock.
508 #ifdef NATIVE_BUILD
509 static boolean_t be_normal_upgraded = B_TRUE;
510 #else
511 static boolean_t be_normal_upgraded = B_FALSE;
512 #endif /* NATIVE_BUILD */
515 * Has backend been upgraded? In nonpersistent case, answer is always
516 * yes.
518 boolean_t
519 backend_is_upgraded(backend_tx_t *bt)
521 if (bt->bt_type == BACKEND_TYPE_NONPERSIST)
522 return (B_TRUE);
523 return (be_normal_upgraded);
526 #define BACKEND_PANIC_TIMEOUT (50 * MILLISEC)
528 * backend_panic() -- some kind of database problem or corruption has been hit.
529 * We attempt to quiesce the other database users -- all of the backend sql
530 * entry points will call backend_panic(NULL) if a panic is in progress, as
531 * will any attempt to start a transaction.
533 * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
534 * either drop the lock or call backend_panic(). If they don't respond in
535 * time, we'll just exit anyway.
537 void
538 backend_panic(const char *format, ...)
540 int i;
541 va_list args;
542 int failed = 0;
544 (void) pthread_mutex_lock(&backend_panic_lock);
545 if (backend_panic_thread != 0) {
546 (void) pthread_mutex_unlock(&backend_panic_lock);
548 * first, drop any backend locks we're holding, then
549 * sleep forever on the panic_cv.
551 for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
552 if (bes[i] != NULL &&
553 bes[i]->be_thread == pthread_self())
554 (void) pthread_mutex_unlock(&bes[i]->be_lock);
556 (void) pthread_mutex_lock(&backend_panic_lock);
557 for (;;)
558 (void) pthread_cond_wait(&backend_panic_cv,
559 &backend_panic_lock);
561 backend_panic_thread = pthread_self();
562 (void) pthread_mutex_unlock(&backend_panic_lock);
564 for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
565 if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
566 (void) pthread_mutex_unlock(&bes[i]->be_lock);
569 va_start(args, format);
570 configd_vcritical(format, args);
571 va_end(args);
573 for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
574 timespec_t rel;
576 rel.tv_sec = 0;
577 rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
579 if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
580 if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
581 &rel) != 0)
582 failed++;
585 if (failed) {
586 configd_critical("unable to quiesce database\n");
589 if (backend_panic_abort)
590 abort();
592 exit(CONFIGD_EXIT_DATABASE_BAD);
596 * Returns
597 * _SUCCESS
598 * _DONE - callback aborted query
599 * _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
601 static int
602 backend_error(sqlite_backend_t *be, int error, char *errmsg)
604 if (error == SQLITE_OK)
605 return (REP_PROTOCOL_SUCCESS);
607 switch (error) {
608 case SQLITE_ABORT:
609 free(errmsg);
610 return (REP_PROTOCOL_DONE);
612 case SQLITE_NOMEM:
613 case SQLITE_FULL:
614 case SQLITE_TOOBIG:
615 free(errmsg);
616 return (REP_PROTOCOL_FAIL_NO_RESOURCES);
618 default:
619 backend_panic("%s: db error: %s", be->be_path, errmsg);
620 /*NOTREACHED*/
/*
 * Free a backup list: the first out_sz strings of out_arg, then the array
 * itself.  A NULL array with out_sz <= 0 is a no-op.
 */
static void
backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
{
	char **list = (char **)out_arg;
	ssize_t i;

	for (i = 0; i < out_sz; i++)
		free(list[i]);
	free(list);
}
/*
 * Returns non-zero iff stamp is exactly "YYYYMMDD_HHMMSS": fourteen digits
 * with an underscore after the eighth, and nothing following.
 */
static int
backend_backup_stamp_ok(const char *stamp)
{
	/*
	 * year, month, day, hour, min, sec, plus an '_'.
	 */
	const size_t ndigits = 4 + 5*2 + 1;
	const size_t baroffset = 4 + 2*2;
	size_t i;

	for (i = 0; i < ndigits; i++) {
		char c = stamp[i];

		if (i == baroffset) {
			if (c != '_')
				return (0);
		} else if (c < '0' || c > '9') {
			return (0);
		}
	}
	return (stamp[ndigits] == '\0');
}

/*
 * builds a inverse-time-sorted array of backup files.  The path is a
 * a single buffer, and the pointers look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDDHHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * dirname will either be pathname, or ".".
 *
 * The buffer is modified temporarily while scanning and put back before
 * returning.  On success *out_arg receives a malloc()ed array of strdup()ed
 * file names (NULL when there are none), to be released with
 * backend_backup_cleanup().
 *
 * Returns the number of elements in the array, 0 if there are no previous
 * backups, or -1 on error.
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char saved_first, saved_end;
	char *pathend = pathname + pathlen;
	char *dirname, *basename;
	size_t baselen;
	char **list = NULL;
	size_t count = 0;
	struct dirent *ent;
	DIR *dir;

	/* Cut the buffer at pathlen so only the base path is examined. */
	saved_end = *pathend;
	*pathend = '\0';

	basename = strrchr(pathname, '/');
	if (basename == NULL) {
		basename = pathname;
		dirname = ".";
	} else {
		assert(pathend > pathname && basename < pathend);
		basename++;
		dirname = pathname;
	}

	baselen = strlen(basename);

	/*
	 * munge the string temporarily for the opendir(), then restore it.
	 */
	saved_first = basename[0];
	basename[0] = '\0';
	dir = opendir(dirname);
	basename[0] = saved_first;

	if (dir == NULL)
		goto fail;

	while ((ent = readdir(dir)) != NULL) {
		char *cand, *cand_stamp;
		char **grown;
		size_t idx;

		/*
		 * Must match:
		 *	basename-YYYYMMDD_HHMMSS
		 * or we ignore it.
		 */
		if (strncmp(ent->d_name, basename, baselen) != 0 ||
		    ent->d_name[baselen] != '-' ||
		    !backend_backup_stamp_ok(ent->d_name + baselen + 1))
			continue;

		/*
		 * We have a match.  insertion-sort it into our list: once
		 * the candidate outranks an entry it takes that slot, and
		 * the displaced entry is carried down the rest of the list.
		 */
		cand = strdup(ent->d_name);
		if (cand == NULL)
			goto fail_closedir;
		cand_stamp = strrchr(cand, '-');

		for (idx = 0; idx < count; idx++) {
			char *cur = list[idx];
			char *cur_stamp = strrchr(cur, '-');
			int cmp = strcmp(cand_stamp, cur_stamp);

			if (cmp == 0)
				cmp = strcmp(cand, cur);

			if (cmp == 0) {
				/* exact duplicate; drop it */
				free(cand);
				cand = NULL;
				break;
			}
			if (cmp > 0) {
				list[idx] = cand;
				cand = cur;
				cand_stamp = cur_stamp;
			}
		}

		if (idx < count) {
			assert(cand == NULL);
			continue;
		}

		/* Whatever is still being carried goes on the end. */
		grown = realloc(list, (count + 1) * sizeof (*list));
		if (grown == NULL) {
			free(cand);
			goto fail_closedir;
		}
		list = grown;
		list[count++] = cand;
	}
	(void) closedir(dir);

	basename[baselen] = saved_end;

	*out_arg = (const char **)list;
	return (count);

fail_closedir:
	(void) closedir(dir);
fail:
	basename[0] = saved_first;
	*pathend = saved_end;

	backend_backup_cleanup((const char **)list, count);

	*out_arg = NULL;
	return (-1);
}
786 * Copies the repository path into out, a buffer of out_len bytes,
787 * removes the ".db" (or whatever) extension, and, if name is non-NULL,
788 * appends "-name" to it. If name is non-NULL, it can fail with:
790 * _TRUNCATED will not fit in buffer.
791 * _BAD_REQUEST name is not a valid identifier
793 static rep_protocol_responseid_t
794 backend_backup_base(sqlite_backend_t *be, const char *name,
795 char *out, size_t out_len)
797 char *p, *q;
798 size_t len;
801 * for paths of the form /path/to/foo.db, we truncate at the final
802 * '.'.
804 (void) strlcpy(out, IS_VOLATILE(be) ? be->be_ppath : be->be_path,
805 out_len);
807 p = strrchr(out, '/');
808 q = strrchr(out, '.');
810 if (p != NULL && q != NULL && q > p)
811 *q = 0;
813 if (name != NULL) {
814 len = strlen(out);
815 assert(len < out_len);
817 out += len;
818 out_len -= len;
820 len = strlen(name);
823 * verify that the name tag is entirely alphabetic,
824 * non-empty, and not too long.
826 if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
827 uu_check_name(name, UU_NAME_DOMAIN) < 0)
828 return (REP_PROTOCOL_FAIL_BAD_REQUEST);
830 if (snprintf(out, out_len, "-%s", name) >= out_len)
831 return (REP_PROTOCOL_FAIL_TRUNCATED);
834 return (REP_PROTOCOL_SUCCESS);
838 * Make a checkpoint of the repository, so that we can use it for a backup
839 * when the root file system becomes read/write. We'll first copy the
840 * repository into a temporary file and then rename it to
841 * REPOSITORY_CHECKPOINT. This is protection against configd crashing in
842 * the middle of the copy and leaving a partial copy at
843 * REPOSITORY_CHECKPOINT. Renames are atomic.
845 static rep_protocol_responseid_t
846 backend_checkpoint_repository(sqlite_backend_t *be)
848 rep_protocol_responseid_t r;
850 assert(be->be_readonly); /* Only need a checkpoint if / is ro */
851 assert(be->be_type == BACKEND_TYPE_NORMAL);
852 assert(be->be_checkpoint == NULL); /* Only 1 checkpoint */
854 r = backend_copy_repository(be->be_path, REPOSITORY_CHECKPOINT, 0);
855 if (r == REP_PROTOCOL_SUCCESS)
856 be->be_checkpoint = REPOSITORY_CHECKPOINT;
858 flight_recorder_event(BE_FLIGHT_EV_CHECKPOINT,
859 r == REP_PROTOCOL_SUCCESS ? BE_FLIGHT_ST_SUCCESS :
860 BE_FLIGHT_ST_FAIL);
862 return (r);
/*
 * See if a backup is needed.  We do a backup unless both files are
 * byte-for-byte identical.
 *
 * Returns 0 only when rep_name and backup_name are distinct files with
 * identical contents.  Returns 1 (backup needed) in every other case,
 * including when either file cannot be opened or read.
 */
static int
backend_check_backup_needed(const char *rep_name, const char *backup_name)
{
	int repfd = open(rep_name, O_RDONLY);
	int fd = open(backup_name, O_RDONLY);
	struct stat s_rep, s_backup;
	int c1, c2;
	int needed = 1;

	FILE *f_rep = NULL;
	FILE *f_backup = NULL;

	if (repfd < 0 || fd < 0)
		goto out;

	if (fstat(repfd, &s_rep) < 0 || fstat(fd, &s_backup) < 0)
		goto out;

	/*
	 * if they are the same file, we need to do a backup to break the
	 * hard link or symlink involved.
	 */
	if (s_rep.st_ino == s_backup.st_ino && s_rep.st_dev == s_backup.st_dev)
		goto out;

	if (s_rep.st_size != s_backup.st_size)
		goto out;

	/*
	 * Once fdopen() succeeds the stream owns the descriptor and
	 * fclose() will close it.  Forget our copy so that it is not
	 * closed a second time below; in a threaded process the number
	 * may already belong to another open file by then.
	 */
	if ((f_rep = fdopen(repfd, "r")) == NULL)
		goto out;
	repfd = -1;

	if ((f_backup = fdopen(fd, "r")) == NULL)
		goto out;
	fd = -1;

	do {
		c1 = getc(f_rep);
		c2 = getc(f_backup);
		if (c1 != c2)
			goto out;
	} while (c1 != EOF);

	/* Both streams ended together; make sure that was EOF, not error. */
	if (!ferror(f_rep) && !ferror(f_backup))
		needed = 0;

out:
	if (f_rep != NULL)
		(void) fclose(f_rep);
	if (f_backup != NULL)
		(void) fclose(f_backup);
	if (repfd >= 0)
		(void) close(repfd);
	if (fd >= 0)
		(void) close(fd);
	return (needed);
}
928 * This interface is called to perform the actual copy
930 * Return:
931 * _FAIL_UNKNOWN read/write fails
932 * _FAIL_NO_RESOURCES out of memory
933 * _SUCCESS copy succeeds
935 static rep_protocol_responseid_t
936 backend_do_copy(const char *src, int srcfd, const char *dst,
937 int dstfd, size_t *sz)
939 char *buf;
940 off_t nrd, nwr, n, r_off = 0, w_off = 0;
942 if ((buf = malloc(8192)) == NULL)
943 return (REP_PROTOCOL_FAIL_NO_RESOURCES);
945 while ((nrd = read(srcfd, buf, 8192)) != 0) {
946 if (nrd < 0) {
947 if (errno == EINTR)
948 continue;
950 configd_critical(
951 "Backend copy failed: fails to read from %s "
952 "at offset %d: %s\n", src, r_off, strerror(errno));
953 free(buf);
954 return (REP_PROTOCOL_FAIL_UNKNOWN);
957 r_off += nrd;
959 nwr = 0;
960 do {
961 if ((n = write(dstfd, &buf[nwr], nrd - nwr)) < 0) {
962 if (errno == EINTR)
963 continue;
965 configd_critical(
966 "Backend copy failed: fails to write to %s "
967 "at offset %d: %s\n", dst, w_off,
968 strerror(errno));
969 free(buf);
970 return (REP_PROTOCOL_FAIL_UNKNOWN);
973 nwr += n;
974 w_off += n;
976 } while (nwr < nrd);
979 if (sz)
980 *sz = w_off;
982 free(buf);
983 return (REP_PROTOCOL_SUCCESS);
987 * Can return:
988 * _BAD_REQUEST name is not valid
989 * _TRUNCATED name is too long for current repository path
990 * _UNKNOWN failed for unknown reason (details written to
991 * console)
992 * _BACKEND_READONLY backend is not writable
993 * _NO_RESOURCES out of memory
994 * _SUCCESS Backup completed successfully.
996 static rep_protocol_responseid_t
997 backend_create_backup_locked(sqlite_backend_t *be, const char *name)
999 const char **old_list;
1000 ssize_t old_sz;
1001 ssize_t old_max = max_repository_backups;
1002 ssize_t cur;
1003 char *finalname;
1004 char *finalpath;
1005 char *tmppath;
1006 int infd, outfd;
1007 size_t len;
1008 time_t now;
1009 struct tm now_tm;
1010 be_flight_status_t backup_type;
1011 rep_protocol_responseid_t result;
1012 const char *src;
1013 int use_checkpoint;
1015 if (strcmp(name, REPOSITORY_BOOT_BACKUP) == 0) {
1016 backup_type = BE_FLIGHT_ST_BOOT_BACKUP;
1017 } else if (strcmp(name, "manifest_import") == 0) {
1018 backup_type = BE_FLIGHT_ST_MI_BACKUP;
1019 } else {
1020 backup_type = BE_FLIGHT_ST_OTHER_BACKUP;
1022 flight_recorder_event(BE_FLIGHT_EV_BACKUP_ENTER, backup_type);
1024 if ((finalpath = malloc(PATH_MAX)) == NULL)
1025 return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1027 if ((tmppath = malloc(PATH_MAX)) == NULL) {
1028 free(finalpath);
1029 return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1032 if (be->be_readonly) {
1033 flight_recorder_event(BE_FLIGHT_EV_NO_BACKUP, BE_FLIGHT_ST_RO);
1034 result = REP_PROTOCOL_FAIL_BACKEND_READONLY;
1035 goto out;
1038 result = backend_backup_base(be, name, finalpath, PATH_MAX);
1039 if (result != REP_PROTOCOL_SUCCESS)
1040 goto out;
1043 * If this is a boot backup and if we made a checkpoint before the
1044 * root file system became read/write, then we should use the
1045 * checkpoint as the source. Otherwise, we'll use the actual
1046 * repository as the source.
1048 if (be->be_checkpoint && name &&
1049 strcmp(REPOSITORY_BOOT_BACKUP, name) == 0) {
1050 backup_type = BE_FLIGHT_ST_CHECKPOINT_BACKUP;
1051 use_checkpoint = 1;
1052 src = be->be_checkpoint;
1053 } else {
1054 backup_type = BE_FLIGHT_ST_REPO_BACKUP;
1055 use_checkpoint = 0;
1056 src = be->be_path;
1058 flight_recorder_event(BE_FLIGHT_EV_BACKUP, backup_type);
1059 if (!backend_check_backup_needed(src, finalpath)) {
1061 * No changes, so there is no need for a backup.
1063 flight_recorder_event(BE_FLIGHT_EV_NO_BACKUP,
1064 BE_FLIGHT_ST_DUPLICATE);
1065 result = REP_PROTOCOL_SUCCESS;
1066 goto out;
1070 * remember the original length, and the basename location
1072 len = strlen(finalpath);
1073 finalname = strrchr(finalpath, '/');
1074 if (finalname != NULL)
1075 finalname++;
1076 else
1077 finalname = finalpath;
1079 (void) strlcpy(tmppath, finalpath, PATH_MAX);
1080 if (strlcat(tmppath, "-tmpXXXXXX", PATH_MAX) >= PATH_MAX) {
1081 result = REP_PROTOCOL_FAIL_TRUNCATED;
1082 goto out;
1085 now = time(NULL);
1086 if (localtime_r(&now, &now_tm) == NULL) {
1087 configd_critical(
1088 "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
1089 strerror(errno));
1090 result = REP_PROTOCOL_FAIL_UNKNOWN;
1091 goto out;
1094 if (strftime(finalpath + len, PATH_MAX - len,
1095 "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >= PATH_MAX - len) {
1096 result = REP_PROTOCOL_FAIL_TRUNCATED;
1097 goto out;
1100 infd = open(src, O_RDONLY);
1101 if (infd < 0) {
1102 configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
1103 src, strerror(errno));
1104 result = REP_PROTOCOL_FAIL_UNKNOWN;
1105 goto out;
1108 outfd = mkstemp(tmppath);
1109 if (outfd < 0) {
1110 configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
1111 name, tmppath, strerror(errno));
1112 (void) close(infd);
1113 result = REP_PROTOCOL_FAIL_UNKNOWN;
1114 goto out;
1117 if ((result = backend_do_copy(src, infd, (const char *)tmppath,
1118 outfd, NULL)) != REP_PROTOCOL_SUCCESS)
1119 goto fail;
1122 * grab the old list before doing our re-name.
1124 if (old_max > 0)
1125 old_sz = backend_backup_get_prev(finalpath, len, &old_list);
1127 if (rename(tmppath, finalpath) < 0) {
1128 configd_critical(
1129 "\"%s\" backup failed: rename(%s, %s): %s\n",
1130 name, tmppath, finalpath, strerror(errno));
1131 result = REP_PROTOCOL_FAIL_UNKNOWN;
1132 goto fail;
1135 tmppath[len] = 0; /* strip -XXXXXX, for reference symlink */
1137 (void) unlink(tmppath);
1138 if (symlink(finalname, tmppath) < 0) {
1139 configd_critical(
1140 "\"%s\" backup completed, but updating "
1141 "\"%s\" symlink to \"%s\" failed: %s\n",
1142 name, tmppath, finalname, strerror(errno));
1145 if (old_max > 0 && old_sz > 0) {
1146 /* unlink all but the first (old_max - 1) files */
1147 for (cur = old_max - 1; cur < old_sz; cur++) {
1148 (void) strlcpy(finalname, old_list[cur],
1149 PATH_MAX - (finalname - finalpath));
1150 if (unlink(finalpath) < 0)
1151 configd_critical(
1152 "\"%s\" backup completed, but removing old "
1153 "file \"%s\" failed: %s\n",
1154 name, finalpath, strerror(errno));
1157 backend_backup_cleanup(old_list, old_sz);
1160 result = REP_PROTOCOL_SUCCESS;
1161 flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_SUCCESS);
1163 fail:
1164 (void) close(infd);
1165 (void) close(outfd);
1166 if (result != REP_PROTOCOL_SUCCESS) {
1167 flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_FAIL);
1168 (void) unlink(tmppath);
1171 out:
1172 /* Get rid of the checkpoint file now that we've used it. */
1173 if (use_checkpoint && (result == REP_PROTOCOL_SUCCESS)) {
1174 (void) unlink(be->be_checkpoint);
1175 be->be_checkpoint = NULL;
1177 free(finalpath);
1178 free(tmppath);
1180 return (result);
1184 * Check if value_tbl has been upgraded in the main database, and
1185 * if not (if the value_order column is not present), and do_upgrade is true,
1186 * upgrade value_tbl in repository to contain the additional value_order
1187 * column. The version of sqlite used means ALTER TABLE is not
1188 * available, so we cannot simply use "ALTER TABLE value_tbl ADD COLUMN".
1189 * Rather we need to create a temporary table with the additional column,
1190 * import the value_tbl, drop the original value_tbl, recreate the value_tbl
1191 * with the additional column, import the values from value_tbl_tmp,
1192 * reindex and finally drop value_tbl_tmp. During boot, we wish to check
1193 * if the repository has been upgraded before it is writable, so that
1194 * property value retrieval can use the appropriate form of the SELECT
1195 * statement that retrieves property values. As a result, we need to check
1196 * if the repository has been upgraded prior to the point when we can
1197 * actually carry out the update.
1199 void
1200 backend_check_upgrade(sqlite_backend_t *be, boolean_t do_upgrade)
1202 char *errp;
1203 int r;
1205 if (be_normal_upgraded)
1206 return;
1208 * Test if upgrade is needed. If value_order column does not exist,
1209 * we need to upgrade the schema.
1211 r = sqlite_exec(be->be_db, "SELECT value_order FROM value_tbl LIMIT 1;",
1212 NULL, NULL, NULL);
1213 if (r == SQLITE_ERROR && do_upgrade) {
1214 /* No value_order column - needs upgrade */
1215 configd_info("Upgrading SMF repository format...");
1216 r = sqlite_exec(be->be_db,
1217 "BEGIN TRANSACTION; "
1218 "CREATE TABLE value_tbl_tmp ( "
1219 "value_id INTEGER NOT NULL, "
1220 "value_type CHAR(1) NOT NULL, "
1221 "value_value VARCHAR NOT NULL, "
1222 "value_order INTEGER DEFAULT 0); "
1223 "INSERT INTO value_tbl_tmp "
1224 "(value_id, value_type, value_value) "
1225 "SELECT value_id, value_type, value_value FROM value_tbl; "
1226 "DROP TABLE value_tbl; "
1227 "CREATE TABLE value_tbl( "
1228 "value_id INTEGER NOT NULL, "
1229 "value_type CHAR(1) NOT NULL, "
1230 "value_value VARCHAR NOT NULL, "
1231 "value_order INTEGER DEFAULT 0); "
1232 "INSERT INTO value_tbl SELECT * FROM value_tbl_tmp; "
1233 "CREATE INDEX value_tbl_id ON value_tbl (value_id); "
1234 "DROP TABLE value_tbl_tmp; "
1235 "COMMIT TRANSACTION; "
1236 "VACUUM; ",
1237 NULL, NULL, &errp);
1238 if (r == SQLITE_OK) {
1239 configd_info("SMF repository upgrade is complete.");
1240 } else {
1241 backend_panic("%s: repository upgrade failed: %s",
1242 be->be_path, errp);
1243 /* NOTREACHED */
1246 if (r == SQLITE_OK)
1247 be_normal_upgraded = B_TRUE;
1248 else
1249 be_normal_upgraded = B_FALSE;
1252 static int
1253 backend_check_readonly(sqlite_backend_t *be, int writing, hrtime_t t)
1255 const char *check_path;
1256 char *errp;
1257 struct sqlite *new;
1258 int r;
1260 assert(be->be_readonly);
1261 assert(be == bes[BACKEND_TYPE_NORMAL]);
1264 * If we don't *need* to be writable, only check every once in a
1265 * while.
1267 if (!writing) {
1268 if ((uint64_t)(t - be->be_lastcheck) <
1269 BACKEND_READONLY_CHECK_INTERVAL)
1270 return (REP_PROTOCOL_SUCCESS);
1271 be->be_lastcheck = t;
1275 * It could be that the repository has been moved to non-persistent
1276 * storage for performance reasons. In this case we need to check
1277 * the persistent path to see if it is writable. The
1278 * non-persistent path will always be writable.
1280 check_path = IS_VOLATILE(be) ? be->be_ppath : be->be_path;
1282 new = sqlite_open(check_path, 0600, &errp);
1283 if (new == NULL) {
1284 backend_panic("reopening %s: %s\n", check_path, errp);
1285 /*NOTREACHED*/
1287 r = backend_is_readonly(new, check_path);
1289 if (r != SQLITE_OK) {
1291 * The underlying storage for the permanent repository is
1292 * still read-only, so we don't want to change the state or
1293 * move the checkpointed backup if it exists. On the other
1294 * hand if the repository has been copied to volatile
1295 * storage, we'll let our caller go ahead and write to the
1296 * database.
1298 sqlite_close(new);
1299 if (writing && (IS_VOLATILE(be) == 0))
1300 return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
1301 return (REP_PROTOCOL_SUCCESS);
1305 * We can write! If the repository is not on volatile storage,
1306 * swap the db handles. Mark ourself as writable, upgrade the
1307 * repository if necessary and make a backup.
1309 be->be_readonly = 0;
1310 flight_recorder_event(BE_FLIGHT_EV_TRANS_RW, BE_FLIGHT_ST_RW);
1311 if (IS_VOLATILE(be)) {
1313 * If the repository is on volatile storage, don't switch
1314 * the handles. We'll continue to use the repository that
1315 * is on tmpfs until we're told to move it back by one of
1316 * our clients. Clients, specifically manifest_import,
1317 * move the repository to tmpfs for performance reasons,
1318 * and that is the reason to not switch it back until we're
1319 * told to do so.
1321 flight_recorder_event(BE_FLIGHT_EV_TRANS_RW,
1322 BE_FLIGHT_ST_NO_SWITCH);
1323 sqlite_close(new);
1324 } else {
1325 flight_recorder_event(BE_FLIGHT_EV_TRANS_RW,
1326 BE_FLIGHT_ST_SWITCH);
1327 sqlite_close(be->be_db);
1328 be->be_db = new;
1331 if (be->be_type == BACKEND_TYPE_NORMAL)
1332 backend_check_upgrade(be, B_TRUE);
1334 if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
1335 REP_PROTOCOL_SUCCESS) {
1336 configd_critical(
1337 "unable to create \"%s\" backup of \"%s\"\n",
1338 REPOSITORY_BOOT_BACKUP, be->be_path);
1341 return (REP_PROTOCOL_SUCCESS);
1345 * If t is not BACKEND_TYPE_NORMAL, can fail with
1346 * _BACKEND_ACCESS - backend does not exist
1348 * If writing is nonzero, can also fail with
1349 * _BACKEND_READONLY - backend is read-only
1351 static int
1352 backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
1354 sqlite_backend_t *be = NULL;
1355 hrtime_t ts, vts;
1357 *bep = NULL;
1359 assert(t == BACKEND_TYPE_NORMAL ||
1360 t == BACKEND_TYPE_NONPERSIST);
1362 be = bes[t];
1363 if (t == BACKEND_TYPE_NORMAL)
1364 assert(be != NULL); /* should always be there */
1366 if (be == NULL)
1367 return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);
1369 if (backend_panic_thread != 0)
1370 backend_panic(NULL); /* don't proceed */
1372 ts = gethrtime();
1373 vts = gethrvtime();
1374 (void) pthread_mutex_lock(&be->be_lock);
1375 UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);
1377 if (backend_panic_thread != 0) {
1378 (void) pthread_mutex_unlock(&be->be_lock);
1379 backend_panic(NULL); /* don't proceed */
1381 be->be_thread = pthread_self();
1383 if (be->be_readonly) {
1384 int r;
1385 assert(t == BACKEND_TYPE_NORMAL);
1387 r = backend_check_readonly(be, writing, ts);
1388 if (r != REP_PROTOCOL_SUCCESS) {
1389 be->be_thread = 0;
1390 (void) pthread_mutex_unlock(&be->be_lock);
1391 return (r);
1395 if (backend_do_trace)
1396 (void) sqlite_trace(be->be_db, backend_trace_sql, be);
1397 else
1398 (void) sqlite_trace(be->be_db, NULL, NULL);
1400 be->be_writing = writing;
1401 *bep = be;
1402 return (REP_PROTOCOL_SUCCESS);
1405 static void
1406 backend_unlock(sqlite_backend_t *be)
1408 be->be_writing = 0;
1409 be->be_thread = 0;
1410 (void) pthread_mutex_unlock(&be->be_lock);
1413 static void
1414 backend_destroy(sqlite_backend_t *be)
1416 if (be->be_db != NULL) {
1417 sqlite_close(be->be_db);
1418 be->be_db = NULL;
1420 be->be_thread = 0;
1421 (void) pthread_mutex_unlock(&be->be_lock);
1422 (void) pthread_mutex_destroy(&be->be_lock);
1425 static void
1426 backend_create_finish(backend_type_t backend_id, sqlite_backend_t *be)
1428 assert(MUTEX_HELD(&be->be_lock));
1429 assert(be == &be_info[backend_id]);
1431 bes[backend_id] = be;
1432 (void) pthread_mutex_unlock(&be->be_lock);
/*
 * Write the whole NUL-terminated string mess to fd, looping over short
 * writes and retrying writes interrupted by a signal (EINTR).
 *
 * Returns 0 once every byte has been written, or -1 if write(2) fails
 * for any reason other than EINTR (errno is left as set by write(2)).
 */
static int
backend_fd_write(int fd, const char *mess)
{
	/* size_t, not int: strlen()'s result must not be narrowed. */
	size_t remaining = strlen(mess);

	while (remaining > 0) {
		ssize_t written = write(fd, mess, remaining);

		if (written < 0) {
			if (errno == EINTR)
				continue;
			return (-1);
		}
		mess += written;
		remaining -= (size_t)written;
	}
	return (0);
}
1451 * Can return:
1452 * _BAD_REQUEST name is not valid
1453 * _TRUNCATED name is too long for current repository path
1454 * _UNKNOWN failed for unknown reason (details written to
1455 * console)
1456 * _BACKEND_READONLY backend is not writable
1457 * _NO_RESOURCES out of memory
1458 * _SUCCESS Backup completed successfully.
1460 rep_protocol_responseid_t
1461 backend_create_backup(const char *name)
1463 rep_protocol_responseid_t result;
1464 sqlite_backend_t *be;
1466 flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_CLIENT);
1467 result = backend_lock(BACKEND_TYPE_NORMAL, 0, &be);
1468 assert(result == REP_PROTOCOL_SUCCESS);
1470 result = backend_create_backup_locked(be, name);
1471 backend_unlock(be);
1473 return (result);
1477 * This function makes a copy of the repository at src, placing the copy at
1478 * dst. It is used to copy a repository on permanent storage to volatile
1479 * storage or vice versa. If the source file is on volatile storage, it is
1480 * often times desirable to delete it after the copy has been made and
1481 * verified. To remove the source repository, set remove_src to 1.
1483 * Can return:
1485 * REP_PROTOCOL_SUCCESS successful copy and rename
1486 * REP_PROTOCOL_FAIL_UNKNOWN file operation error
1487 * REP_PROTOCOL_FAIL_NO_RESOURCES out of memory
1489 static rep_protocol_responseid_t
1490 backend_copy_repository(const char *src, const char *dst, int remove_src)
1492 int srcfd, dstfd;
1493 char *tmppath = malloc(PATH_MAX);
1494 rep_protocol_responseid_t res = REP_PROTOCOL_SUCCESS;
1495 struct stat s_buf;
1496 size_t cpsz, sz;
1498 if (tmppath == NULL) {
1499 res = REP_PROTOCOL_FAIL_NO_RESOURCES;
1500 goto out;
1504 * Create and open the related db files
1506 (void) strlcpy(tmppath, dst, PATH_MAX);
1507 sz = strlcat(tmppath, "-XXXXXX", PATH_MAX);
1508 assert(sz < PATH_MAX);
1509 if (sz >= PATH_MAX) {
1510 configd_critical(
1511 "Backend copy failed: strlcat %s: overflow\n", tmppath);
1512 abort();
1515 if ((dstfd = mkstemp(tmppath)) < 0) {
1516 configd_critical("Backend copy failed: mkstemp %s: %s\n",
1517 tmppath, strerror(errno));
1518 res = REP_PROTOCOL_FAIL_UNKNOWN;
1519 goto out;
1522 if ((srcfd = open(src, O_RDONLY)) < 0) {
1523 configd_critical("Backend copy failed: opening %s: %s\n",
1524 src, strerror(errno));
1525 res = REP_PROTOCOL_FAIL_UNKNOWN;
1526 goto errexit;
1530 * fstat the backend before copy for sanity check.
1532 if (fstat(srcfd, &s_buf) < 0) {
1533 configd_critical("Backend copy failed: fstat %s: %s\n",
1534 src, strerror(errno));
1535 res = REP_PROTOCOL_FAIL_UNKNOWN;
1536 goto errexit;
1539 if ((res = backend_do_copy(src, srcfd, dst, dstfd, &cpsz)) !=
1540 REP_PROTOCOL_SUCCESS)
1541 goto errexit;
1543 if (cpsz != s_buf.st_size) {
1544 configd_critical("Backend copy failed: incomplete copy\n");
1545 res = REP_PROTOCOL_FAIL_UNKNOWN;
1546 goto errexit;
1550 * Rename tmppath to dst
1552 if (rename(tmppath, dst) < 0) {
1553 configd_critical(
1554 "Backend copy failed: rename %s to %s: %s\n",
1555 tmppath, dst, strerror(errno));
1556 res = REP_PROTOCOL_FAIL_UNKNOWN;
1559 errexit:
1560 if (res != REP_PROTOCOL_SUCCESS && unlink(tmppath) < 0)
1561 configd_critical(
1562 "Backend copy failed: remove %s: %s\n",
1563 tmppath, strerror(errno));
1565 (void) close(srcfd);
1566 (void) close(dstfd);
1568 out:
1569 free(tmppath);
1570 if (remove_src) {
1571 if (unlink(src) < 0)
1572 configd_critical(
1573 "Backend copy failed: remove %s: %s\n",
1574 src, strerror(errno));
1577 return (res);
1581 * Perform sanity check on the repository.
1582 * Return 0 if check succeeds or -1 if fails.
1584 static int
1585 backend_switch_check(struct sqlite *be_db, char **errp)
1587 struct run_single_int_info info;
1588 uint32_t val = -1UL;
1589 int r;
1591 info.rs_out = &val;
1592 info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1594 r = sqlite_exec(be_db,
1595 "SELECT schema_version FROM schema_version;",
1596 run_single_int_callback, &info, errp);
1598 if (r == SQLITE_OK &&
1599 info.rs_result != REP_PROTOCOL_FAIL_NOT_FOUND &&
1600 val == BACKEND_SCHEMA_VERSION)
1601 return (0);
1602 else
1603 return (-1);
1607 * backend_switch() implements the REP_PROTOCOL_SWITCH request from
1608 * clients. First, it blocks all other clients from accessing the
1609 * repository by calling backend_lock to lock the repository. It either
1610 * copies the repository from it's permanent storage location
1611 * (REPOSITORY_DB) to its fast volatile location (FAST_REPOSITORY_DB), or
1612 * vice versa. dir determines the direction of the copy.
1614 * dir = 0 Copy from permanent location to volatile location.
1615 * dir = 1 Copy from volatile location to permanent location.
1617 * Can return:
1618 * REP_PROTOCOL_SUCCESS successful switch
1619 * REP_PROTOCOL_FAIL_BACKEND_ACCESS backen access fails
1620 * REP_PROTOCOL_FAIL_BACKEND_READONLY backend is not writable
1621 * REP_PROTOCOL_FAIL_UNKNOWN file operation error
1622 * REP_PROTOCOL_FAIL_NO_RESOURCES out of memory
1624 rep_protocol_responseid_t
1625 backend_switch(int dir)
1627 rep_protocol_responseid_t result;
1628 sqlite_backend_t *be;
1629 struct sqlite *new;
1630 char *errp;
1631 const char *dst;
1633 flight_recorder_event(BE_FLIGHT_EV_SWITCH, BE_FLIGHT_ST_CLIENT);
1636 * If switching back to the main repository, lock for writing.
1637 * Otherwise, lock for reading.
1639 result = backend_lock(BACKEND_TYPE_NORMAL, dir ? 1 : 0,
1640 &be);
1641 if (result != REP_PROTOCOL_SUCCESS)
1642 return (result);
1644 if (dir) {
1645 flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1646 BE_FLIGHT_ST_PERMANENT);
1647 dst = REPOSITORY_DB;
1648 } else {
1649 flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1650 BE_FLIGHT_ST_FAST);
1651 dst = FAST_REPOSITORY_DB;
1655 * Do the actual copy and rename
1657 if (strcmp(be->be_path, dst) == 0) {
1658 flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1659 BE_FLIGHT_ST_DUPLICATE);
1660 result = REP_PROTOCOL_SUCCESS;
1661 goto errout;
1664 result = backend_copy_repository(be->be_path, dst, dir);
1665 if (result != REP_PROTOCOL_SUCCESS) {
1666 goto errout;
1670 * Do the backend sanity check and switch
1672 new = sqlite_open(dst, 0600, &errp);
1673 if (new != NULL) {
1675 * Sanity check
1677 if (backend_switch_check(new, &errp) == 0) {
1678 free((char *)be->be_path);
1679 be->be_path = strdup(dst);
1680 if (be->be_path == NULL) {
1681 configd_critical(
1682 "Backend switch failed: strdup %s: %s\n",
1683 dst, strerror(errno));
1684 result = REP_PROTOCOL_FAIL_NO_RESOURCES;
1685 sqlite_close(new);
1686 } else {
1687 sqlite_close(be->be_db);
1688 be->be_db = new;
1689 if (dir) {
1690 /* We're back on permanent storage. */
1691 be->be_ppath = NULL;
1692 } else {
1694 * Repository is now on volatile
1695 * storage. Save the location of
1696 * the persistent repository.
1698 be->be_ppath = REPOSITORY_DB;
1701 } else {
1702 configd_critical(
1703 "Backend switch failed: integrity check %s: %s\n",
1704 dst, errp);
1705 result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1707 } else {
1708 configd_critical("Backend switch failed: sqlite_open %s: %s\n",
1709 dst, errp);
1710 result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1713 errout:
1714 if (result == REP_PROTOCOL_SUCCESS) {
1715 flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1716 BE_FLIGHT_ST_SUCCESS);
1717 } else {
1718 flight_recorder_event(BE_FLIGHT_EV_SWITCH, BE_FLIGHT_ST_FAIL);
1720 backend_unlock(be);
1721 return (result);
1725 * This routine is called to attempt the recovery of
1726 * the most recent valid repository if possible when configd
1727 * is restarted for some reasons or when system crashes
1728 * during the switch operation. The repository databases
1729 * referenced here are indicators of successful switch
1730 * operations.
1732 static backend_switch_results_t
1733 backend_switch_recovery(void)
1735 const char *fast_db = FAST_REPOSITORY_DB;
1736 char *errp = NULL;
1737 struct stat s_buf;
1738 struct sqlite *be_db;
1739 int r;
1740 backend_switch_results_t res = BACKEND_SWITCH_OK;
1743 * A good transient db containing most recent data can
1744 * exist if svc.configd crashes during the
1745 * switch operation. If that is the case, check its
1746 * integrity and use it.
1748 if (stat(fast_db, &s_buf) < 0) {
1749 return (BACKEND_SWITCH_OK);
1752 /* Determine if persistent repository is read-only */
1753 be_db = sqlite_open(REPOSITORY_DB, 0600, &errp);
1754 if (be_db == NULL) {
1755 configd_critical("Unable to open \"%s\". %s\n",
1756 REPOSITORY_DB, errp == NULL ? "" : errp);
1757 free(errp);
1758 return (BACKEND_SWITCH_FATAL);
1760 r = backend_is_readonly(be_db, REPOSITORY_DB);
1761 sqlite_close(be_db);
1762 if (r != SQLITE_OK) {
1763 if (r == SQLITE_READONLY) {
1764 return (BACKEND_SWITCH_RO);
1766 return (BACKEND_SWITCH_FATAL);
1770 * Do sanity check on the db
1772 be_db = sqlite_open(fast_db, 0600, &errp);
1774 if (be_db != NULL) {
1775 if (backend_switch_check(be_db, &errp) == 0) {
1776 if (backend_copy_repository(fast_db,
1777 REPOSITORY_DB, 1) != REP_PROTOCOL_SUCCESS) {
1778 res = BACKEND_SWITCH_FATAL;
1781 sqlite_close(be_db);
1783 free(errp);
1786 * If we get to this point, the fast_db has either been copied or
1787 * it is useless. Either way, get rid of it.
1789 (void) unlink(fast_db);
1791 return (res);
1794 /*ARGSUSED*/
1795 static int
1796 backend_integrity_callback(void *private, int narg, char **vals, char **cols)
1798 char **out = private;
1799 char *old = *out;
1800 char *new;
1801 const char *info;
1802 size_t len;
1803 int x;
1805 for (x = 0; x < narg; x++) {
1806 if ((info = vals[x]) != NULL &&
1807 strcmp(info, "ok") != 0) {
1808 len = (old == NULL)? 0 : strlen(old);
1809 len += strlen(info) + 2; /* '\n' + '\0' */
1811 new = realloc(old, len);
1812 if (new == NULL)
1813 return (BACKEND_CALLBACK_ABORT);
1814 if (old == NULL)
1815 new[0] = 0;
1816 old = *out = new;
1817 (void) strlcat(new, info, len);
1818 (void) strlcat(new, "\n", len);
1821 return (BACKEND_CALLBACK_CONTINUE);
1824 #define BACKEND_CREATE_LOCKED -2
1825 #define BACKEND_CREATE_FAIL -1
1826 #define BACKEND_CREATE_SUCCESS 0
1827 #define BACKEND_CREATE_READONLY 1
1828 #define BACKEND_CREATE_NEED_INIT 2
1829 static int
1830 backend_create(backend_type_t backend_id, const char *db_file,
1831 sqlite_backend_t **bep)
1833 char *errp;
1834 char *integrity_results = NULL;
1835 sqlite_backend_t *be;
1836 int r;
1837 uint32_t val = -1UL;
1838 struct run_single_int_info info;
1839 int fd;
1841 assert(backend_id >= 0 && backend_id < BACKEND_TYPE_TOTAL);
1843 be = &be_info[backend_id];
1845 assert(be->be_db == NULL);
1847 (void) pthread_mutex_init(&be->be_lock, NULL);
1848 (void) pthread_mutex_lock(&be->be_lock);
1850 be->be_type = backend_id;
1851 be->be_path = strdup(db_file);
1852 if (be->be_path == NULL) {
1853 perror("malloc");
1854 goto fail;
1857 be->be_db = sqlite_open(be->be_path, 0600, &errp);
1859 if (be->be_db == NULL) {
1860 if (strstr(errp, "out of memory") != NULL) {
1861 configd_critical("%s: %s\n", db_file, errp);
1862 free(errp);
1864 goto fail;
1867 /* report it as an integrity failure */
1868 integrity_results = errp;
1869 errp = NULL;
1870 goto integrity_fail;
1874 * check if we are inited and of the correct schema version
1877 info.rs_out = &val;
1878 info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1880 r = sqlite_exec(be->be_db, "SELECT schema_version FROM schema_version;",
1881 run_single_int_callback, &info, &errp);
1882 if (r == SQLITE_ERROR &&
1883 strcmp("no such table: schema_version", errp) == 0) {
1884 free(errp);
1886 * Could be an empty repository, could be pre-schema_version
1887 * schema. Check for id_tbl, which has always been there.
1889 r = sqlite_exec(be->be_db, "SELECT count() FROM id_tbl;",
1890 NULL, NULL, &errp);
1891 if (r == SQLITE_ERROR &&
1892 strcmp("no such table: id_tbl", errp) == 0) {
1893 free(errp);
1894 *bep = be;
1895 return (BACKEND_CREATE_NEED_INIT);
1898 configd_critical("%s: schema version mismatch\n", db_file);
1899 goto fail;
1901 if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1902 free(errp);
1903 *bep = NULL;
1904 backend_destroy(be);
1905 return (BACKEND_CREATE_LOCKED);
1907 if (r == SQLITE_OK) {
1908 if (info.rs_result == REP_PROTOCOL_FAIL_NOT_FOUND ||
1909 val != BACKEND_SCHEMA_VERSION) {
1910 configd_critical("%s: schema version mismatch\n",
1911 db_file);
1912 goto fail;
1917 * pull in the whole database sequentially.
1919 if ((fd = open(db_file, O_RDONLY)) >= 0) {
1920 size_t sz = 64 * 1024;
1921 char *buffer = malloc(sz);
1922 if (buffer != NULL) {
1923 while (read(fd, buffer, sz) > 0)
1925 free(buffer);
1927 (void) close(fd);
1931 * run an integrity check
1933 r = sqlite_exec(be->be_db, "PRAGMA integrity_check;",
1934 backend_integrity_callback, &integrity_results, &errp);
1936 if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1937 free(errp);
1938 *bep = NULL;
1939 backend_destroy(be);
1940 return (BACKEND_CREATE_LOCKED);
1942 if (r == SQLITE_ABORT) {
1943 free(errp);
1944 errp = NULL;
1945 integrity_results = "out of memory running integrity check\n";
1946 } else if (r != SQLITE_OK && integrity_results == NULL) {
1947 integrity_results = errp;
1948 errp = NULL;
1951 integrity_fail:
1952 if (integrity_results != NULL) {
1953 const char *fname = "/etc/svc/volatile/db_errors";
1954 if ((fd = open(fname, O_CREAT|O_WRONLY|O_APPEND, 0600)) < 0) {
1955 fname = NULL;
1956 } else {
1957 if (backend_fd_write(fd, "\n\n") < 0 ||
1958 backend_fd_write(fd, db_file) < 0 ||
1959 backend_fd_write(fd,
1960 ": PRAGMA integrity_check; failed. Results:\n") <
1961 0 || backend_fd_write(fd, integrity_results) < 0 ||
1962 backend_fd_write(fd, "\n\n") < 0) {
1963 fname = NULL;
1965 (void) close(fd);
1968 if (!is_main_repository ||
1969 backend_id == BACKEND_TYPE_NONPERSIST) {
1970 if (fname != NULL)
1971 configd_critical(
1972 "%s: integrity check failed. Details in "
1973 "%s\n", db_file, fname);
1974 else
1975 configd_critical(
1976 "%s: integrity check failed.\n",
1977 db_file);
1978 } else {
1979 (void) fprintf(stderr,
1980 "\n"
1981 "svc.configd: smf(5) database integrity check of:\n"
1982 "\n"
1983 " %s\n"
1984 "\n"
1985 " failed. The database might be damaged or a media error might have\n"
1986 " prevented it from being verified. Additional information useful to\n"
1987 " your service provider%s%s\n"
1988 "\n"
1989 " The system will not be able to boot until you have restored a working\n"
1990 " database. svc.startd(1M) will provide a sulogin(1M) prompt for recovery\n"
1991 " purposes. The command:\n"
1992 "\n"
1993 " /lib/svc/bin/restore_repository\n"
1994 "\n"
1995 " can be run to restore a backup version of your repository. See\n"
1996 " http://illumos.org/msg/SMF-8000-MY for more information.\n"
1997 "\n",
1998 db_file,
1999 (fname == NULL)? ":\n\n" : " is in:\n\n ",
2000 (fname == NULL)? integrity_results : fname);
2002 free(errp);
2003 goto fail;
2007 * Simply do check if backend has been upgraded. We do not wish
2008 * to actually carry out upgrade here - the main repository may
2009 * not be writable at this point. Actual upgrade is carried out
2010 * via backend_check_readonly(). This check is done so that
2011 * we determine repository state - upgraded or not - and then
2012 * the appropriate SELECT statement (value-ordered or not)
2013 * can be used when retrieving property values early in boot.
2015 if (backend_id == BACKEND_TYPE_NORMAL)
2016 backend_check_upgrade(be, B_FALSE);
2018 * check if we are writable
2020 r = backend_is_readonly(be->be_db, be->be_path);
2022 if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
2023 free(errp);
2024 *bep = NULL;
2025 backend_destroy(be);
2026 return (BACKEND_CREATE_LOCKED);
2028 if (r != SQLITE_OK && r != SQLITE_FULL) {
2029 free(errp);
2030 be->be_readonly = 1;
2031 *bep = be;
2032 return (BACKEND_CREATE_READONLY);
2035 *bep = be;
2036 return (BACKEND_CREATE_SUCCESS);
2038 fail:
2039 *bep = NULL;
2040 backend_destroy(be);
2041 return (BACKEND_CREATE_FAIL);
/*
 * Round arg up to a power of two (arg itself if it already is one).
 *
 * In twos-complement arithmetic (arg & -arg) isolates the lowest set bit
 * of arg; adding that bit carries upward, so repeating the step until a
 * single bit remains yields the result.
 */
static size_t
round_up_to_p2(size_t arg)
{
	/*
	 * Don't allow a zero result: arg must be nonzero and must not
	 * have its top bit set.
	 */
	assert(arg > 0 && ((ssize_t)arg > 0));

	for (;;) {
		size_t lowest;

		/* Zero or one bit set: nothing left to carry. */
		if ((arg & (arg - 1)) == 0)
			break;

		lowest = arg & (~arg + 1);	/* same value as arg & -arg */
		arg += lowest;
	}

	return (arg);
}
2063 * Returns
2064 * _NO_RESOURCES - out of memory
2065 * _BACKEND_ACCESS - backend type t (other than _NORMAL) doesn't exist
2066 * _DONE - callback aborted query
2067 * _SUCCESS
2070 backend_run(backend_type_t t, backend_query_t *q,
2071 backend_run_callback_f *cb, void *data)
2073 char *errmsg = NULL;
2074 int ret;
2075 sqlite_backend_t *be;
2076 hrtime_t ts, vts;
2078 if (q == NULL || q->bq_buf == NULL)
2079 return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2081 if ((ret = backend_lock(t, 0, &be)) != REP_PROTOCOL_SUCCESS)
2082 return (ret);
2084 ts = gethrtime();
2085 vts = gethrvtime();
2086 ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
2087 UPDATE_TOTALS(be, bt_exec, ts, vts);
2088 ret = backend_error(be, ret, errmsg);
2089 backend_unlock(be);
2091 return (ret);
2095 * Starts a "read-only" transaction -- i.e., locks out writers as long
2096 * as it is active.
2098 * Fails with
2099 * _NO_RESOURCES - out of memory
2101 * If t is not _NORMAL, can also fail with
2102 * _BACKEND_ACCESS - backend does not exist
2104 * If writable is true, can also fail with
2105 * _BACKEND_READONLY
2107 static int
2108 backend_tx_begin_common(backend_type_t t, backend_tx_t **txp, int writable)
2110 backend_tx_t *ret;
2111 sqlite_backend_t *be;
2112 int r;
2114 *txp = NULL;
2116 ret = uu_zalloc(sizeof (*ret));
2117 if (ret == NULL)
2118 return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2120 if ((r = backend_lock(t, writable, &be)) != REP_PROTOCOL_SUCCESS) {
2121 uu_free(ret);
2122 return (r);
2125 ret->bt_be = be;
2126 ret->bt_readonly = !writable;
2127 ret->bt_type = t;
2128 ret->bt_full = 0;
2130 *txp = ret;
2131 return (REP_PROTOCOL_SUCCESS);
2135 backend_tx_begin_ro(backend_type_t t, backend_tx_t **txp)
2137 return (backend_tx_begin_common(t, txp, 0));
2140 static void
2141 backend_tx_end(backend_tx_t *tx)
2143 sqlite_backend_t *be;
2145 be = tx->bt_be;
2147 if (tx->bt_full) {
2148 struct sqlite *new;
2151 * sqlite tends to be sticky with SQLITE_FULL, so we try
2152 * to get a fresh database handle if we got a FULL warning
2153 * along the way. If that fails, no harm done.
2155 new = sqlite_open(be->be_path, 0600, NULL);
2156 if (new != NULL) {
2157 sqlite_close(be->be_db);
2158 be->be_db = new;
2161 backend_unlock(be);
2162 tx->bt_be = NULL;
2163 uu_free(tx);
2166 void
2167 backend_tx_end_ro(backend_tx_t *tx)
2169 assert(tx->bt_readonly);
2170 backend_tx_end(tx);
2174 * Fails with
2175 * _NO_RESOURCES - out of memory
2176 * _BACKEND_ACCESS
2177 * _BACKEND_READONLY
2180 backend_tx_begin(backend_type_t t, backend_tx_t **txp)
2182 int r;
2183 char *errmsg;
2184 hrtime_t ts, vts;
2186 r = backend_tx_begin_common(t, txp, 1);
2187 if (r != REP_PROTOCOL_SUCCESS)
2188 return (r);
2190 ts = gethrtime();
2191 vts = gethrvtime();
2192 r = sqlite_exec((*txp)->bt_be->be_db, "BEGIN TRANSACTION", NULL, NULL,
2193 &errmsg);
2194 UPDATE_TOTALS((*txp)->bt_be, bt_exec, ts, vts);
2195 if (r == SQLITE_FULL)
2196 (*txp)->bt_full = 1;
2197 r = backend_error((*txp)->bt_be, r, errmsg);
2199 if (r != REP_PROTOCOL_SUCCESS) {
2200 assert(r != REP_PROTOCOL_DONE);
2201 (void) sqlite_exec((*txp)->bt_be->be_db,
2202 "ROLLBACK TRANSACTION", NULL, NULL, NULL);
2203 backend_tx_end(*txp);
2204 *txp = NULL;
2205 return (r);
2208 (*txp)->bt_readonly = 0;
2210 return (REP_PROTOCOL_SUCCESS);
2213 void
2214 backend_tx_rollback(backend_tx_t *tx)
2216 int r;
2217 char *errmsg;
2218 sqlite_backend_t *be;
2219 hrtime_t ts, vts;
2221 assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2222 be = tx->bt_be;
2224 ts = gethrtime();
2225 vts = gethrvtime();
2226 r = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
2227 &errmsg);
2228 UPDATE_TOTALS(be, bt_exec, ts, vts);
2229 if (r == SQLITE_FULL)
2230 tx->bt_full = 1;
2231 (void) backend_error(be, r, errmsg);
2233 backend_tx_end(tx);
2237 * Fails with
2238 * _NO_RESOURCES - out of memory
2241 backend_tx_commit(backend_tx_t *tx)
2243 int r, r2;
2244 char *errmsg;
2245 sqlite_backend_t *be;
2246 hrtime_t ts, vts;
2248 assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2249 be = tx->bt_be;
2250 ts = gethrtime();
2251 vts = gethrvtime();
2252 r = sqlite_exec(be->be_db, "COMMIT TRANSACTION", NULL, NULL,
2253 &errmsg);
2254 UPDATE_TOTALS(be, bt_exec, ts, vts);
2255 if (r == SQLITE_FULL)
2256 tx->bt_full = 1;
2258 r = backend_error(be, r, errmsg);
2259 assert(r != REP_PROTOCOL_DONE);
2261 if (r != REP_PROTOCOL_SUCCESS) {
2262 r2 = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
2263 &errmsg);
2264 r2 = backend_error(be, r2, errmsg);
2265 if (r2 != REP_PROTOCOL_SUCCESS)
2266 backend_panic("cannot rollback failed commit");
2268 backend_tx_end(tx);
2269 return (r);
2271 backend_tx_end(tx);
2272 return (REP_PROTOCOL_SUCCESS);
2275 static const char *
2276 id_space_to_name(enum id_space id)
2278 switch (id) {
2279 case BACKEND_ID_SERVICE_INSTANCE:
2280 return ("SI");
2281 case BACKEND_ID_PROPERTYGRP:
2282 return ("PG");
2283 case BACKEND_ID_GENERATION:
2284 return ("GEN");
2285 case BACKEND_ID_PROPERTY:
2286 return ("PROP");
2287 case BACKEND_ID_VALUE:
2288 return ("VAL");
2289 case BACKEND_ID_SNAPNAME:
2290 return ("SNAME");
2291 case BACKEND_ID_SNAPSHOT:
2292 return ("SHOT");
2293 case BACKEND_ID_SNAPLEVEL:
2294 return ("SLVL");
2295 default:
2296 abort();
2297 /*NOTREACHED*/
2302 * Returns a new id or 0 if the id argument is invalid or the query fails.
2304 uint32_t
2305 backend_new_id(backend_tx_t *tx, enum id_space id)
2307 struct run_single_int_info info;
2308 uint32_t new_id = 0;
2309 const char *name = id_space_to_name(id);
2310 char *errmsg;
2311 int ret;
2312 sqlite_backend_t *be;
2313 hrtime_t ts, vts;
2315 assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2316 be = tx->bt_be;
2318 info.rs_out = &new_id;
2319 info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
2321 ts = gethrtime();
2322 vts = gethrvtime();
2323 ret = sqlite_exec_printf(be->be_db,
2324 "SELECT id_next FROM id_tbl WHERE (id_name = '%q');"
2325 "UPDATE id_tbl SET id_next = id_next + 1 WHERE (id_name = '%q');",
2326 run_single_int_callback, &info, &errmsg, name, name);
2327 UPDATE_TOTALS(be, bt_exec, ts, vts);
2328 if (ret == SQLITE_FULL)
2329 tx->bt_full = 1;
2331 ret = backend_error(be, ret, errmsg);
2333 if (ret != REP_PROTOCOL_SUCCESS) {
2334 return (0);
2337 return (new_id);
2341 * Returns
2342 * _NO_RESOURCES - out of memory
2343 * _DONE - callback aborted query
2344 * _SUCCESS
2347 backend_tx_run(backend_tx_t *tx, backend_query_t *q,
2348 backend_run_callback_f *cb, void *data)
2350 char *errmsg = NULL;
2351 int ret;
2352 sqlite_backend_t *be;
2353 hrtime_t ts, vts;
2355 assert(tx != NULL && tx->bt_be != NULL);
2356 be = tx->bt_be;
2358 if (q == NULL || q->bq_buf == NULL)
2359 return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2361 ts = gethrtime();
2362 vts = gethrvtime();
2363 ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
2364 UPDATE_TOTALS(be, bt_exec, ts, vts);
2365 if (ret == SQLITE_FULL)
2366 tx->bt_full = 1;
2367 ret = backend_error(be, ret, errmsg);
2369 return (ret);
2373 * Returns
2374 * _NO_RESOURCES - out of memory
2375 * _NOT_FOUND - the query returned no results
2376 * _SUCCESS - the query returned a single integer
2379 backend_tx_run_single_int(backend_tx_t *tx, backend_query_t *q, uint32_t *buf)
2381 struct run_single_int_info info;
2382 int ret;
2384 info.rs_out = buf;
2385 info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
2387 ret = backend_tx_run(tx, q, run_single_int_callback, &info);
2388 assert(ret != REP_PROTOCOL_DONE);
2390 if (ret != REP_PROTOCOL_SUCCESS)
2391 return (ret);
2393 return (info.rs_result);
2397 * Fails with
2398 * _NO_RESOURCES - out of memory
2401 backend_tx_run_update(backend_tx_t *tx, const char *format, ...)
2403 va_list a;
2404 char *errmsg;
2405 int ret;
2406 sqlite_backend_t *be;
2407 hrtime_t ts, vts;
2409 assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2410 be = tx->bt_be;
2412 va_start(a, format);
2413 ts = gethrtime();
2414 vts = gethrvtime();
2415 ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2416 UPDATE_TOTALS(be, bt_exec, ts, vts);
2417 if (ret == SQLITE_FULL)
2418 tx->bt_full = 1;
2419 va_end(a);
2420 ret = backend_error(be, ret, errmsg);
2421 assert(ret != REP_PROTOCOL_DONE);
2423 return (ret);
2427 * returns REP_PROTOCOL_FAIL_NOT_FOUND if no changes occured
2430 backend_tx_run_update_changed(backend_tx_t *tx, const char *format, ...)
2432 va_list a;
2433 char *errmsg;
2434 int ret;
2435 sqlite_backend_t *be;
2436 hrtime_t ts, vts;
2438 assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2439 be = tx->bt_be;
2441 va_start(a, format);
2442 ts = gethrtime();
2443 vts = gethrvtime();
2444 ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2445 UPDATE_TOTALS(be, bt_exec, ts, vts);
2446 if (ret == SQLITE_FULL)
2447 tx->bt_full = 1;
2448 va_end(a);
2450 ret = backend_error(be, ret, errmsg);
2452 return (ret);
/*
 * Call backend_add_schema() with the element counts computed from the
 * arrays themselves.  tbls and idxs must be real arrays (not pointers),
 * since sizeof is applied to them.
 */
#define	BACKEND_ADD_SCHEMA(be, file, tbls, idxs) \
	(backend_add_schema((be), (file), \
	    (tbls), sizeof (tbls) / sizeof ((tbls)[0]), \
	    (idxs), sizeof (idxs) / sizeof ((idxs)[0])))
2460 static int
2461 backend_add_schema(sqlite_backend_t *be, const char *file,
2462 struct backend_tbl_info *tbls, int tbl_count,
2463 struct backend_idx_info *idxs, int idx_count)
2465 int i;
2466 char *errmsg;
2467 int ret;
2470 * Create the tables.
2472 for (i = 0; i < tbl_count; i++) {
2473 if (tbls[i].bti_name == NULL) {
2474 assert(i + 1 == tbl_count);
2475 break;
2477 ret = sqlite_exec_printf(be->be_db,
2478 "CREATE TABLE %s (%s);\n",
2479 NULL, NULL, &errmsg, tbls[i].bti_name, tbls[i].bti_cols);
2481 if (ret != SQLITE_OK) {
2482 configd_critical(
2483 "%s: %s table creation fails: %s\n", file,
2484 tbls[i].bti_name, errmsg);
2485 free(errmsg);
2486 return (-1);
2491 * Make indices on key tables and columns.
2493 for (i = 0; i < idx_count; i++) {
2494 if (idxs[i].bxi_tbl == NULL) {
2495 assert(i + 1 == idx_count);
2496 break;
2499 ret = sqlite_exec_printf(be->be_db,
2500 "CREATE INDEX %s_%s ON %s (%s);\n",
2501 NULL, NULL, &errmsg, idxs[i].bxi_tbl, idxs[i].bxi_idx,
2502 idxs[i].bxi_tbl, idxs[i].bxi_cols);
2504 if (ret != SQLITE_OK) {
2505 configd_critical(
2506 "%s: %s_%s index creation fails: %s\n", file,
2507 idxs[i].bxi_tbl, idxs[i].bxi_idx, errmsg);
2508 free(errmsg);
2509 return (-1);
2512 return (0);
2515 static int
2516 backend_init_schema(sqlite_backend_t *be, const char *db_file, backend_type_t t)
2518 int i;
2519 char *errmsg;
2520 int ret;
2522 assert(t == BACKEND_TYPE_NORMAL || t == BACKEND_TYPE_NONPERSIST);
2524 if (t == BACKEND_TYPE_NORMAL) {
2525 ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_normal, idxs_normal);
2526 } else if (t == BACKEND_TYPE_NONPERSIST) {
2527 ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_np, idxs_np);
2528 } else {
2529 abort(); /* can't happen */
2532 if (ret < 0) {
2533 return (ret);
2536 ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_common, idxs_common);
2537 if (ret < 0) {
2538 return (ret);
2542 * Add the schema version to the table
2544 ret = sqlite_exec_printf(be->be_db,
2545 "INSERT INTO schema_version (schema_version) VALUES (%d)",
2546 NULL, NULL, &errmsg, BACKEND_SCHEMA_VERSION);
2547 if (ret != SQLITE_OK) {
2548 configd_critical(
2549 "setting schema version fails: %s\n", errmsg);
2550 free(errmsg);
2554 * Populate id_tbl with initial IDs.
2556 for (i = 0; i < BACKEND_ID_INVALID; i++) {
2557 const char *name = id_space_to_name(i);
2559 ret = sqlite_exec_printf(be->be_db,
2560 "INSERT INTO id_tbl (id_name, id_next) "
2561 "VALUES ('%q', %d);", NULL, NULL, &errmsg, name, 1);
2562 if (ret != SQLITE_OK) {
2563 configd_critical(
2564 "id insertion for %s fails: %s\n", name, errmsg);
2565 free(errmsg);
2566 return (-1);
2570 * Set the persistance of the database. The normal database is marked
2571 * "synchronous", so that all writes are synchronized to stable storage
2572 * before proceeding.
2574 ret = sqlite_exec_printf(be->be_db,
2575 "PRAGMA default_synchronous = %s; PRAGMA synchronous = %s;",
2576 NULL, NULL, &errmsg,
2577 (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF",
2578 (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF");
2579 if (ret != SQLITE_OK) {
2580 configd_critical("pragma setting fails: %s\n", errmsg);
2581 free(errmsg);
2582 return (-1);
2585 return (0);
2589 backend_init(const char *db_file, const char *npdb_file, int have_np)
2591 sqlite_backend_t *be;
2592 char *errp;
2593 struct sqlite *fast_db;
2594 int r;
2595 backend_switch_results_t switch_result = BACKEND_SWITCH_OK;
2596 int writable_persist = 1;
2598 /* set up our temporary directory */
2599 sqlite_temp_directory = "/etc/svc/volatile";
2601 if (strcmp(SQLITE_VERSION, sqlite_version) != 0) {
2602 configd_critical("Mismatched link! (%s should be %s)\n",
2603 sqlite_version, SQLITE_VERSION);
2604 return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2607 if (db_file == NULL)
2608 db_file = REPOSITORY_DB;
2609 if (strcmp(db_file, REPOSITORY_DB) != 0) {
2610 is_main_repository = 0;
2614 * If the svc.configd crashed, there might be a leftover transient
2615 * database at FAST_REPOSITORY_DB,which contains useful
2616 * information. Both early manifest import and late manifest
2617 * import use svcadm to copy the repository to FAST_REPOSITORY_DB.
2618 * One reason for doing this is that it improves the performance of
2619 * manifest import. The other reason is that the repository may be
2620 * on read-only root in the case of early manifest import.
2622 * If FAST_REPOSITORY_DB exists, it is an indication that
2623 * svc.configd has been restarted for some reason. Since we have
2624 * no way of knowing where we are in the boot process, the safe
2625 * thing to do is to move the repository back to it's non-transient
2626 * location, REPOSITORY_DB. This may slow manifest import
2627 * performance, but it avoids the problem of missing the command to
2628 * move the repository to permanent storage.
2630 * There is a caveat, though. If root is read-only, we'll need to
2631 * leave the repository at FAST_REPOSITORY_DB. If root is
2632 * read-only, late manifest import has not yet run, so it will move
2633 * the repository back to permanent storage when it runs.
2635 if (is_main_repository)
2636 switch_result = backend_switch_recovery();
2638 r = backend_create(BACKEND_TYPE_NORMAL, db_file, &be);
2639 switch (r) {
2640 case BACKEND_CREATE_FAIL:
2641 return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2642 case BACKEND_CREATE_LOCKED:
2643 return (CONFIGD_EXIT_DATABASE_LOCKED);
2644 case BACKEND_CREATE_SUCCESS:
2645 break; /* success */
2646 case BACKEND_CREATE_READONLY:
2647 writable_persist = 0;
2648 break;
2649 case BACKEND_CREATE_NEED_INIT:
2650 if (backend_init_schema(be, db_file, BACKEND_TYPE_NORMAL)) {
2651 backend_destroy(be);
2652 return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2654 break;
2655 default:
2656 abort();
2657 /*NOTREACHED*/
2659 backend_create_finish(BACKEND_TYPE_NORMAL, be);
2660 flight_recorder_event(BE_FLIGHT_EV_REPO_CREATE,
2661 writable_persist == 1 ? BE_FLIGHT_ST_RW : BE_FLIGHT_ST_RO);
2663 * If there was a transient repository that could not be copied
2664 * back because the root file system was read-only, switch over to
2665 * using the transient repository.
2667 if (switch_result == BACKEND_SWITCH_RO) {
2668 char *db_name_copy = NULL;
2670 fast_db = sqlite_open(FAST_REPOSITORY_DB, 0600, &errp);
2671 if (fast_db == NULL) {
2672 /* Can't open fast repository. Stick with permanent. */
2673 configd_critical("Cannot open \"%s\". %s\n",
2674 FAST_REPOSITORY_DB, errp == NULL ? "" : errp);
2675 free(errp);
2676 } else {
2677 db_name_copy = strdup(FAST_REPOSITORY_DB);
2678 if (db_name_copy == NULL) {
2679 configd_critical("backend_init: out of "
2680 "memory.\n");
2681 sqlite_close(fast_db);
2682 return (CONFIGD_EXIT_INIT_FAILED);
2683 } else {
2684 flight_recorder_event(
2685 BE_FLIGHT_EV_LINGERING_FAST,
2686 BE_FLIGHT_ST_RO);
2687 sqlite_close(be->be_db);
2688 be->be_db = fast_db;
2689 be->be_ppath = be->be_path;
2690 be->be_path = db_name_copy;
2695 if (have_np) {
2696 if (npdb_file == NULL)
2697 npdb_file = NONPERSIST_DB;
2699 r = backend_create(BACKEND_TYPE_NONPERSIST, npdb_file, &be);
2700 switch (r) {
2701 case BACKEND_CREATE_SUCCESS:
2702 break; /* success */
2703 case BACKEND_CREATE_FAIL:
2704 return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2705 case BACKEND_CREATE_LOCKED:
2706 return (CONFIGD_EXIT_DATABASE_LOCKED);
2707 case BACKEND_CREATE_READONLY:
2708 configd_critical("%s: unable to write\n", npdb_file);
2709 return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2710 case BACKEND_CREATE_NEED_INIT:
2711 if (backend_init_schema(be, db_file,
2712 BACKEND_TYPE_NONPERSIST)) {
2713 backend_destroy(be);
2714 return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2716 break;
2717 default:
2718 abort();
2719 /*NOTREACHED*/
2721 backend_create_finish(BACKEND_TYPE_NONPERSIST, be);
2723 if (r != BACKEND_CREATE_NEED_INIT) {
2724 flight_recorder_event(BE_FLIGHT_EV_RESTART,
2725 BE_FLIGHT_ST_INFO);
2729 * If we started up with a writable filesystem, but the
2730 * non-persistent database needed initialization, we are
2731 * booting a non-global zone or a system with a writable
2732 * root (ZFS), so do a backup. Checking to see if the
2733 * non-persistent database needed initialization also keeps
2734 * us from making additional backups if configd gets
2735 * restarted.
2737 if (r == BACKEND_CREATE_NEED_INIT && writable_persist &&
2738 backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2739 REP_PROTOCOL_SUCCESS) {
2740 if (backend_create_backup_locked(be,
2741 REPOSITORY_BOOT_BACKUP) != REP_PROTOCOL_SUCCESS) {
2742 configd_critical(
2743 "unable to create \"%s\" backup of "
2744 "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2745 be->be_path);
2747 backend_unlock(be);
2751 * On the other hand if we started with a read-only file
2752 * system and the non-persistent database needed
2753 * initialization, then we need to take a checkpoint of the
2754 * repository. We grab the checkpoint now before Early
2755 * Manifest Import starts modifying the repository. Then
2756 * when the file system becomes writable, the checkpoint
2757 * can be used to create the boot time backup of the
2758 * repository. Checking that the non-persistent database
2759 * needed initialization, keeps us from making additional
2760 * checkpoints if configd gets restarted.
2762 if (r == BACKEND_CREATE_NEED_INIT && writable_persist == 0 &&
2763 backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2764 REP_PROTOCOL_SUCCESS) {
2765 r = backend_checkpoint_repository(be);
2766 if (r != REP_PROTOCOL_SUCCESS) {
2767 configd_critical("unable to create checkpoint "
2768 "of \"%s\"\n", be->be_path);
2770 backend_unlock(be);
2774 * If the non-persistent database did not need
2775 * initialization, svc.configd has been restarted. See if
2776 * the boot time checkpoint exists. If it does, use it to
2777 * make a backup if root is writable.
2779 if (r != BACKEND_CREATE_NEED_INIT &&
2780 backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2781 REP_PROTOCOL_SUCCESS) {
2782 struct stat sb;
2784 if ((stat(REPOSITORY_CHECKPOINT, &sb) == 0) &&
2785 (sb.st_size > 0) && (sb.st_mode & S_IFREG)) {
2786 be->be_checkpoint = REPOSITORY_CHECKPOINT;
2787 flight_recorder_event(
2788 BE_FLIGHT_EV_CHECKPOINT_EXISTS,
2789 BE_FLIGHT_ST_INFO);
2793 * If we have a checkpoint and root is writable,
2794 * make the backup now.
2796 if (be->be_checkpoint && writable_persist) {
2797 if (backend_create_backup_locked(be,
2798 REPOSITORY_BOOT_BACKUP) !=
2799 REP_PROTOCOL_SUCCESS) {
2800 configd_critical(
2801 "unable to create \"%s\" backup of "
2802 "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2803 be->be_path);
2806 backend_unlock(be);
2811 * If the persistent backend is writable at this point, upgrade it.
2812 * This can occur in a few cases, most notably on UFS roots if
2813 * we are operating on the backend from another root, as is the case
2814 * during alternate-root BFU.
2816 * Otherwise, upgrade will occur via backend_check_readonly() when
2817 * the repository is re-opened read-write.
2819 if (writable_persist) {
2820 r = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
2821 assert(r == REP_PROTOCOL_SUCCESS);
2822 backend_check_upgrade(be, B_TRUE);
2823 backend_unlock(be);
2826 return (CONFIGD_EXIT_OKAY);
2830 * quiesce all database activity prior to exiting
2832 void
2833 backend_fini(void)
2835 sqlite_backend_t *be_normal, *be_np;
2837 (void) backend_lock(BACKEND_TYPE_NORMAL, 1, &be_normal);
2838 (void) backend_lock(BACKEND_TYPE_NONPERSIST, 1, &be_np);
2841 #define QUERY_BASE 128
2842 backend_query_t *
2843 backend_query_alloc(void)
2845 backend_query_t *q;
2846 q = calloc(1, sizeof (backend_query_t));
2847 if (q != NULL) {
2848 q->bq_size = QUERY_BASE;
2849 q->bq_buf = calloc(1, q->bq_size);
2850 if (q->bq_buf == NULL) {
2851 q->bq_size = 0;
2855 return (q);
2858 void
2859 backend_query_append(backend_query_t *q, const char *value)
2861 char *alloc;
2862 int count;
2863 size_t size, old_len;
2865 if (q == NULL) {
2866 /* We'll discover the error when we try to run the query. */
2867 return;
2870 while (q->bq_buf != NULL) {
2871 old_len = strlen(q->bq_buf);
2872 size = q->bq_size;
2873 count = strlcat(q->bq_buf, value, size);
2875 if (count < size)
2876 break; /* success */
2878 q->bq_buf[old_len] = 0;
2879 size = round_up_to_p2(count + 1);
2881 assert(size > q->bq_size);
2882 alloc = realloc(q->bq_buf, size);
2883 if (alloc == NULL) {
2884 free(q->bq_buf);
2885 q->bq_buf = NULL;
2886 break; /* can't grow */
2889 q->bq_buf = alloc;
2890 q->bq_size = size;
2894 void
2895 backend_query_add(backend_query_t *q, const char *format, ...)
2897 va_list args;
2898 char *new;
2900 if (q == NULL || q->bq_buf == NULL)
2901 return;
2903 va_start(args, format);
2904 new = sqlite_vmprintf(format, args);
2905 va_end(args);
2907 if (new == NULL) {
2908 free(q->bq_buf);
2909 q->bq_buf = NULL;
2910 return;
2913 backend_query_append(q, new);
2915 free(new);
2918 void
2919 backend_query_free(backend_query_t *q)
2921 if (q != NULL) {
2922 if (q->bq_buf != NULL) {
2923 free(q->bq_buf);
2925 free(q);