2 * Unix SMB/CIFS implementation.
4 * OneFS shadow copy implementation that utilizes the file system's native
5 * snapshot support. This file does all of the heavy lifting.
7 * Copyright (C) Dave Richards, 2007
8 * Copyright (C) Tim Prouty, 2009
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 3 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, see <http://www.gnu.org/licenses/>.
24 #include "smbd/smbd.h"
25 #include <ifs/ifs_syscalls.h>
26 #include <sys/types.h>
27 #include <sys/isi_enc.h>
28 #include <sys/module.h>
30 #include <sys/syscall.h>
42 #include "onefs_shadow_copy.h"
44 /* Copied from ../include/proto.h */
45 void become_root(void);
46 void unbecome_root(void);
48 #define SNAPSHOT_DIRECTORY ".snapshot"
50 #define MAX_VERSIONS 64
55 * During snapshot enumeration, snapshots are represented by snapshot objects
56 * and are stored in a snapshot set. The snapshot object represents one
57 * snapshot within the set. An important thing to note about the set is that
58 * the key of the snapshot object is the tv_sec component of the is_time
59 * member. What this means is that the set holds only one entry for each
60 * second. If multiple snapshots were created within the same second, the
61 * later arrivals are chained onto that entry's is_next list rather than
62 * being added to the set as separate entries.
66 struct timespec is_time
;
67 struct osc_snapshot
* is_next
;
71 * A snapshot context object.
73 * Snapshot contexts are used to pass information throughout the snapshot
74 * enumeration routines. A single context is kept in static storage (see
75 * osc_get_snapshot_ctx()) and is reused across API calls while it is valid.
77 struct osc_snapshot_ctx
{
79 struct timespec osc_mtime
;
83 * A directory context.
85 * Directory contexts are the underlying data structure used to enumerate
86 * snapshot versions. An opendir()-, readdir()- and closedir()-like interface
87 * is provided that utilizes directory contexts. At the API level, directory
88 * contexts are passed around as void pointers. Directory contexts are
89 * allocated on the heap and their lifetime is dictated by the calling
92 struct osc_directory_ctx
{
100 * Return a file descriptor to the STF names directory.
102 * Opens the STF names directory and returns a file descriptor to it.
103 * Subsequent calls return the same value (avoiding the need to re-open the
104 * directory repeatedly). Caveat caller: don't close the file descriptor or
108 osc_get_names_directory_fd(void)
114 fd
= pctl2_lin_open(STF_NAMES_LIN
, HEAD_SNAPID
, O_RDONLY
);
122 * Compare two time values.
124 * Accepts two struct timespecs and compares the tv_sec components of these
125 * values. It returns -1 if the first value precedes the second, 0 if they
126 * are equal and +1 if the first value succeeds the second.
129 osc_time_compare(const struct timespec
*tsp1
, const struct timespec
*tsp2
)
131 return (tsp1
->tv_sec
< tsp2
->tv_sec
) ? -1 :
132 (tsp1
->tv_sec
> tsp2
->tv_sec
) ? +1 : 0;
136 * Compare two timespec values.
138 * Compares two timespec values. It returns -1 if the first value precedes
139 * the second, 0 if they are equal and +1 if the first value succeeds the
143 osc_timespec_compare(const struct timespec
*tsp1
, const struct timespec
*tsp2
)
145 return (tsp1
->tv_sec
< tsp2
->tv_sec
) ? -1 :
146 (tsp1
->tv_sec
> tsp2
->tv_sec
) ? +1 :
147 (tsp1
->tv_nsec
< tsp2
->tv_nsec
) ? -1 :
148 (tsp1
->tv_nsec
> tsp2
->tv_nsec
) ? +1 : 0;
152 * Determine whether a timespec value is zero.
154 * Return 1 if the struct timespec provided is zero and 0 otherwise.
157 osc_timespec_is_zero(const struct timespec
*tsp
)
159 return (tsp
->tv_sec
== 0) &&
164 * Create a snapshot object.
166 * Allocates and initializes a new snapshot object. In addition to allocating
167 * space for the snapshot object itself, space is allocated for the snapshot
168 * name. Both the name and time are then copied to the new object.
170 static struct osc_snapshot
*
171 osc_snapshot_create(const char *name
, const struct timespec
*tsp
)
173 struct osc_snapshot
*isp
;
175 isp
= malloc(sizeof *isp
);
179 isp
->is_name
= malloc(strlen(name
) + 1);
180 if (isp
->is_name
== NULL
) {
186 strcpy(isp
->is_name
, name
);
195 * Destroy a snapshot object.
197 * Frees both the name and the snapshot object itself. Appropriate NULL
198 * checking is performed because counting on free to do so is immoral.
201 osc_snapshot_destroy(struct osc_snapshot
*isp
)
204 if (isp
->is_name
!= NULL
)
211 * Destroy all snapshots in the snapshot list.
213 * Calls osc_snapshot_destroy() on each snapshot in the list.
216 osc_snapshot_destroy_list(struct osc_snapshot
*isp
)
218 struct osc_snapshot
*tmp
;
220 while (isp
!= NULL
) {
223 osc_snapshot_destroy(tmp
);
228 * Compare two snapshot objects.
230 * Compare two snapshot objects. It is really just a wrapper for
231 * osc_time_compare(), which compares the time values of the two snapshots.
232 * N.B. time value in this context refers only to the tv_sec component.
235 osc_snapshot_compare(const void *vp1
, const void *vp2
)
237 const struct osc_snapshot
*isp1
= vp1
;
238 const struct osc_snapshot
*isp2
= vp2
;
240 return -osc_time_compare(&isp1
->is_time
, &isp2
->is_time
);
244 * Insert a snapshot into the snapshot set.
246 * Inserts a new snapshot into the snapshot set. The key for snapshots is
247 * their creation time (it's actually the seconds portion of the creation
248 * time). If a duplicate snapshot is found in the set, the new snapshot is
249 * added to a linked list of snapshots for that second.
252 osc_snapshot_insert(struct osc_snapshot_ctx
*oscp
, const char *name
,
253 const struct timespec
*tsp
, int *errorp
)
255 struct osc_snapshot
*isp1
;
256 struct osc_snapshot
**ispp
;
258 isp1
= osc_snapshot_create(name
, tsp
);
264 ispp
= tsearch(isp1
, &oscp
->osc_set
, osc_snapshot_compare
);
266 struct osc_snapshot
*isp2
= *ispp
;
268 /* If this is the only snapshot for this second, we're done. */
272 /* Collision: add the new snapshot to the list. */
273 isp1
->is_next
= isp2
->is_next
;
274 isp2
->is_next
= isp1
;
282 * Process the next snapshot.
284 * Called for (almost) every entry in a .snapshot directory, ("." and ".." are
285 * ignored in osc_process_snapshot_directory()). All other entries are passed
286 * to osc_process_snapshot(), however. These entries can fall into one of two
287 * categories: snapshot names and snapshot aliases. We only care about
288 * snapshot names (as aliases are just redundant entries). Once it verifies
289 * that name represents a valid snapshot name, it calls fstat() to get the
290 * creation time of the snapshot and then calls osc_snapshot_insert() to add
291 * this entry to the snapshot set.
294 osc_process_snapshot(struct osc_snapshot_ctx
*oscp
, const char *name
,
298 struct stf_stat stf_stat
;
301 fd
= osc_get_names_directory_fd();
305 fd
= enc_openat(fd
, name
, ENC_DEFAULT
, O_RDONLY
);
309 memset(&stf_stat
, 0, sizeof stf_stat
);
310 if (ifs_snap_stat(fd
, &stf_stat
) == -1)
313 if (stf_stat
.sf_type
!= SF_STF
)
316 if (fstat(fd
, &stbuf
) == -1)
319 osc_snapshot_insert(oscp
, name
, &stbuf
.st_birthtimespec
, errorp
);
327 * Process a snapshot directory.
329 * Opens the snapshot directory and calls osc_process_snapshot() for each
330 * entry. (Well ok, "." and ".." are ignored.) The goal here is to add all
331 * snapshots in the directory to the snapshot set.
334 osc_process_snapshot_directory(struct osc_snapshot_ctx
*oscp
, int *errorp
)
341 fd
= osc_get_names_directory_fd();
345 if (fstat(fd
, &stbuf
) == -1)
348 dirp
= opendir(SNAPSHOT_DIRECTORY
);
357 if (dp
->d_name
[0] == '.' && (dp
->d_name
[1] == '\0' ||
358 (dp
->d_name
[1] == '.' && dp
->d_name
[2] == '\0')))
361 osc_process_snapshot(oscp
, dp
->d_name
, errorp
);
369 oscp
->osc_mtime
= stbuf
.st_mtimespec
;
376 * Initialize a snapshot context object.
378 * Clears all members of the context object.
381 osc_snapshot_ctx_init(struct osc_snapshot_ctx
*oscp
)
383 memset(oscp
, 0, sizeof *oscp
);
387 * Destroy a snapshot context object.
389 * Frees all snapshots associated with the snapshot context and then calls
390 * osc_snapshot_ctx_init() to re-initialize the context object.
393 osc_snapshot_ctx_clean(struct osc_snapshot_ctx
*oscp
)
395 struct osc_snapshot
*tmp
;
397 while (oscp
->osc_set
!= NULL
) {
398 tmp
= *(void **)oscp
->osc_set
;
399 tdelete(tmp
, &oscp
->osc_set
, osc_snapshot_compare
);
400 osc_snapshot_destroy_list(tmp
);
403 osc_snapshot_ctx_init(oscp
);
407 * Return the "global" snapshot context.
409 * We maintain a single open snapshot context. Return a pointer to it.
411 static struct osc_snapshot_ctx
*
412 osc_get_snapshot_ctx(void)
414 static struct osc_snapshot_ctx osc
= { 0, { 0, 0 } };
420 * Determine whether a snapshot context is still valid.
422 * "Valid" in this context means "reusable". We can re-use a previous
423 * snapshot context iff we successfully built a previous snapshot context
424 * and no snapshots have been created or deleted since we did so.
425 * A "names" directory exists within our snapshot
426 * implementation in which all snapshot names are entered. Each time a
427 * snapshot is created or deleted, an entry must be added or removed.
428 * When this happens the modification time on the "names" directory
429 * changes. Therefore, a snapshot context is valid iff the context
430 * pointer is non-NULL, the cached modification time is non-zero
431 * (zero means uninitialized), and the modification time of the "names"
432 * directory matches the cached value.
435 osc_snapshot_ctx_is_valid(struct osc_snapshot_ctx
*oscp
)
443 if (osc_timespec_is_zero(&oscp
->osc_mtime
))
446 fd
= osc_get_names_directory_fd();
450 if (fstat(fd
, &stbuf
) == -1)
453 if (osc_timespec_compare(&oscp
->osc_mtime
, &stbuf
.st_mtimespec
) != 0)
460 * Create and initialize a directory context.
462 * Allocates a directory context from the heap and initializes it.
464 static struct osc_directory_ctx
*
465 osc_directory_ctx_create(void)
467 struct osc_directory_ctx
*idcp
;
469 idcp
= malloc(sizeof *idcp
);
471 memset(idcp
, 0, sizeof *idcp
);
477 * Destroy a directory context.
479 * Frees any versions associated with the directory context and then frees the
483 osc_directory_ctx_destroy(struct osc_directory_ctx
*idcp
)
490 for (i
= 0; i
< idcp
->idc_len
; i
++)
491 free(idcp
->idc_version
[i
]);
497 * Expand the size of a directory context's version list.
499 * If osc_directory_ctx_append_version() detects that the version list is too
500 * small to accommodate a new version string, it calls
501 * osc_directory_ctx_expand_version_list() to expand the version list.
504 osc_directory_ctx_expand_version_list(struct osc_snapshot_ctx
*oscp
,
505 struct osc_directory_ctx
*idcp
, int *errorp
)
510 size
= idcp
->idc_size
* 2 ?: 1;
512 cpp
= realloc(idcp
->idc_version
, size
* sizeof (char *));
518 idcp
->idc_size
= size
;
519 idcp
->idc_version
= cpp
;
523 * Append a new version to a directory context.
525 * Appends a snapshot version to the
526 * directory context's version list.
529 osc_directory_ctx_append_version(struct osc_snapshot_ctx
*oscp
,
530 struct osc_directory_ctx
*idcp
, const struct timespec
*tsp
, int *errorp
)
536 if (idcp
->idc_len
>= MAX_VERSIONS
)
539 if (idcp
->idc_len
>= idcp
->idc_size
) {
540 osc_directory_ctx_expand_version_list(oscp
, idcp
, errorp
);
545 tmp
= gmtime(&tsp
->tv_sec
);
551 snprintf(text
, sizeof text
,
552 "@GMT-%04u.%02u.%02u-%02u.%02u.%02u",
560 cp
= malloc(strlen(text
) + 1);
568 idcp
->idc_version
[idcp
->idc_len
++] = cp
;
572 * Make a directory context from a snapshot context.
574 * Once a snapshot context has been completely filled-in,
575 * osc_make_directory_ctx() is used to build a directory context from it. The
576 * idea here is to create a version for each snapshot in the snapshot set.
579 osc_make_directory_ctx(struct osc_snapshot_ctx
*oscp
,
580 struct osc_directory_ctx
*idcp
, int *errorp
)
583 walk(const void *vp
, VISIT v
, int level
)
585 const struct osc_snapshot
*isp
;
587 if ((v
!= postorder
&& v
!= leaf
) || *errorp
)
590 isp
= *(const struct osc_snapshot
**)(u_long
)vp
;
592 osc_directory_ctx_append_version(oscp
, idcp
, &isp
->is_time
,
596 twalk(oscp
->osc_set
, walk
);
600 * Open a version directory.
602 * Opens a version directory. What this really means is that
603 * osc_version_opendir() returns a handle to a directory context, which can be
604 * used to retrieve version strings.
607 osc_version_opendir(void)
610 struct osc_directory_ctx
*idcp
;
611 struct osc_snapshot_ctx
*oscp
;
613 idcp
= osc_directory_ctx_create();
617 oscp
= osc_get_snapshot_ctx();
619 if (!osc_snapshot_ctx_is_valid(oscp
)) {
620 osc_snapshot_ctx_clean(oscp
);
621 osc_process_snapshot_directory(oscp
, &error
);
626 osc_make_directory_ctx(oscp
, idcp
, &error
);
634 osc_directory_ctx_destroy(idcp
);
643 * Read the next version directory entry.
645 * Returns the name of the next version in the version directory, or NULL if
646 * we're at the end of the directory. What this really does is return the
647 * next version from the version list stored in the directory context.
650 osc_version_readdir(void *vp
)
652 struct osc_directory_ctx
*idcp
= vp
;
657 if (idcp
->idc_pos
>= idcp
->idc_len
)
660 return idcp
->idc_version
[idcp
->idc_pos
++];
664 * Close the version directory.
666 * Destroys the underlying directory context.
669 osc_version_closedir(void *vp
)
671 struct osc_directory_ctx
*idcp
= vp
;
674 osc_directory_ctx_destroy(idcp
);
678 * Canonicalize a path.
680 * Converts paths of the form @GMT-.. to paths of the form ../.snapshot/..
681 * It's not the prettiest routine I've ever written, but what the heck?
684 osc_canonicalize_path(const char *path
, char *snap_component
)
687 struct osc_snapshot_ctx
*oscp
;
690 struct osc_snapshot is
;
691 struct osc_snapshot
**ispp
;
692 struct osc_snapshot
*isp
;
695 const char *snap_component_orig
= snap_component
;
698 oscp
= osc_get_snapshot_ctx();
700 if (!osc_snapshot_ctx_is_valid(oscp
)) {
701 osc_snapshot_ctx_clean(oscp
);
702 osc_process_snapshot_directory(oscp
, &error
);
707 memset(&tm
, 0, sizeof tm
);
708 n
= sscanf(snap_component
,
709 "@GMT-%4u.%2u.%2u-%2u.%2u.%2u",
723 is
.is_time
.tv_sec
= timegm(&tm
);
724 is
.is_time
.tv_nsec
= 0;
726 ispp
= tfind(&is
, &oscp
->osc_set
, osc_snapshot_compare
);
731 /* Determine the path after "@GMT-..." */
732 while (*snap_component
!= '/' && *snap_component
!= '\0')
735 while (*snap_component
== '/')
738 cpath
= malloc(strlen(SNAPSHOT_DIRECTORY
) + strlen(isp
->is_name
) +
745 * Use the first snapshot that has a successful stat for the requested
750 sprintf(cpath
, "%s/%s", SNAPSHOT_DIRECTORY
, isp
->is_name
);
752 /* Append path before "@GMT-..." */
753 if (snap_component_orig
!= path
) {
755 strncat(cpath
, path
, snap_component_orig
- path
);
758 /* Append path after "@GMT-..." */
759 if (*snap_component
!= '\0') {
761 strcat(cpath
, snap_component
);
764 /* If there is a valid snapshot for this file, we're done. */
765 if (stat(cpath
, &sb
) == 0)
768 /* Try the next snapshot. If this was the last one, give up. */
773 /* If the realloc fails, give up. */
774 cpath2
= realloc(cpath
, strlen(SNAPSHOT_DIRECTORY
) +
775 strlen(isp
->is_name
) + strlen(path
) + 3);