sys/vfs/hammer/hammer_pfs.c

   1 /*
   2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $
  35  */
  36 /*
  37  * HAMMER PFS ioctls - Manage pseudo-fs configurations
  38  */
  39
  40 #include "hammer.h"
  41
  42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
  43                                 hammer_inode_t ip);
  44 static int hammer_pfs_rollback(hammer_transaction_t trans,
  45                                 hammer_pseudofs_inmem_t pfsm,
  46                                 hammer_tid_t trunc_tid);
  47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
  48                                 hammer_tid_t trunc_tid);
  49
  50 /*
  51  * Get mirroring/pseudo-fs information
  52  *
  53  * NOTE: The ip used for ioctl is not necessarily related to the PFS
  54  */
  55 int
  56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
  57                         struct hammer_ioc_pseudofs_rw *pfs)
  58 {
  59         hammer_pseudofs_inmem_t pfsm;
  60         u_int32_t localization;
  61         int error;
  62
  63         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
  64                 return(error);
  65         localization = (u_int32_t)pfs->pfs_id << 16;
  66         pfs->bytes = sizeof(struct hammer_pseudofs_data);
  67         pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
  68
  69         pfsm = hammer_load_pseudofs(trans, localization, &error);
  70         if (error) {
  71                 hammer_rel_pseudofs(trans->hmp, pfsm);
  72                 return(error);
  73         }
  74
  75         /*
  76          * If the PFS is a master the sync tid is set by normal operation
  77          * rather then the mirroring code, and will always track the
  78          * real HAMMER filesystem.
  79          *
  80          * We use flush_tid1, which is the highest fully committed TID.
  81          * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
  82          * caught up to it yet so a crash will roll us back to flush_tid1.
  83          */
  84         if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
  85                 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
  86
  87         /*
  88          * Copy out to userland.
  89          */
  90         error = 0;
  91         if (pfs->ondisk && error == 0)
  92                 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
  93         hammer_rel_pseudofs(trans->hmp, pfsm);
  94         return(error);
  95 }
  96
  97 /*
  98  * Set mirroring/pseudo-fs information
  99  *
 100  * NOTE: The ip used for ioctl is not necessarily related to the PFS
 101  */
 102 int
 103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
 104                         struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
 105 {
 106         hammer_pseudofs_inmem_t pfsm;
 107         u_int32_t localization;
 108         int error;
 109
 110         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
 111                 return(error);
 112         localization = (u_int32_t)pfs->pfs_id << 16;
 113         if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
 114                 error = EINVAL;
 115         localization = (u_int32_t)pfs->pfs_id << 16;
 116
 117         if (error == 0 && pfs->ondisk) {
 118                 /*
 119                  * Load the PFS so we can modify our in-core copy.  Ignore
 120                  * ENOENT errors.
 121                  */
 122                 pfsm = hammer_load_pseudofs(trans, localization, &error);
 123                 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
 124
 125                 /*
 126                  * Save it back, create a root inode if we are in master
 127                  * mode and no root exists.
 128                  */
 129                 if (error == 0)
 130                         error = hammer_mkroot_pseudofs(trans, cred, pfsm);
 131                 if (error == 0)
 132                         error = hammer_save_pseudofs(trans, pfsm);
 133
 134                 /*
 135                  * Wakeup anyone waiting for a TID update for this PFS
 136                  */
 137                 wakeup(&pfsm->pfsd.sync_end_tid);
 138                 hammer_rel_pseudofs(trans->hmp, pfsm);
 139         }
 140         return(error);
 141 }
 142
 143 /*
 144  * Upgrade a slave to a master
 145  *
 146  * This is fairly easy to do, but we must physically undo any partial syncs
 147  * for transaction ids > sync_end_tid.  Effective, we must do a partial
 148  * rollback.
 149  *
 150  * NOTE: The ip used for ioctl is not necessarily related to the PFS
 151  */
 152 int
 153 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
 154                         struct hammer_ioc_pseudofs_rw *pfs)
 155 {
 156         hammer_pseudofs_inmem_t pfsm;
 157         u_int32_t localization;
 158         int error;
 159
 160         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
 161                 return(error);
 162         localization = (u_int32_t)pfs->pfs_id << 16;
 163         if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
 164                 return(error);
 165
 166         /*
 167          * A master id must be set when upgrading
 168          */
 169         pfsm = hammer_load_pseudofs(trans, localization, &error);
 170         if (error == 0) {
 171                 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
 172                         error = hammer_pfs_rollback(trans, pfsm,
 173                                             pfsm->pfsd.sync_end_tid + 1);
 174                         if (error == 0) {
 175                                 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
 176                                 error = hammer_save_pseudofs(trans, pfsm);
 177                         }
 178                 }
 179         }
 180         hammer_rel_pseudofs(trans->hmp, pfsm);
 181         if (error == EINTR) {
 182                 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
 183                 error = 0;
 184         }
 185         return (error);
 186 }
 187
 188 /*
 189  * Downgrade a master to a slave
 190  *
 191  * This is really easy to do, just set the SLAVE flag.
 192  *
 193  * We also leave sync_end_tid intact... the field is not used in master
 194  * mode (vol0_next_tid overrides it), but if someone switches to master
 195  * mode accidently and then back to slave mode we don't want it to change.
 196  * Eventually it will be used as the cross-synchronization TID in
 197  * multi-master mode, and we don't want to mess with it for that feature
 198  * either.
 199  *
 200  * NOTE: The ip used for ioctl is not necessarily related to the PFS
 201  */
 202 int
 203 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
 204                         struct hammer_ioc_pseudofs_rw *pfs)
 205 {
 206         hammer_pseudofs_inmem_t pfsm;
 207         u_int32_t localization;
 208         int error;
 209
 210         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
 211                 return(error);
 212         localization = (u_int32_t)pfs->pfs_id << 16;
 213         if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
 214                 return(error);
 215
 216         pfsm = hammer_load_pseudofs(trans, localization, &error);
 217         if (error == 0) {
 218                 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
 219                         pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
 220                         error = hammer_save_pseudofs(trans, pfsm);
 221                 }
 222         }
 223         hammer_rel_pseudofs(trans->hmp, pfsm);
 224         return (error);
 225 }
 226
 227 /*
 228  * Destroy a PFS
 229  *
 230  * We can destroy a PFS by scanning and deleting all of its records in the
 231  * B-Tree.  The hammer utility will delete the softlink in the primary
 232  * filesystem.
 233  *
 234  * NOTE: The ip used for ioctl is not necessarily related to the PFS
 235  */
 236 int
 237 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
 238                         struct hammer_ioc_pseudofs_rw *pfs)
 239 {
 240         hammer_pseudofs_inmem_t pfsm;
 241         u_int32_t localization;
 242         int error;
 243
 244         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
 245                 return(error);
 246         localization = (u_int32_t)pfs->pfs_id << 16;
 247
 248         if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
 249                 return(error);
 250
 251         pfsm = hammer_load_pseudofs(trans, localization, &error);
 252         if (error == 0) {
 253                 error = hammer_pfs_rollback(trans, pfsm, 0);
 254                 if (error == 0) {
 255                         pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
 256                         error = hammer_save_pseudofs(trans, pfsm);
 257                 }
 258         }
 259         hammer_rel_pseudofs(trans->hmp, pfsm);
 260         if (error == EINTR) {
 261                 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
 262                 error = 0;
 263         }
 264         return(error);
 265 }
 266
 267 /*
 268  * Wait for the PFS to sync past the specified TID
 269  */
 270 int
 271 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
 272                          struct hammer_ioc_pseudofs_rw *pfs)
 273 {
 274         hammer_pseudofs_inmem_t pfsm;
 275         struct hammer_pseudofs_data pfsd;
 276         u_int32_t localization;
 277         hammer_tid_t tid;
 278         void *waitp;
 279         int error;
 280
 281         if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
 282                 return(error);
 283         localization = (u_int32_t)pfs->pfs_id << 16;
 284
 285         if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
 286                 return(error);
 287
 288         pfsm = hammer_load_pseudofs(trans, localization, &error);
 289         if (error == 0) {
 290                 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
 291                         tid = pfsm->pfsd.sync_end_tid;
 292                         waitp = &pfsm->pfsd.sync_end_tid;
 293                 } else {
 294                         tid = trans->hmp->flush_tid1;
 295                         waitp = &trans->hmp->flush_tid1;
 296                 }
 297                 if (tid <= pfsd.sync_end_tid)
 298                         tsleep(waitp, PCATCH, "hmrmwt", 0);
 299         }
 300         hammer_rel_pseudofs(trans->hmp, pfsm);
 301         if (error == EINTR) {
 302                 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
 303                 error = 0;
 304         }
 305         return(error);
 306 }
 307
 308
 309 /*
 310  * Auto-detect the pseudofs and do basic bounds checking.
 311  */
 312 static
 313 int
 314 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
 315 {
 316         int error = 0;
 317
 318         if (pfs->pfs_id == -1)
 319                 pfs->pfs_id = (int)(ip->obj_localization >> 16);
 320         if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
 321                 error = EINVAL;
 322         if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
 323                 error = EINVAL;
 324         return(error);
 325 }
 326
 327 /*
 328  * Rollback the specified PFS to (trunc_tid - 1), removing everything
 329  * greater or equal to trunc_tid.  The PFS must not have been in no-mirror
 330  * mode or the MIRROR_FILTERED scan will not work properly.
 331  *
 332  * This is typically used to remove any partial syncs when upgrading a
 333  * slave to a master.  It can theoretically also be used to rollback
 334  * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
 335  * PRUNED, and to points that are older only if they are on a retained
 336  * (pruning softlink) boundary.
 337  *
 338  * Rollbacks destroy information.  If you don't mind inode numbers changing
 339  * a better way would be to cpdup a snapshot back onto the master.
 340  */
 341 static
 342 int
 343 hammer_pfs_rollback(hammer_transaction_t trans,
 344                     hammer_pseudofs_inmem_t pfsm,
 345                     hammer_tid_t trunc_tid)
 346 {
 347         struct hammer_cmirror cmirror;
 348         struct hammer_cursor cursor;
 349         struct hammer_base_elm key_cur;
 350         int error;
 351         int seq;
 352
 353         bzero(&cmirror, sizeof(cmirror));
 354         bzero(&key_cur, sizeof(key_cur));
 355         key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
 356         key_cur.obj_id = HAMMER_MIN_OBJID;
 357         key_cur.key = HAMMER_MIN_KEY;
 358         key_cur.create_tid = 1;
 359         key_cur.rec_type = HAMMER_MIN_RECTYPE;
 360
 361         seq = trans->hmp->flusher.act;
 362
 363 retry:
 364         error = hammer_init_cursor(trans, &cursor, NULL, NULL);
 365         if (error) {
 366                 hammer_done_cursor(&cursor);
 367                 goto failed;
 368         }
 369         cursor.key_beg = key_cur;
 370         cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
 371                                       pfsm->localization;
 372         cursor.key_end.obj_id = HAMMER_MAX_OBJID;
 373         cursor.key_end.key = HAMMER_MAX_KEY;
 374         cursor.key_end.create_tid = HAMMER_MAX_TID;
 375         cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
 376
 377         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
 378         cursor.flags |= HAMMER_CURSOR_BACKEND;
 379
 380         /*
 381          * Do an optimized scan of only records created or modified
 382          * >= trunc_tid, so we can fix up those records.  We must
 383          * still check the TIDs but this greatly reduces the size of
 384          * the scan.
 385          */
 386         cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
 387         cursor.cmirror = &cmirror;
 388         cmirror.mirror_tid = trunc_tid;
 389
 390         error = hammer_btree_first(&cursor);
 391         while (error == 0) {
 392                 /*
 393                  * Abort the rollback.
 394                  */
 395                 if (error == 0) {
 396                         error = hammer_signal_check(trans->hmp);
 397                         if (error)
 398                                 break;
 399                 }
 400
 401                 /*
 402                  * We only care about leafs.  Internal nodes can be returned
 403                  * in mirror-filtered mode (they are used to generate SKIP
 404                  * mrecords), but we don't need them for this code.
 405                  */
 406                 cursor.flags |= HAMMER_CURSOR_ATEDISK;
 407                 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
 408                         key_cur = cursor.node->ondisk->elms[cursor.index].base;
 409                         error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
 410                 }
 411
 412                 while (hammer_flusher_meta_halflimit(trans->hmp) ||
 413                        hammer_flusher_undo_exhausted(trans, 2)) {
 414                         hammer_unlock_cursor(&cursor);
 415                         hammer_flusher_wait(trans->hmp, seq);
 416                         hammer_lock_cursor(&cursor);
 417                         seq = hammer_flusher_async_one(trans->hmp);
 418                 }
 419
 420                 if (error == 0)
 421                         error = hammer_btree_iterate(&cursor);
 422         }
 423         if (error == ENOENT)
 424                 error = 0;
 425         hammer_done_cursor(&cursor);
 426         if (error == EDEADLK)
 427                 goto retry;
 428 failed:
 429         return(error);
 430 }
 431
 432 /*
 433  * Helper function - perform rollback on a B-Tree element given trunc_tid.
 434  *
 435  * If create_tid >= trunc_tid the record is physically destroyed.
 436  * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
 437  */
 438 static
 439 int
 440 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
 441 {
 442         hammer_btree_leaf_elm_t elm;
 443         hammer_transaction_t trans;
 444         int error;
 445
 446         elm = &cursor->node->ondisk->elms[cursor->index].leaf;
 447         if (elm->base.create_tid < trunc_tid &&
 448             elm->base.delete_tid < trunc_tid) {
 449                 return(0);
 450         }
 451         trans = cursor->trans;
 452
 453         if (elm->base.create_tid >= trunc_tid) {
 454                 error = hammer_delete_at_cursor(
 455                                 cursor, HAMMER_DELETE_DESTROY,
 456                                 cursor->trans->tid, cursor->trans->time32,
 457                                 1, NULL);
 458         } else if (elm->base.delete_tid >= trunc_tid) {
 459                 error = hammer_delete_at_cursor(
 460                                 cursor, HAMMER_DELETE_ADJUST,
 461                                 0, 0,
 462                                 1, NULL);
 463         } else {
 464                 error = 0;
 465         }
 466         return(error);
 467 }
 468