fs/btrfs/scrub.c

   1 /*
   2  * Copyright (C) 2011 STRATO.  All rights reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License v2 as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public
  14  * License along with this program; if not, write to the
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16  * Boston, MA 02111-1307, USA.
  17  */
  18
  19 #include <linux/blkdev.h>
  20 #include "ctree.h"
  21 #include "volumes.h"
  22 #include "disk-io.h"
  23 #include "ordered-data.h"
  24
  25 /*
  26  * This is only the first step towards a full-featured scrub. It reads all
  27  * extents and super blocks and verifies the checksums. In case a bad checksum
  28  * is found or the extent cannot be read, good data will be written back if
  29  * any can be found.
  30  *
  31  * Future enhancements:
  32  *  - In case an unrepairable extent is encountered, track which files are
  33  *    affected and report them
  34  *  - In case of a read error on files with nodatasum, map the file and read
  35  *    the extent to trigger a writeback of the good copy
  36  *  - track and record media errors, throw out bad devices
  37  *  - add a mode to also read unallocated space
  38  */
  39
  40 struct scrub_bio;
  41 struct scrub_page;
  42 struct scrub_dev;
  43 static void scrub_bio_end_io(struct bio *bio, int err);
  44 static void scrub_checksum(struct btrfs_work *work);
  45 static int scrub_checksum_data(struct scrub_dev *sdev,
  46                                struct scrub_page *spag, void *buffer);
  47 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
  48                                      struct scrub_page *spag, u64 logical,
  49                                      void *buffer);
  50 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
  51 static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
  52 static void scrub_fixup_end_io(struct bio *bio, int err);
  53 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
  54                           struct page *page);
  55 static void scrub_fixup(struct scrub_bio *sbio, int ix);
  56
  57 #define SCRUB_PAGES_PER_BIO     16      /* 64k per bio */
  58 #define SCRUB_BIOS_PER_DEV      16      /* 1 MB per device in flight */
  59
  60 struct scrub_page {
  61         u64                     flags;  /* extent flags */
  62         u64                     generation;
  63         u64                     mirror_num;
  64         int                     have_csum;
  65         u8                      csum[BTRFS_CSUM_SIZE];
  66 };
  67
  68 struct scrub_bio {
  69         int                     index;
  70         struct scrub_dev        *sdev;
  71         struct bio              *bio;
  72         int                     err;
  73         u64                     logical;
  74         u64                     physical;
  75         struct scrub_page       spag[SCRUB_PAGES_PER_BIO];
  76         u64                     count;
  77         int                     next_free;
  78         struct btrfs_work       work;
  79 };
  80
  81 struct scrub_dev {
  82         struct scrub_bio        *bios[SCRUB_BIOS_PER_DEV];
  83         struct btrfs_device     *dev;
  84         int                     first_free;
  85         int                     curr;
  86         atomic_t                in_flight;
  87         spinlock_t              list_lock;
  88         wait_queue_head_t       list_wait;
  89         u16                     csum_size;
  90         struct list_head        csum_list;
  91         atomic_t                cancel_req;
  92         int                     readonly;
  93         /*
  94          * statistics
  95          */
  96         struct btrfs_scrub_progress stat;
  97         spinlock_t              stat_lock;
  98 };
  99
 100 static void scrub_free_csums(struct scrub_dev *sdev)
 101 {
 102         while (!list_empty(&sdev->csum_list)) {
 103                 struct btrfs_ordered_sum *sum;
 104                 sum = list_first_entry(&sdev->csum_list,
 105                                        struct btrfs_ordered_sum, list);
 106                 list_del(&sum->list);
 107                 kfree(sum);
 108         }
 109 }
 110
 111 static void scrub_free_bio(struct bio *bio)
 112 {
 113         int i;
 114         struct page *last_page = NULL;
 115
 116         if (!bio)
 117                 return;
 118
 119         for (i = 0; i < bio->bi_vcnt; ++i) {
 120                 if (bio->bi_io_vec[i].bv_page == last_page)
 121                         continue;
 122                 last_page = bio->bi_io_vec[i].bv_page;
 123                 __free_page(last_page);
 124         }
 125         bio_put(bio);
 126 }
 127
 128 static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
 129 {
 130         int i;
 131
 132         if (!sdev)
 133                 return;
 134
 135         for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
 136                 struct scrub_bio *sbio = sdev->bios[i];
 137
 138                 if (!sbio)
 139                         break;
 140
 141                 scrub_free_bio(sbio->bio);
 142                 kfree(sbio);
 143         }
 144
 145         scrub_free_csums(sdev);
 146         kfree(sdev);
 147 }
 148
 149 static noinline_for_stack
 150 struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
 151 {
 152         struct scrub_dev *sdev;
 153         int             i;
 154         struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
 155
 156         sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
 157         if (!sdev)
 158                 goto nomem;
 159         sdev->dev = dev;
 160         for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
 161                 struct scrub_bio *sbio;
 162
 163                 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
 164                 if (!sbio)
 165                         goto nomem;
 166                 sdev->bios[i] = sbio;
 167
 168                 sbio->index = i;
 169                 sbio->sdev = sdev;
 170                 sbio->count = 0;
 171                 sbio->work.func = scrub_checksum;
 172
 173                 if (i != SCRUB_BIOS_PER_DEV-1)
 174                         sdev->bios[i]->next_free = i + 1;
 175                  else
 176                         sdev->bios[i]->next_free = -1;
 177         }
 178         sdev->first_free = 0;
 179         sdev->curr = -1;
 180         atomic_set(&sdev->in_flight, 0);
 181         atomic_set(&sdev->cancel_req, 0);
 182         sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy);
 183         INIT_LIST_HEAD(&sdev->csum_list);
 184
 185         spin_lock_init(&sdev->list_lock);
 186         spin_lock_init(&sdev->stat_lock);
 187         init_waitqueue_head(&sdev->list_wait);
 188         return sdev;
 189
 190 nomem:
 191         scrub_free_dev(sdev);
 192         return ERR_PTR(-ENOMEM);
 193 }
 194
 195 /*
 196  * scrub_recheck_error gets called when either verification of the page
 197  * failed or the bio failed to read, e.g. with EIO. In the latter case,
 198  * recheck_error gets called for every page in the bio, even though only
 199  * one may be bad
 200  */
 201 static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
 202 {
 203         if (sbio->err) {
 204                 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
 205                                    (sbio->physical + ix * PAGE_SIZE) >> 9,
 206                                    sbio->bio->bi_io_vec[ix].bv_page) == 0) {
 207                         if (scrub_fixup_check(sbio, ix) == 0)
 208                                 return;
 209                 }
 210         }
 211
 212         scrub_fixup(sbio, ix);
 213 }
 214
 215 static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
 216 {
 217         int ret = 1;
 218         struct page *page;
 219         void *buffer;
 220         u64 flags = sbio->spag[ix].flags;
 221
 222         page = sbio->bio->bi_io_vec[ix].bv_page;
 223         buffer = kmap_atomic(page, KM_USER0);
 224         if (flags & BTRFS_EXTENT_FLAG_DATA) {
 225                 ret = scrub_checksum_data(sbio->sdev,
 226                                           sbio->spag + ix, buffer);
 227         } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 228                 ret = scrub_checksum_tree_block(sbio->sdev,
 229                                                 sbio->spag + ix,
 230                                                 sbio->logical + ix * PAGE_SIZE,
 231                                                 buffer);
 232         } else {
 233                 WARN_ON(1);
 234         }
 235         kunmap_atomic(buffer, KM_USER0);
 236
 237         return ret;
 238 }
 239
 240 static void scrub_fixup_end_io(struct bio *bio, int err)
 241 {
 242         complete((struct completion *)bio->bi_private);
 243 }
 244
 245 static void scrub_fixup(struct scrub_bio *sbio, int ix)
 246 {
 247         struct scrub_dev *sdev = sbio->sdev;
 248         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 249         struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 250         struct btrfs_multi_bio *multi = NULL;
 251         u64 logical = sbio->logical + ix * PAGE_SIZE;
 252         u64 length;
 253         int i;
 254         int ret;
 255         DECLARE_COMPLETION_ONSTACK(complete);
 256
 257         if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
 258             (sbio->spag[ix].have_csum == 0)) {
 259                 /*
 260                  * nodatasum, don't try to fix anything
 261                  * FIXME: we can do better, open the inode and trigger a
 262                  * writeback
 263                  */
 264                 goto uncorrectable;
 265         }
 266
 267         length = PAGE_SIZE;
 268         ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
 269                               &multi, 0);
 270         if (ret || !multi || length < PAGE_SIZE) {
 271                 printk(KERN_ERR
 272                        "scrub_fixup: btrfs_map_block failed us for %llu\n",
 273                        (unsigned long long)logical);
 274                 WARN_ON(1);
 275                 return;
 276         }
 277
 278         if (multi->num_stripes == 1)
 279                 /* there aren't any replicas */
 280                 goto uncorrectable;
 281
 282         /*
 283          * first find a good copy
 284          */
 285         for (i = 0; i < multi->num_stripes; ++i) {
 286                 if (i == sbio->spag[ix].mirror_num)
 287                         continue;
 288
 289                 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
 290                                    multi->stripes[i].physical >> 9,
 291                                    sbio->bio->bi_io_vec[ix].bv_page)) {
 292                         /* I/O-error, this is not a good copy */
 293                         continue;
 294                 }
 295
 296                 if (scrub_fixup_check(sbio, ix) == 0)
 297                         break;
 298         }
 299         if (i == multi->num_stripes)
 300                 goto uncorrectable;
 301
 302         if (!sdev->readonly) {
 303                 /*
 304                  * bi_io_vec[ix].bv_page now contains good data, write it back
 305                  */
 306                 if (scrub_fixup_io(WRITE, sdev->dev->bdev,
 307                                    (sbio->physical + ix * PAGE_SIZE) >> 9,
 308                                    sbio->bio->bi_io_vec[ix].bv_page)) {
 309                         /* I/O-error, writeback failed, give up */
 310                         goto uncorrectable;
 311                 }
 312         }
 313
 314         kfree(multi);
 315         spin_lock(&sdev->stat_lock);
 316         ++sdev->stat.corrected_errors;
 317         spin_unlock(&sdev->stat_lock);
 318
 319         if (printk_ratelimit())
 320                 printk(KERN_ERR "btrfs: fixed up at %llu\n",
 321                        (unsigned long long)logical);
 322         return;
 323
 324 uncorrectable:
 325         kfree(multi);
 326         spin_lock(&sdev->stat_lock);
 327         ++sdev->stat.uncorrectable_errors;
 328         spin_unlock(&sdev->stat_lock);
 329
 330         if (printk_ratelimit())
 331                 printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
 332                          (unsigned long long)logical);
 333 }
 334
 335 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
 336                          struct page *page)
 337 {
 338         struct bio *bio = NULL;
 339         int ret;
 340         DECLARE_COMPLETION_ONSTACK(complete);
 341
 342         bio = bio_alloc(GFP_NOFS, 1);
 343         bio->bi_bdev = bdev;
 344         bio->bi_sector = sector;
 345         bio_add_page(bio, page, PAGE_SIZE, 0);
 346         bio->bi_end_io = scrub_fixup_end_io;
 347         bio->bi_private = &complete;
 348         submit_bio(rw, bio);
 349
 350         /* this will also unplug the queue */
 351         wait_for_completion(&complete);
 352
 353         ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
 354         bio_put(bio);
 355         return ret;
 356 }
 357
 358 static void scrub_bio_end_io(struct bio *bio, int err)
 359 {
 360         struct scrub_bio *sbio = bio->bi_private;
 361         struct scrub_dev *sdev = sbio->sdev;
 362         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 363
 364         sbio->err = err;
 365         sbio->bio = bio;
 366
 367         btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
 368 }
 369
 370 static void scrub_checksum(struct btrfs_work *work)
 371 {
 372         struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
 373         struct scrub_dev *sdev = sbio->sdev;
 374         struct page *page;
 375         void *buffer;
 376         int i;
 377         u64 flags;
 378         u64 logical;
 379         int ret;
 380
 381         if (sbio->err) {
 382                 for (i = 0; i < sbio->count; ++i)
 383                         scrub_recheck_error(sbio, i);
 384
 385                 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
 386                 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
 387                 sbio->bio->bi_phys_segments = 0;
 388                 sbio->bio->bi_idx = 0;
 389
 390                 for (i = 0; i < sbio->count; i++) {
 391                         struct bio_vec *bi;
 392                         bi = &sbio->bio->bi_io_vec[i];
 393                         bi->bv_offset = 0;
 394                         bi->bv_len = PAGE_SIZE;
 395                 }
 396
 397                 spin_lock(&sdev->stat_lock);
 398                 ++sdev->stat.read_errors;
 399                 spin_unlock(&sdev->stat_lock);
 400                 goto out;
 401         }
 402         for (i = 0; i < sbio->count; ++i) {
 403                 page = sbio->bio->bi_io_vec[i].bv_page;
 404                 buffer = kmap_atomic(page, KM_USER0);
 405                 flags = sbio->spag[i].flags;
 406                 logical = sbio->logical + i * PAGE_SIZE;
 407                 ret = 0;
 408                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
 409                         ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
 410                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 411                         ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
 412                                                         logical, buffer);
 413                 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
 414                         BUG_ON(i);
 415                         (void)scrub_checksum_super(sbio, buffer);
 416                 } else {
 417                         WARN_ON(1);
 418                 }
 419                 kunmap_atomic(buffer, KM_USER0);
 420                 if (ret)
 421                         scrub_recheck_error(sbio, i);
 422         }
 423
 424 out:
 425         scrub_free_bio(sbio->bio);
 426         sbio->bio = NULL;
 427         spin_lock(&sdev->list_lock);
 428         sbio->next_free = sdev->first_free;
 429         sdev->first_free = sbio->index;
 430         spin_unlock(&sdev->list_lock);
 431         atomic_dec(&sdev->in_flight);
 432         wake_up(&sdev->list_wait);
 433 }
 434
 435 static int scrub_checksum_data(struct scrub_dev *sdev,
 436                                struct scrub_page *spag, void *buffer)
 437 {
 438         u8 csum[BTRFS_CSUM_SIZE];
 439         u32 crc = ~(u32)0;
 440         int fail = 0;
 441         struct btrfs_root *root = sdev->dev->dev_root;
 442
 443         if (!spag->have_csum)
 444                 return 0;
 445
 446         crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
 447         btrfs_csum_final(crc, csum);
 448         if (memcmp(csum, spag->csum, sdev->csum_size))
 449                 fail = 1;
 450
 451         spin_lock(&sdev->stat_lock);
 452         ++sdev->stat.data_extents_scrubbed;
 453         sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
 454         if (fail)
 455                 ++sdev->stat.csum_errors;
 456         spin_unlock(&sdev->stat_lock);
 457
 458         return fail;
 459 }
 460
 461 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
 462                                      struct scrub_page *spag, u64 logical,
 463                                      void *buffer)
 464 {
 465         struct btrfs_header *h;
 466         struct btrfs_root *root = sdev->dev->dev_root;
 467         struct btrfs_fs_info *fs_info = root->fs_info;
 468         u8 csum[BTRFS_CSUM_SIZE];
 469         u32 crc = ~(u32)0;
 470         int fail = 0;
 471         int crc_fail = 0;
 472
 473         /*
 474          * we don't use the getter functions here, as we
 475          * a) don't have an extent buffer and
 476          * b) the page is already kmapped
 477          */
 478         h = (struct btrfs_header *)buffer;
 479
 480         if (logical != le64_to_cpu(h->bytenr))
 481                 ++fail;
 482
 483         if (spag->generation != le64_to_cpu(h->generation))
 484                 ++fail;
 485
 486         if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
 487                 ++fail;
 488
 489         if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
 490                    BTRFS_UUID_SIZE))
 491                 ++fail;
 492
 493         crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
 494                               PAGE_SIZE - BTRFS_CSUM_SIZE);
 495         btrfs_csum_final(crc, csum);
 496         if (memcmp(csum, h->csum, sdev->csum_size))
 497                 ++crc_fail;
 498
 499         spin_lock(&sdev->stat_lock);
 500         ++sdev->stat.tree_extents_scrubbed;
 501         sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
 502         if (crc_fail)
 503                 ++sdev->stat.csum_errors;
 504         if (fail)
 505                 ++sdev->stat.verify_errors;
 506         spin_unlock(&sdev->stat_lock);
 507
 508         return fail || crc_fail;
 509 }
 510
 511 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
 512 {
 513         struct btrfs_super_block *s;
 514         u64 logical;
 515         struct scrub_dev *sdev = sbio->sdev;
 516         struct btrfs_root *root = sdev->dev->dev_root;
 517         struct btrfs_fs_info *fs_info = root->fs_info;
 518         u8 csum[BTRFS_CSUM_SIZE];
 519         u32 crc = ~(u32)0;
 520         int fail = 0;
 521
 522         s = (struct btrfs_super_block *)buffer;
 523         logical = sbio->logical;
 524
 525         if (logical != le64_to_cpu(s->bytenr))
 526                 ++fail;
 527
 528         if (sbio->spag[0].generation != le64_to_cpu(s->generation))
 529                 ++fail;
 530
 531         if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
 532                 ++fail;
 533
 534         crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
 535                               PAGE_SIZE - BTRFS_CSUM_SIZE);
 536         btrfs_csum_final(crc, csum);
 537         if (memcmp(csum, s->csum, sbio->sdev->csum_size))
 538                 ++fail;
 539
 540         if (fail) {
 541                 /*
 542                  * if we find an error in a super block, we just report it.
 543                  * They will get written with the next transaction commit
 544                  * anyway
 545                  */
 546                 spin_lock(&sdev->stat_lock);
 547                 ++sdev->stat.super_errors;
 548                 spin_unlock(&sdev->stat_lock);
 549         }
 550
 551         return fail;
 552 }
 553
 554 static int scrub_submit(struct scrub_dev *sdev)
 555 {
 556         struct scrub_bio *sbio;
 557         struct bio *bio;
 558         int i;
 559
 560         if (sdev->curr == -1)
 561                 return 0;
 562
 563         sbio = sdev->bios[sdev->curr];
 564
 565         bio = bio_alloc(GFP_NOFS, sbio->count);
 566         if (!bio)
 567                 goto nomem;
 568
 569         bio->bi_private = sbio;
 570         bio->bi_end_io = scrub_bio_end_io;
 571         bio->bi_bdev = sdev->dev->bdev;
 572         bio->bi_sector = sbio->physical >> 9;
 573
 574         for (i = 0; i < sbio->count; ++i) {
 575                 struct page *page;
 576                 int ret;
 577
 578                 page = alloc_page(GFP_NOFS);
 579                 if (!page)
 580                         goto nomem;
 581
 582                 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
 583                 if (!ret) {
 584                         __free_page(page);
 585                         goto nomem;
 586                 }
 587         }
 588
 589         sbio->err = 0;
 590         sdev->curr = -1;
 591         atomic_inc(&sdev->in_flight);
 592
 593         submit_bio(READ, bio);
 594
 595         return 0;
 596
 597 nomem:
 598         scrub_free_bio(bio);
 599
 600         return -ENOMEM;
 601 }
 602
 603 static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
 604                       u64 physical, u64 flags, u64 gen, u64 mirror_num,
 605                       u8 *csum, int force)
 606 {
 607         struct scrub_bio *sbio;
 608
 609 again:
 610         /*
 611          * grab a fresh bio or wait for one to become available
 612          */
 613         while (sdev->curr == -1) {
 614                 spin_lock(&sdev->list_lock);
 615                 sdev->curr = sdev->first_free;
 616                 if (sdev->curr != -1) {
 617                         sdev->first_free = sdev->bios[sdev->curr]->next_free;
 618                         sdev->bios[sdev->curr]->next_free = -1;
 619                         sdev->bios[sdev->curr]->count = 0;
 620                         spin_unlock(&sdev->list_lock);
 621                 } else {
 622                         spin_unlock(&sdev->list_lock);
 623                         wait_event(sdev->list_wait, sdev->first_free != -1);
 624                 }
 625         }
 626         sbio = sdev->bios[sdev->curr];
 627         if (sbio->count == 0) {
 628                 sbio->physical = physical;
 629                 sbio->logical = logical;
 630         } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
 631                    sbio->logical + sbio->count * PAGE_SIZE != logical) {
 632                 int ret;
 633
 634                 ret = scrub_submit(sdev);
 635                 if (ret)
 636                         return ret;
 637                 goto again;
 638         }
 639         sbio->spag[sbio->count].flags = flags;
 640         sbio->spag[sbio->count].generation = gen;
 641         sbio->spag[sbio->count].have_csum = 0;
 642         sbio->spag[sbio->count].mirror_num = mirror_num;
 643         if (csum) {
 644                 sbio->spag[sbio->count].have_csum = 1;
 645                 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
 646         }
 647         ++sbio->count;
 648         if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
 649                 int ret;
 650
 651                 ret = scrub_submit(sdev);
 652                 if (ret)
 653                         return ret;
 654         }
 655
 656         return 0;
 657 }
 658
 659 static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
 660                            u8 *csum)
 661 {
 662         struct btrfs_ordered_sum *sum = NULL;
 663         int ret = 0;
 664         unsigned long i;
 665         unsigned long num_sectors;
 666         u32 sectorsize = sdev->dev->dev_root->sectorsize;
 667
 668         while (!list_empty(&sdev->csum_list)) {
 669                 sum = list_first_entry(&sdev->csum_list,
 670                                        struct btrfs_ordered_sum, list);
 671                 if (sum->bytenr > logical)
 672                         return 0;
 673                 if (sum->bytenr + sum->len > logical)
 674                         break;
 675
 676                 ++sdev->stat.csum_discards;
 677                 list_del(&sum->list);
 678                 kfree(sum);
 679                 sum = NULL;
 680         }
 681         if (!sum)
 682                 return 0;
 683
 684         num_sectors = sum->len / sectorsize;
 685         for (i = 0; i < num_sectors; ++i) {
 686                 if (sum->sums[i].bytenr == logical) {
 687                         memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
 688                         ret = 1;
 689                         break;
 690                 }
 691         }
 692         if (ret && i == num_sectors - 1) {
 693                 list_del(&sum->list);
 694                 kfree(sum);
 695         }
 696         return ret;
 697 }
 698
 699 /* scrub extent tries to collect up to 64 kB for each bio */
 700 static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
 701                         u64 physical, u64 flags, u64 gen, u64 mirror_num)
 702 {
 703         int ret;
 704         u8 csum[BTRFS_CSUM_SIZE];
 705
 706         while (len) {
 707                 u64 l = min_t(u64, len, PAGE_SIZE);
 708                 int have_csum = 0;
 709
 710                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
 711                         /* push csums to sbio */
 712                         have_csum = scrub_find_csum(sdev, logical, l, csum);
 713                         if (have_csum == 0)
 714                                 ++sdev->stat.no_csum;
 715                 }
 716                 ret = scrub_page(sdev, logical, l, physical, flags, gen,
 717                                  mirror_num, have_csum ? csum : NULL, 0);
 718                 if (ret)
 719                         return ret;
 720                 len -= l;
 721                 logical += l;
 722                 physical += l;
 723         }
 724         return 0;
 725 }
 726
 727 static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
 728         struct map_lookup *map, int num, u64 base, u64 length)
 729 {
 730         struct btrfs_path *path;
 731         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 732         struct btrfs_root *root = fs_info->extent_root;
 733         struct btrfs_root *csum_root = fs_info->csum_root;
 734         struct btrfs_extent_item *extent;
 735         struct blk_plug plug;
 736         u64 flags;
 737         int ret;
 738         int slot;
 739         int i;
 740         u64 nstripes;
 741         struct extent_buffer *l;
 742         struct btrfs_key key;
 743         u64 physical;
 744         u64 logical;
 745         u64 generation;
 746         u64 mirror_num;
 747         struct reada_control *reada1;
 748         struct reada_control *reada2;
 749         struct btrfs_key key_start;
 750         struct btrfs_key key_end;
 751
 752         u64 increment = map->stripe_len;
 753         u64 offset;
 754
 755         nstripes = length;
 756         offset = 0;
 757         do_div(nstripes, map->stripe_len);
 758         if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 759                 offset = map->stripe_len * num;
 760                 increment = map->stripe_len * map->num_stripes;
 761                 mirror_num = 0;
 762         } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 763                 int factor = map->num_stripes / map->sub_stripes;
 764                 offset = map->stripe_len * (num / map->sub_stripes);
 765                 increment = map->stripe_len * factor;
 766                 mirror_num = num % map->sub_stripes;
 767         } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
 768                 increment = map->stripe_len;
 769                 mirror_num = num % map->num_stripes;
 770         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
 771                 increment = map->stripe_len;
 772                 mirror_num = num % map->num_stripes;
 773         } else {
 774                 increment = map->stripe_len;
 775                 mirror_num = 0;
 776         }
 777
 778         path = btrfs_alloc_path();
 779         if (!path)
 780                 return -ENOMEM;
 781
 782         path->search_commit_root = 1;
 783         path->skip_locking = 1;
 784
 785         /*
 786          * trigger the readahead for extent tree csum tree and wait for
 787          * completion. During readahead, the scrub is officially paused
 788          * to not hold off transaction commits
 789          */
 790         logical = base + offset;
 791
 792         wait_event(sdev->list_wait,
 793                    atomic_read(&sdev->in_flight) == 0);
 794         atomic_inc(&fs_info->scrubs_paused);
 795         wake_up(&fs_info->scrub_pause_wait);
 796
 797         /* FIXME it might be better to start readahead at commit root */
 798         key_start.objectid = logical;
 799         key_start.type = BTRFS_EXTENT_ITEM_KEY;
 800         key_start.offset = (u64)0;
 801         key_end.objectid = base + offset + nstripes * increment;
 802         key_end.type = BTRFS_EXTENT_ITEM_KEY;
 803         key_end.offset = (u64)0;
 804         reada1 = btrfs_reada_add(root, &key_start, &key_end);
 805
 806         key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
 807         key_start.type = BTRFS_EXTENT_CSUM_KEY;
 808         key_start.offset = logical;
 809         key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
 810         key_end.type = BTRFS_EXTENT_CSUM_KEY;
 811         key_end.offset = base + offset + nstripes * increment;
 812         reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
 813
 814         if (!IS_ERR(reada1))
 815                 btrfs_reada_wait(reada1);
 816         if (!IS_ERR(reada2))
 817                 btrfs_reada_wait(reada2);
 818
 819         mutex_lock(&fs_info->scrub_lock);
 820         while (atomic_read(&fs_info->scrub_pause_req)) {
 821                 mutex_unlock(&fs_info->scrub_lock);
 822                 wait_event(fs_info->scrub_pause_wait,
 823                    atomic_read(&fs_info->scrub_pause_req) == 0);
 824                 mutex_lock(&fs_info->scrub_lock);
 825         }
 826         atomic_dec(&fs_info->scrubs_paused);
 827         mutex_unlock(&fs_info->scrub_lock);
 828         wake_up(&fs_info->scrub_pause_wait);
 829
 830         /*
 831          * collect all data csums for the stripe to avoid seeking during
 832          * the scrub. This might currently (crc32) end up to be about 1MB
 833          */
 834         blk_start_plug(&plug);
 835
 836         /*
 837          * now find all extents for each stripe and scrub them
 838          */
 839         logical = base + offset;
 840         physical = map->stripes[num].physical;
 841         ret = 0;
 842         for (i = 0; i < nstripes; ++i) {
 843                 /*
 844                  * canceled?
 845                  */
 846                 if (atomic_read(&fs_info->scrub_cancel_req) ||
 847                     atomic_read(&sdev->cancel_req)) {
 848                         ret = -ECANCELED;
 849                         goto out;
 850                 }
 851                 /*
 852                  * check to see if we have to pause
 853                  */
 854                 if (atomic_read(&fs_info->scrub_pause_req)) {
 855                         /* push queued extents */
 856                         scrub_submit(sdev);
 857                         wait_event(sdev->list_wait,
 858                                    atomic_read(&sdev->in_flight) == 0);
 859                         atomic_inc(&fs_info->scrubs_paused);
 860                         wake_up(&fs_info->scrub_pause_wait);
 861                         mutex_lock(&fs_info->scrub_lock);
 862                         while (atomic_read(&fs_info->scrub_pause_req)) {
 863                                 mutex_unlock(&fs_info->scrub_lock);
 864                                 wait_event(fs_info->scrub_pause_wait,
 865                                    atomic_read(&fs_info->scrub_pause_req) == 0);
 866                                 mutex_lock(&fs_info->scrub_lock);
 867                         }
 868                         atomic_dec(&fs_info->scrubs_paused);
 869                         mutex_unlock(&fs_info->scrub_lock);
 870                         wake_up(&fs_info->scrub_pause_wait);
 871                 }
 872
 873                 ret = btrfs_lookup_csums_range(csum_root, logical,
 874                                                logical + map->stripe_len - 1,
 875                                                &sdev->csum_list, 1);
 876                 if (ret)
 877                         goto out;
 878
 879                 key.objectid = logical;
 880                 key.type = BTRFS_EXTENT_ITEM_KEY;
 881                 key.offset = (u64)0;
 882
 883                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 884                 if (ret < 0)
 885                         goto out;
 886                 if (ret > 0) {
 887                         ret = btrfs_previous_item(root, path, 0,
 888                                                   BTRFS_EXTENT_ITEM_KEY);
 889                         if (ret < 0)
 890                                 goto out;
 891                         if (ret > 0) {
 892                                 /* there's no smaller item, so stick with the
 893                                  * larger one */
 894                                 btrfs_release_path(path);
 895                                 ret = btrfs_search_slot(NULL, root, &key,
 896                                                         path, 0, 0);
 897                                 if (ret < 0)
 898                                         goto out;
 899                         }
 900                 }
 901
 902                 while (1) {
 903                         l = path->nodes[0];
 904                         slot = path->slots[0];
 905                         if (slot >= btrfs_header_nritems(l)) {
 906                                 ret = btrfs_next_leaf(root, path);
 907                                 if (ret == 0)
 908                                         continue;
 909                                 if (ret < 0)
 910                                         goto out;
 911
 912                                 break;
 913                         }
 914                         btrfs_item_key_to_cpu(l, &key, slot);
 915
 916                         if (key.objectid + key.offset <= logical)
 917                                 goto next;
 918
 919                         if (key.objectid >= logical + map->stripe_len)
 920                                 break;
 921
 922                         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
 923                                 goto next;
 924
 925                         extent = btrfs_item_ptr(l, slot,
 926                                                 struct btrfs_extent_item);
 927                         flags = btrfs_extent_flags(l, extent);
 928                         generation = btrfs_extent_generation(l, extent);
 929
 930                         if (key.objectid < logical &&
 931                             (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
 932                                 printk(KERN_ERR
 933                                        "btrfs scrub: tree block %llu spanning "
 934                                        "stripes, ignored. logical=%llu\n",
 935                                        (unsigned long long)key.objectid,
 936                                        (unsigned long long)logical);
 937                                 goto next;
 938                         }
 939
 940                         /*
 941                          * trim extent to this stripe
 942                          */
 943                         if (key.objectid < logical) {
 944                                 key.offset -= logical - key.objectid;
 945                                 key.objectid = logical;
 946                         }
 947                         if (key.objectid + key.offset >
 948                             logical + map->stripe_len) {
 949                                 key.offset = logical + map->stripe_len -
 950                                              key.objectid;
 951                         }
 952
 953                         ret = scrub_extent(sdev, key.objectid, key.offset,
 954                                            key.objectid - logical + physical,
 955                                            flags, generation, mirror_num);
 956                         if (ret)
 957                                 goto out;
 958
 959 next:
 960                         path->slots[0]++;
 961                 }
 962                 btrfs_release_path(path);
 963                 logical += increment;
 964                 physical += map->stripe_len;
 965                 spin_lock(&sdev->stat_lock);
 966                 sdev->stat.last_physical = physical;
 967                 spin_unlock(&sdev->stat_lock);
 968         }
 969         /* push queued extents */
 970         scrub_submit(sdev);
 971
 972 out:
 973         blk_finish_plug(&plug);
 974         btrfs_free_path(path);
 975         return ret < 0 ? ret : 0;
 976 }
 977
 978 static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
 979         u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
 980 {
 981         struct btrfs_mapping_tree *map_tree =
 982                 &sdev->dev->dev_root->fs_info->mapping_tree;
 983         struct map_lookup *map;
 984         struct extent_map *em;
 985         int i;
 986         int ret = -EINVAL;
 987
 988         read_lock(&map_tree->map_tree.lock);
 989         em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
 990         read_unlock(&map_tree->map_tree.lock);
 991
 992         if (!em)
 993                 return -EINVAL;
 994
 995         map = (struct map_lookup *)em->bdev;
 996         if (em->start != chunk_offset)
 997                 goto out;
 998
 999         if (em->len < length)
1000                 goto out;
1001
1002         for (i = 0; i < map->num_stripes; ++i) {
1003                 if (map->stripes[i].dev == sdev->dev) {
1004                         ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1005                         if (ret)
1006                                 goto out;
1007                 }
1008         }
1009 out:
1010         free_extent_map(em);
1011
1012         return ret;
1013 }
1014
1015 static noinline_for_stack
1016 int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1017 {
1018         struct btrfs_dev_extent *dev_extent = NULL;
1019         struct btrfs_path *path;
1020         struct btrfs_root *root = sdev->dev->dev_root;
1021         struct btrfs_fs_info *fs_info = root->fs_info;
1022         u64 length;
1023         u64 chunk_tree;
1024         u64 chunk_objectid;
1025         u64 chunk_offset;
1026         int ret;
1027         int slot;
1028         struct extent_buffer *l;
1029         struct btrfs_key key;
1030         struct btrfs_key found_key;
1031         struct btrfs_block_group_cache *cache;
1032
1033         path = btrfs_alloc_path();
1034         if (!path)
1035                 return -ENOMEM;
1036
1037         path->reada = 2;
1038         path->search_commit_root = 1;
1039         path->skip_locking = 1;
1040
1041         key.objectid = sdev->dev->devid;
1042         key.offset = 0ull;
1043         key.type = BTRFS_DEV_EXTENT_KEY;
1044
1045
1046         while (1) {
1047                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1048                 if (ret < 0)
1049                         break;
1050                 if (ret > 0) {
1051                         if (path->slots[0] >=
1052                             btrfs_header_nritems(path->nodes[0])) {
1053                                 ret = btrfs_next_leaf(root, path);
1054                                 if (ret)
1055                                         break;
1056                         }
1057                 }
1058
1059                 l = path->nodes[0];
1060                 slot = path->slots[0];
1061
1062                 btrfs_item_key_to_cpu(l, &found_key, slot);
1063
1064                 if (found_key.objectid != sdev->dev->devid)
1065                         break;
1066
1067                 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1068                         break;
1069
1070                 if (found_key.offset >= end)
1071                         break;
1072
1073                 if (found_key.offset < key.offset)
1074                         break;
1075
1076                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1077                 length = btrfs_dev_extent_length(l, dev_extent);
1078
1079                 if (found_key.offset + length <= start) {
1080                         key.offset = found_key.offset + length;
1081                         btrfs_release_path(path);
1082                         continue;
1083                 }
1084
1085                 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1086                 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1087                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1088
1089                 /*
1090                  * get a reference on the corresponding block group to prevent
1091                  * the chunk from going away while we scrub it
1092                  */
1093                 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1094                 if (!cache) {
1095                         ret = -ENOENT;
1096                         break;
1097                 }
1098                 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1099                                   chunk_offset, length);
1100                 btrfs_put_block_group(cache);
1101                 if (ret)
1102                         break;
1103
1104                 key.offset = found_key.offset + length;
1105                 btrfs_release_path(path);
1106         }
1107
1108         btrfs_free_path(path);
1109
1110         /*
1111          * ret can still be 1 from search_slot or next_leaf,
1112          * that's not an error
1113          */
1114         return ret < 0 ? ret : 0;
1115 }
1116
1117 static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1118 {
1119         int     i;
1120         u64     bytenr;
1121         u64     gen;
1122         int     ret;
1123         struct btrfs_device *device = sdev->dev;
1124         struct btrfs_root *root = device->dev_root;
1125
1126         gen = root->fs_info->last_trans_committed;
1127
1128         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1129                 bytenr = btrfs_sb_offset(i);
1130                 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1131                         break;
1132
1133                 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
1134                                  BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1135                 if (ret)
1136                         return ret;
1137         }
1138         wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1139
1140         return 0;
1141 }
1142
1143 /*
1144  * get a reference count on fs_info->scrub_workers. start worker if necessary
1145  */
1146 static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1147 {
1148         struct btrfs_fs_info *fs_info = root->fs_info;
1149
1150         mutex_lock(&fs_info->scrub_lock);
1151         if (fs_info->scrub_workers_refcnt == 0) {
1152                 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1153                            fs_info->thread_pool_size, &fs_info->generic_worker);
1154                 fs_info->scrub_workers.idle_thresh = 4;
1155                 btrfs_start_workers(&fs_info->scrub_workers, 1);
1156         }
1157         ++fs_info->scrub_workers_refcnt;
1158         mutex_unlock(&fs_info->scrub_lock);
1159
1160         return 0;
1161 }
1162
1163 static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
1164 {
1165         struct btrfs_fs_info *fs_info = root->fs_info;
1166
1167         mutex_lock(&fs_info->scrub_lock);
1168         if (--fs_info->scrub_workers_refcnt == 0)
1169                 btrfs_stop_workers(&fs_info->scrub_workers);
1170         WARN_ON(fs_info->scrub_workers_refcnt < 0);
1171         mutex_unlock(&fs_info->scrub_lock);
1172 }
1173
1174
1175 int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1176                     struct btrfs_scrub_progress *progress, int readonly)
1177 {
1178         struct scrub_dev *sdev;
1179         struct btrfs_fs_info *fs_info = root->fs_info;
1180         int ret;
1181         struct btrfs_device *dev;
1182
1183         if (btrfs_fs_closing(root->fs_info))
1184                 return -EINVAL;
1185
1186         /*
1187          * check some assumptions
1188          */
1189         if (root->sectorsize != PAGE_SIZE ||
1190             root->sectorsize != root->leafsize ||
1191             root->sectorsize != root->nodesize) {
1192                 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
1193                 return -EINVAL;
1194         }
1195
1196         ret = scrub_workers_get(root);
1197         if (ret)
1198                 return ret;
1199
1200         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1201         dev = btrfs_find_device(root, devid, NULL, NULL);
1202         if (!dev || dev->missing) {
1203                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1204                 scrub_workers_put(root);
1205                 return -ENODEV;
1206         }
1207         mutex_lock(&fs_info->scrub_lock);
1208
1209         if (!dev->in_fs_metadata) {
1210                 mutex_unlock(&fs_info->scrub_lock);
1211                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1212                 scrub_workers_put(root);
1213                 return -ENODEV;
1214         }
1215
1216         if (dev->scrub_device) {
1217                 mutex_unlock(&fs_info->scrub_lock);
1218                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1219                 scrub_workers_put(root);
1220                 return -EINPROGRESS;
1221         }
1222         sdev = scrub_setup_dev(dev);
1223         if (IS_ERR(sdev)) {
1224                 mutex_unlock(&fs_info->scrub_lock);
1225                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1226                 scrub_workers_put(root);
1227                 return PTR_ERR(sdev);
1228         }
1229         sdev->readonly = readonly;
1230         dev->scrub_device = sdev;
1231
1232         atomic_inc(&fs_info->scrubs_running);
1233         mutex_unlock(&fs_info->scrub_lock);
1234         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1235
1236         down_read(&fs_info->scrub_super_lock);
1237         ret = scrub_supers(sdev);
1238         up_read(&fs_info->scrub_super_lock);
1239
1240         if (!ret)
1241                 ret = scrub_enumerate_chunks(sdev, start, end);
1242
1243         wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1244
1245         atomic_dec(&fs_info->scrubs_running);
1246         wake_up(&fs_info->scrub_pause_wait);
1247
1248         if (progress)
1249                 memcpy(progress, &sdev->stat, sizeof(*progress));
1250
1251         mutex_lock(&fs_info->scrub_lock);
1252         dev->scrub_device = NULL;
1253         mutex_unlock(&fs_info->scrub_lock);
1254
1255         scrub_free_dev(sdev);
1256         scrub_workers_put(root);
1257
1258         return ret;
1259 }
1260
1261 int btrfs_scrub_pause(struct btrfs_root *root)
1262 {
1263         struct btrfs_fs_info *fs_info = root->fs_info;
1264
1265         mutex_lock(&fs_info->scrub_lock);
1266         atomic_inc(&fs_info->scrub_pause_req);
1267         while (atomic_read(&fs_info->scrubs_paused) !=
1268                atomic_read(&fs_info->scrubs_running)) {
1269                 mutex_unlock(&fs_info->scrub_lock);
1270                 wait_event(fs_info->scrub_pause_wait,
1271                            atomic_read(&fs_info->scrubs_paused) ==
1272                            atomic_read(&fs_info->scrubs_running));
1273                 mutex_lock(&fs_info->scrub_lock);
1274         }
1275         mutex_unlock(&fs_info->scrub_lock);
1276
1277         return 0;
1278 }
1279
1280 int btrfs_scrub_continue(struct btrfs_root *root)
1281 {
1282         struct btrfs_fs_info *fs_info = root->fs_info;
1283
1284         atomic_dec(&fs_info->scrub_pause_req);
1285         wake_up(&fs_info->scrub_pause_wait);
1286         return 0;
1287 }
1288
1289 int btrfs_scrub_pause_super(struct btrfs_root *root)
1290 {
1291         down_write(&root->fs_info->scrub_super_lock);
1292         return 0;
1293 }
1294
1295 int btrfs_scrub_continue_super(struct btrfs_root *root)
1296 {
1297         up_write(&root->fs_info->scrub_super_lock);
1298         return 0;
1299 }
1300
1301 int btrfs_scrub_cancel(struct btrfs_root *root)
1302 {
1303         struct btrfs_fs_info *fs_info = root->fs_info;
1304
1305         mutex_lock(&fs_info->scrub_lock);
1306         if (!atomic_read(&fs_info->scrubs_running)) {
1307                 mutex_unlock(&fs_info->scrub_lock);
1308                 return -ENOTCONN;
1309         }
1310
1311         atomic_inc(&fs_info->scrub_cancel_req);
1312         while (atomic_read(&fs_info->scrubs_running)) {
1313                 mutex_unlock(&fs_info->scrub_lock);
1314                 wait_event(fs_info->scrub_pause_wait,
1315                            atomic_read(&fs_info->scrubs_running) == 0);
1316                 mutex_lock(&fs_info->scrub_lock);
1317         }
1318         atomic_dec(&fs_info->scrub_cancel_req);
1319         mutex_unlock(&fs_info->scrub_lock);
1320
1321         return 0;
1322 }
1323
1324 int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1325 {
1326         struct btrfs_fs_info *fs_info = root->fs_info;
1327         struct scrub_dev *sdev;
1328
1329         mutex_lock(&fs_info->scrub_lock);
1330         sdev = dev->scrub_device;
1331         if (!sdev) {
1332                 mutex_unlock(&fs_info->scrub_lock);
1333                 return -ENOTCONN;
1334         }
1335         atomic_inc(&sdev->cancel_req);
1336         while (dev->scrub_device) {
1337                 mutex_unlock(&fs_info->scrub_lock);
1338                 wait_event(fs_info->scrub_pause_wait,
1339                            dev->scrub_device == NULL);
1340                 mutex_lock(&fs_info->scrub_lock);
1341         }
1342         mutex_unlock(&fs_info->scrub_lock);
1343
1344         return 0;
1345 }
1346 int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1347 {
1348         struct btrfs_fs_info *fs_info = root->fs_info;
1349         struct btrfs_device *dev;
1350         int ret;
1351
1352         /*
1353          * we have to hold the device_list_mutex here so the device
1354          * does not go away in cancel_dev. FIXME: find a better solution
1355          */
1356         mutex_lock(&fs_info->fs_devices->device_list_mutex);
1357         dev = btrfs_find_device(root, devid, NULL, NULL);
1358         if (!dev) {
1359                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1360                 return -ENODEV;
1361         }
1362         ret = btrfs_scrub_cancel_dev(root, dev);
1363         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1364
1365         return ret;
1366 }
1367
1368 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
1369                          struct btrfs_scrub_progress *progress)
1370 {
1371         struct btrfs_device *dev;
1372         struct scrub_dev *sdev = NULL;
1373
1374         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1375         dev = btrfs_find_device(root, devid, NULL, NULL);
1376         if (dev)
1377                 sdev = dev->scrub_device;
1378         if (sdev)
1379                 memcpy(progress, &sdev->stat, sizeof(*progress));
1380         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1381
1382         return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
1383 }