release/src-rt-6.x.4708/linux/linux-2.6.36/fs/jffs2/gc.c

   1 /*
   2  * JFFS2 -- Journalling Flash File System, Version 2.
   3  *
   4  * Copyright © 2001-2007 Red Hat, Inc.
   5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
   6  *
   7  * Created by David Woodhouse <dwmw2@infradead.org>
   8  *
   9  * For licensing information, see the file 'LICENCE' in this directory.
  10  *
  11  */
  12
  13 #include <linux/kernel.h>
  14 #include <linux/mtd/mtd.h>
  15 #include <linux/slab.h>
  16 #include <linux/pagemap.h>
  17 #include <linux/crc32.h>
  18 #include <linux/compiler.h>
  19 #include <linux/stat.h>
  20 #include "nodelist.h"
  21 #include "compr.h"
  22
  23 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
  24                                           struct jffs2_inode_cache *ic,
  25                                           struct jffs2_raw_node_ref *raw);
  26 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  27                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
  28 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  29                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  30 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  31                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  32 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  33                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  34                                       uint32_t start, uint32_t end);
  35 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  36                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  37                                        uint32_t start, uint32_t end);
  38 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
  39                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
  40
  41 /* Called with erase_completion_lock held */
  42 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
  43 {
  44         struct jffs2_eraseblock *ret;
  45         struct list_head *nextlist = NULL;
  46         int n = jiffies % 128;
  47
  48         /* Pick an eraseblock to garbage collect next. This is where we'll
  49            put the clever wear-levelling algorithms. Eventually.  */
  50         /* We possibly want to favour the dirtier blocks more when the
  51            number of free blocks is low. */
  52 again:
  53         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
  54                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
  55                 nextlist = &c->bad_used_list;
  56         } else if (n < 50 && !list_empty(&c->erasable_list)) {
  57                 /* Note that most of them will have gone directly to be erased.
  58                    So don't favour the erasable_list _too_ much. */
  59                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
  60                 nextlist = &c->erasable_list;
  61         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
  62                 /* Most of the time, pick one off the very_dirty list */
  63                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
  64                 nextlist = &c->very_dirty_list;
  65         } else if (n < 126 && !list_empty(&c->dirty_list)) {
  66                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
  67                 nextlist = &c->dirty_list;
  68         } else if (!list_empty(&c->clean_list)) {
  69                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
  70                 nextlist = &c->clean_list;
  71         } else if (!list_empty(&c->dirty_list)) {
  72                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
  73
  74                 nextlist = &c->dirty_list;
  75         } else if (!list_empty(&c->very_dirty_list)) {
  76                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
  77                 nextlist = &c->very_dirty_list;
  78         } else if (!list_empty(&c->erasable_list)) {
  79                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
  80
  81                 nextlist = &c->erasable_list;
  82         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
  83                 /* There are blocks are wating for the wbuf sync */
  84                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
  85                 spin_unlock(&c->erase_completion_lock);
  86                 jffs2_flush_wbuf_pad(c);
  87                 spin_lock(&c->erase_completion_lock);
  88                 goto again;
  89         } else {
  90                 /* Eep. All were empty */
  91                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
  92                 return NULL;
  93         }
  94
  95         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
  96         list_del(&ret->list);
  97         c->gcblock = ret;
  98         ret->gc_node = ret->first_node;
  99         if (!ret->gc_node) {
 100                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
 101                 BUG();
 102         }
 103
 104         /* Have we accidentally picked a clean block with wasted space ? */
 105         if (ret->wasted_size) {
 106                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
 107                 ret->dirty_size += ret->wasted_size;
 108                 c->wasted_size -= ret->wasted_size;
 109                 c->dirty_size += ret->wasted_size;
 110                 ret->wasted_size = 0;
 111         }
 112
 113         return ret;
 114 }
 115
 116 /* jffs2_garbage_collect_pass
 117  * Make a single attempt to progress GC. Move one node, and possibly
 118  * start erasing one eraseblock.
 119  */
 120 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 121 {
 122         struct jffs2_inode_info *f;
 123         struct jffs2_inode_cache *ic;
 124         struct jffs2_eraseblock *jeb;
 125         struct jffs2_raw_node_ref *raw;
 126         uint32_t gcblock_dirty;
 127         int ret = 0, inum, nlink;
 128         int xattr = 0;
 129
 130         if (mutex_lock_interruptible(&c->alloc_sem))
 131                 return -EINTR;
 132
 133         for (;;) {
 134                 spin_lock(&c->erase_completion_lock);
 135                 if (!c->unchecked_size)
 136                         break;
 137
 138                 /* We can't start doing GC yet. We haven't finished checking
 139                    the node CRCs etc. Do it now. */
 140
 141                 /* checked_ino is protected by the alloc_sem */
 142                 if (c->checked_ino > c->highest_ino && xattr) {
 143                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
 144                                c->unchecked_size);
 145                         jffs2_dbg_dump_block_lists_nolock(c);
 146                         spin_unlock(&c->erase_completion_lock);
 147                         mutex_unlock(&c->alloc_sem);
 148                         return -ENOSPC;
 149                 }
 150
 151                 spin_unlock(&c->erase_completion_lock);
 152
 153                 if (!xattr)
 154                         xattr = jffs2_verify_xattr(c);
 155
 156                 spin_lock(&c->inocache_lock);
 157
 158                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
 159
 160                 if (!ic) {
 161                         spin_unlock(&c->inocache_lock);
 162                         continue;
 163                 }
 164
 165                 if (!ic->pino_nlink) {
 166                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
 167                                   ic->ino));
 168                         spin_unlock(&c->inocache_lock);
 169                         jffs2_xattr_delete_inode(c, ic);
 170                         continue;
 171                 }
 172                 switch(ic->state) {
 173                 case INO_STATE_CHECKEDABSENT:
 174                 case INO_STATE_PRESENT:
 175                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
 176                         spin_unlock(&c->inocache_lock);
 177                         continue;
 178
 179                 case INO_STATE_GC:
 180                 case INO_STATE_CHECKING:
 181                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
 182                         spin_unlock(&c->inocache_lock);
 183                         BUG();
 184
 185                 case INO_STATE_READING:
 186                         /* We need to wait for it to finish, lest we move on
 187                            and trigger the BUG() above while we haven't yet
 188                            finished checking all its nodes */
 189                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
 190                         /* We need to come back again for the _same_ inode. We've
 191                          made no progress in this case, but that should be OK */
 192                         c->checked_ino--;
 193
 194                         mutex_unlock(&c->alloc_sem);
 195                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 196                         return 0;
 197
 198                 default:
 199                         BUG();
 200
 201                 case INO_STATE_UNCHECKED:
 202                         ;
 203                 }
 204                 ic->state = INO_STATE_CHECKING;
 205                 spin_unlock(&c->inocache_lock);
 206
 207                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
 208
 209                 ret = jffs2_do_crccheck_inode(c, ic);
 210                 if (ret)
 211                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
 212
 213                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
 214                 mutex_unlock(&c->alloc_sem);
 215                 return ret;
 216         }
 217
 218         /* If there are any blocks which need erasing, erase them now */
 219         if (!list_empty(&c->erase_complete_list) ||
 220             !list_empty(&c->erase_pending_list)) {
 221                 spin_unlock(&c->erase_completion_lock);
 222                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
 223                 if (jffs2_erase_pending_blocks(c, 1)) {
 224                         mutex_unlock(&c->alloc_sem);
 225                         return 0;
 226                 }
 227                 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
 228                 spin_lock(&c->erase_completion_lock);
 229         }
 230
 231         /* First, work out which block we're garbage-collecting */
 232         jeb = c->gcblock;
 233
 234         if (!jeb)
 235                 jeb = jffs2_find_gc_block(c);
 236
 237         if (!jeb) {
 238                 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
 239                 if (c->nr_erasing_blocks) {
 240                         spin_unlock(&c->erase_completion_lock);
 241                         mutex_unlock(&c->alloc_sem);
 242                         return -EAGAIN;
 243                 }
 244                 D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
 245                 spin_unlock(&c->erase_completion_lock);
 246                 mutex_unlock(&c->alloc_sem);
 247                 return -EIO;
 248         }
 249
 250         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
 251         D1(if (c->nextblock)
 252            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
 253
 254         if (!jeb->used_size) {
 255                 mutex_unlock(&c->alloc_sem);
 256                 goto eraseit;
 257         }
 258
 259         raw = jeb->gc_node;
 260         gcblock_dirty = jeb->dirty_size;
 261
 262         while(ref_obsolete(raw)) {
 263                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
 264                 raw = ref_next(raw);
 265                 if (unlikely(!raw)) {
 266                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
 267                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
 268                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
 269                         jeb->gc_node = raw;
 270                         spin_unlock(&c->erase_completion_lock);
 271                         mutex_unlock(&c->alloc_sem);
 272                         BUG();
 273                 }
 274         }
 275         jeb->gc_node = raw;
 276
 277         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
 278
 279         if (!raw->next_in_ino) {
 280                 /* Inode-less node. Clean marker, snapshot or something like that */
 281                 spin_unlock(&c->erase_completion_lock);
 282                 if (ref_flags(raw) == REF_PRISTINE) {
 283                         /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
 284                         jffs2_garbage_collect_pristine(c, NULL, raw);
 285                 } else {
 286                         /* Just mark it obsolete */
 287                         jffs2_mark_node_obsolete(c, raw);
 288                 }
 289                 mutex_unlock(&c->alloc_sem);
 290                 goto eraseit_lock;
 291         }
 292
 293         ic = jffs2_raw_ref_to_ic(raw);
 294
 295 #ifdef CONFIG_JFFS2_FS_XATTR
 296         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
 297          * We can decide whether this node is inode or xattr by ic->class.     */
 298         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
 299             || ic->class == RAWNODE_CLASS_XATTR_REF) {
 300                 spin_unlock(&c->erase_completion_lock);
 301
 302                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
 303                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
 304                 } else {
 305                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
 306                 }
 307                 goto test_gcnode;
 308         }
 309 #endif
 310
 311         /* We need to hold the inocache. Either the erase_completion_lock or
 312            the inocache_lock are sufficient; we trade down since the inocache_lock
 313            causes less contention. */
 314         spin_lock(&c->inocache_lock);
 315
 316         spin_unlock(&c->erase_completion_lock);
 317
 318         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
 319
 320         /* Three possibilities:
 321            1. Inode is already in-core. We must iget it and do proper
 322               updating to its fragtree, etc.
 323            2. Inode is not in-core, node is REF_PRISTINE. We lock the
 324               inocache to prevent a read_inode(), copy the node intact.
 325            3. Inode is not in-core, node is not pristine. We must iget()
 326               and take the slow path.
 327         */
 328
 329         switch(ic->state) {
 330         case INO_STATE_CHECKEDABSENT:
 331                 /* It's been checked, but it's not currently in-core.
 332                    We can just copy any pristine nodes, but have
 333                    to prevent anyone else from doing read_inode() while
 334                    we're at it, so we set the state accordingly */
 335                 if (ref_flags(raw) == REF_PRISTINE)
 336                         ic->state = INO_STATE_GC;
 337                 else {
 338                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
 339                                   ic->ino));
 340                 }
 341                 break;
 342
 343         case INO_STATE_PRESENT:
 344                 /* It's in-core. GC must iget() it. */
 345                 break;
 346
 347         case INO_STATE_UNCHECKED:
 348         case INO_STATE_CHECKING:
 349         case INO_STATE_GC:
 350                 /* Should never happen. We should have finished checking
 351                    by the time we actually start doing any GC, and since
 352                    we're holding the alloc_sem, no other garbage collection
 353                    can happen.
 354                 */
 355                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
 356                        ic->ino, ic->state);
 357                 mutex_unlock(&c->alloc_sem);
 358                 spin_unlock(&c->inocache_lock);
 359                 BUG();
 360
 361         case INO_STATE_READING:
 362                 /* Someone's currently trying to read it. We must wait for
 363                    them to finish and then go through the full iget() route
 364                    to do the GC. However, sometimes read_inode() needs to get
 365                    the alloc_sem() (for marking nodes invalid) so we must
 366                    drop the alloc_sem before sleeping. */
 367
 368                 mutex_unlock(&c->alloc_sem);
 369                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
 370                           ic->ino, ic->state));
 371                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 372                 /* And because we dropped the alloc_sem we must start again from the
 373                    beginning. Ponder chance of livelock here -- we're returning success
 374                    without actually making any progress.
 375
 376                    Q: What are the chances that the inode is back in INO_STATE_READING
 377                    again by the time we next enter this function? And that this happens
 378                    enough times to cause a real delay?
 379
 380                    A: Small enough that I don't care :)
 381                 */
 382                 return 0;
 383         }
 384
 385         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
 386            node intact, and we don't have to muck about with the fragtree etc.
 387            because we know it's not in-core. If it _was_ in-core, we go through
 388            all the iget() crap anyway */
 389
 390         if (ic->state == INO_STATE_GC) {
 391                 spin_unlock(&c->inocache_lock);
 392
 393                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
 394
 395                 spin_lock(&c->inocache_lock);
 396                 ic->state = INO_STATE_CHECKEDABSENT;
 397                 wake_up(&c->inocache_wq);
 398
 399                 if (ret != -EBADFD) {
 400                         spin_unlock(&c->inocache_lock);
 401                         goto test_gcnode;
 402                 }
 403
 404                 /* Fall through if it wanted us to, with inocache_lock held */
 405         }
 406
 407         /* Prevent the fairly unlikely race where the gcblock is
 408            entirely obsoleted by the final close of a file which had
 409            the only valid nodes in the block, followed by erasure,
 410            followed by freeing of the ic because the erased block(s)
 411            held _all_ the nodes of that inode.... never been seen but
 412            it's vaguely possible. */
 413
 414         inum = ic->ino;
 415         nlink = ic->pino_nlink;
 416         spin_unlock(&c->inocache_lock);
 417
 418         f = jffs2_gc_fetch_inode(c, inum, !nlink);
 419         if (IS_ERR(f)) {
 420                 ret = PTR_ERR(f);
 421                 goto release_sem;
 422         }
 423         if (!f) {
 424                 ret = 0;
 425                 goto release_sem;
 426         }
 427
 428         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
 429
 430         jffs2_gc_release_inode(c, f);
 431
 432  test_gcnode:
 433         if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
 434                 /* Eep. This really should never happen. GC is broken */
 435                 printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
 436                 ret = -ENOSPC;
 437         }
 438  release_sem:
 439         mutex_unlock(&c->alloc_sem);
 440
 441  eraseit_lock:
 442         /* If we've finished this block, start it erasing */
 443         spin_lock(&c->erase_completion_lock);
 444
 445  eraseit:
 446         if (c->gcblock && !c->gcblock->used_size) {
 447                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
 448                 /* We're GC'ing an empty block? */
 449                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
 450                 c->gcblock = NULL;
 451                 c->nr_erasing_blocks++;
 452                 jffs2_garbage_collect_trigger(c);
 453         }
 454         spin_unlock(&c->erase_completion_lock);
 455
 456         return ret;
 457 }
 458
 459 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
 460                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
 461 {
 462         struct jffs2_node_frag *frag;
 463         struct jffs2_full_dnode *fn = NULL;
 464         struct jffs2_full_dirent *fd;
 465         uint32_t start = 0, end = 0, nrfrags = 0;
 466         int ret = 0;
 467
 468         mutex_lock(&f->sem);
 469
 470         /* Now we have the lock for this inode. Check that it's still the one at the head
 471            of the list. */
 472
 473         spin_lock(&c->erase_completion_lock);
 474
 475         if (c->gcblock != jeb) {
 476                 spin_unlock(&c->erase_completion_lock);
 477                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
 478                 goto upnout;
 479         }
 480         if (ref_obsolete(raw)) {
 481                 spin_unlock(&c->erase_completion_lock);
 482                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
 483                 /* They'll call again */
 484                 goto upnout;
 485         }
 486         spin_unlock(&c->erase_completion_lock);
 487
 488         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
 489         if (f->metadata && f->metadata->raw == raw) {
 490                 fn = f->metadata;
 491                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
 492                 goto upnout;
 493         }
 494
 495         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
 496                 if (frag->node && frag->node->raw == raw) {
 497                         fn = frag->node;
 498                         end = frag->ofs + frag->size;
 499                         if (!nrfrags++)
 500                                 start = frag->ofs;
 501                         if (nrfrags == frag->node->frags)
 502                                 break; /* We've found them all */
 503                 }
 504         }
 505         if (fn) {
 506                 if (ref_flags(raw) == REF_PRISTINE) {
 507                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
 508                         if (!ret) {
 509                                 /* Urgh. Return it sensibly. */
 510                                 frag->node->raw = f->inocache->nodes;
 511                         }
 512                         if (ret != -EBADFD)
 513                                 goto upnout;
 514                 }
 515                 /* We found a datanode. Do the GC */
 516                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
 517                         /* It crosses a page boundary. Therefore, it must be a hole. */
 518                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
 519                 } else {
 520                         /* It could still be a hole. But we GC the page this way anyway */
 521                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
 522                 }
 523                 goto upnout;
 524         }
 525
 526         /* Wasn't a dnode. Try dirent */
 527         for (fd = f->dents; fd; fd=fd->next) {
 528                 if (fd->raw == raw)
 529                         break;
 530         }
 531
 532         if (fd && fd->ino) {
 533                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
 534         } else if (fd) {
 535                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
 536         } else {
 537                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
 538                        ref_offset(raw), f->inocache->ino);
 539                 if (ref_obsolete(raw)) {
 540                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
 541                 } else {
 542                         jffs2_dbg_dump_node(c, ref_offset(raw));
 543                         BUG();
 544                 }
 545         }
 546  upnout:
 547         mutex_unlock(&f->sem);
 548
 549         return ret;
 550 }
 551
 552 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
 553                                           struct jffs2_inode_cache *ic,
 554                                           struct jffs2_raw_node_ref *raw)
 555 {
 556         union jffs2_node_union *node;
 557         size_t retlen;
 558         int ret;
 559         uint32_t phys_ofs, alloclen;
 560         uint32_t crc, rawlen;
 561         int retried = 0;
 562
 563         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
 564
 565         alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
 566
 567         /* Ask for a small amount of space (or the totlen if smaller) because we
 568            don't want to force wastage of the end of a block if splitting would
 569            work. */
 570         if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
 571                 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
 572
 573         ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
 574         /* 'rawlen' is not the exact summary size; it is only an upper estimation */
 575
 576         if (ret)
 577                 return ret;
 578
 579         if (alloclen < rawlen) {
 580                 /* Doesn't fit untouched. We'll go the old route and split it */
 581                 return -EBADFD;
 582         }
 583
 584         node = kmalloc(rawlen, GFP_KERNEL);
 585         if (!node)
 586                 return -ENOMEM;
 587
 588         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
 589         if (!ret && retlen != rawlen)
 590                 ret = -EIO;
 591         if (ret)
 592                 goto out_node;
 593
 594         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
 595         if (je32_to_cpu(node->u.hdr_crc) != crc) {
 596                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 597                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
 598                 goto bail;
 599         }
 600
 601         switch(je16_to_cpu(node->u.nodetype)) {
 602         case JFFS2_NODETYPE_INODE:
 603                 crc = crc32(0, node, sizeof(node->i)-8);
 604                 if (je32_to_cpu(node->i.node_crc) != crc) {
 605                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 606                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
 607                         goto bail;
 608                 }
 609
 610                 if (je32_to_cpu(node->i.dsize)) {
 611                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
 612                         if (je32_to_cpu(node->i.data_crc) != crc) {
 613                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 614                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
 615                                 goto bail;
 616                         }
 617                 }
 618                 break;
 619
 620         case JFFS2_NODETYPE_DIRENT:
 621                 crc = crc32(0, node, sizeof(node->d)-8);
 622                 if (je32_to_cpu(node->d.node_crc) != crc) {
 623                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 624                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
 625                         goto bail;
 626                 }
 627
 628                 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
 629                         printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
 630                         goto bail;
 631                 }
 632
 633                 if (node->d.nsize) {
 634                         crc = crc32(0, node->d.name, node->d.nsize);
 635                         if (je32_to_cpu(node->d.name_crc) != crc) {
 636                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 637                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
 638                                 goto bail;
 639                         }
 640                 }
 641                 break;
 642         default:
 643                 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
 644                 if (ic) {
 645                         printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
 646                                ref_offset(raw), je16_to_cpu(node->u.nodetype));
 647                         goto bail;
 648                 }
 649         }
 650
 651         /* OK, all the CRCs are good; this node can just be copied as-is. */
 652  retry:
 653         phys_ofs = write_ofs(c);
 654
 655         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
 656
 657         if (ret || (retlen != rawlen)) {
 658                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
 659                        rawlen, phys_ofs, ret, retlen);
 660                 if (retlen) {
 661                         jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
 662                 } else {
 663                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
 664                 }
 665                 if (!retried) {
 666                         /* Try to reallocate space and retry */
 667                         uint32_t dummy;
 668                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
 669
 670                         retried = 1;
 671
 672                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
 673
 674                         jffs2_dbg_acct_sanity_check(c,jeb);
 675                         jffs2_dbg_acct_paranoia_check(c, jeb);
 676
 677                         ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
 678                                                 /* this is not the exact summary size of it,
 679                                                         it is only an upper estimation */
 680
 681                         if (!ret) {
 682                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
 683
 684                                 jffs2_dbg_acct_sanity_check(c,jeb);
 685                                 jffs2_dbg_acct_paranoia_check(c, jeb);
 686
 687                                 goto retry;
 688                         }
 689                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
 690                 }
 691
 692                 if (!ret)
 693                         ret = -EIO;
 694                 goto out_node;
 695         }
 696         jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
 697
 698         jffs2_mark_node_obsolete(c, raw);
 699         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
 700
 701  out_node:
 702         kfree(node);
 703         return ret;
 704  bail:
 705         ret = -EBADFD;
 706         goto out_node;
 707 }
 708
 709 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 710                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
 711 {
 712         struct jffs2_full_dnode *new_fn;
 713         struct jffs2_raw_inode ri;
 714         struct jffs2_node_frag *last_frag;
 715         union jffs2_device_node dev;
 716         char *mdata = NULL;
 717         int mdatalen = 0;
 718         uint32_t alloclen, ilen;
 719         int ret;
 720
 721         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
 722             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
 723                 /* For these, we don't actually need to read the old node */
 724                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
 725                 mdata = (char *)&dev;
 726                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
 727         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
 728                 mdatalen = fn->size;
 729                 mdata = kmalloc(fn->size, GFP_KERNEL);
 730                 if (!mdata) {
 731                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
 732                         return -ENOMEM;
 733                 }
 734                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
 735                 if (ret) {
 736                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
 737                         kfree(mdata);
 738                         return ret;
 739                 }
 740                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
 741
 742         }
 743
 744         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
 745                                 JFFS2_SUMMARY_INODE_SIZE);
 746         if (ret) {
 747                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
 748                        sizeof(ri)+ mdatalen, ret);
 749                 goto out;
 750         }
 751
 752         last_frag = frag_last(&f->fragtree);
 753         if (last_frag)
 754                 /* Fetch the inode length from the fragtree rather then
 755                  * from i_size since i_size may have not been updated yet */
 756                 ilen = last_frag->ofs + last_frag->size;
 757         else
 758                 ilen = JFFS2_F_I_SIZE(f);
 759
 760         memset(&ri, 0, sizeof(ri));
 761         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 762         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
 763         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
 764         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
 765
 766         ri.ino = cpu_to_je32(f->inocache->ino);
 767         ri.version = cpu_to_je32(++f->highest_version);
 768         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
 769         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
 770         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
 771         ri.isize = cpu_to_je32(ilen);
 772         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
 773         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
 774         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 775         ri.offset = cpu_to_je32(0);
 776         ri.csize = cpu_to_je32(mdatalen);
 777         ri.dsize = cpu_to_je32(mdatalen);
 778         ri.compr = JFFS2_COMPR_NONE;
 779         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
 780         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
 781
 782         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
 783
 784         if (IS_ERR(new_fn)) {
 785                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
 786                 ret = PTR_ERR(new_fn);
 787                 goto out;
 788         }
 789         jffs2_mark_node_obsolete(c, fn->raw);
 790         jffs2_free_full_dnode(fn);
 791         f->metadata = new_fn;
 792  out:
 793         if (S_ISLNK(JFFS2_F_I_MODE(f)))
 794                 kfree(mdata);
 795         return ret;
 796 }
 797
 798 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 799                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 800 {
 801         struct jffs2_full_dirent *new_fd;
 802         struct jffs2_raw_dirent rd;
 803         uint32_t alloclen;
 804         int ret;
 805
 806         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 807         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
 808         rd.nsize = strlen(fd->name);
 809         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
 810         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
 811
 812         rd.pino = cpu_to_je32(f->inocache->ino);
 813         rd.version = cpu_to_je32(++f->highest_version);
 814         rd.ino = cpu_to_je32(fd->ino);
 815         /* If the times on this inode were set by explicit utime() they can be different,
 816            so refrain from splatting them. */
 817         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
 818                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 819         else
 820                 rd.mctime = cpu_to_je32(0);
 821         rd.type = fd->type;
 822         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
 823         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
 824
 825         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
 826                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
 827         if (ret) {
 828                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
 829                        sizeof(rd)+rd.nsize, ret);
 830                 return ret;
 831         }
 832         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
 833
 834         if (IS_ERR(new_fd)) {
 835                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
 836                 return PTR_ERR(new_fd);
 837         }
 838         jffs2_add_fd_to_list(c, new_fd, &f->dents);
 839         return 0;
 840 }
 841
 842 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 843                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 844 {
 845         struct jffs2_full_dirent **fdp = &f->dents;
 846         int found = 0;
 847
 848         /* On a medium where we can't actually mark nodes obsolete
 849            pernamently, such as NAND flash, we need to work out
 850            whether this deletion dirent is still needed to actively
 851            delete a 'real' dirent with the same name that's still
 852            somewhere else on the flash. */
 853         if (!jffs2_can_mark_obsolete(c)) {
 854                 struct jffs2_raw_dirent *rd;
 855                 struct jffs2_raw_node_ref *raw;
 856                 int ret;
 857                 size_t retlen;
 858                 int name_len = strlen(fd->name);
 859                 uint32_t name_crc = crc32(0, fd->name, name_len);
 860                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
 861
 862                 rd = kmalloc(rawlen, GFP_KERNEL);
 863                 if (!rd)
 864                         return -ENOMEM;
 865
 866                 /* Prevent the erase code from nicking the obsolete node refs while
 867                    we're looking at them. I really don't like this extra lock but
 868                    can't see any alternative. Suggestions on a postcard to... */
 869                 mutex_lock(&c->erase_free_sem);
 870
 871                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
 872
 873                         cond_resched();
 874
 875                         /* We only care about obsolete ones */
 876                         if (!(ref_obsolete(raw)))
 877                                 continue;
 878
 879                         /* Any dirent with the same name is going to have the same length... */
 880                         if (ref_totlen(c, NULL, raw) != rawlen)
 881                                 continue;
 882
 883                         /* Doesn't matter if there's one in the same erase block. We're going to
 884                            delete it too at the same time. */
 885                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
 886                                 continue;
 887
 888                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
 889
 890                         /* This is an obsolete node belonging to the same directory, and it's of the right
 891                            length. We need to take a closer look...*/
 892                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
 893                         if (ret) {
 894                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
 895                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
 896                                 continue;
 897                         }
 898                         if (retlen != rawlen) {
 899                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
 900                                        retlen, rawlen, ref_offset(raw));
 901                                 continue;
 902                         }
 903
 904                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
 905                                 continue;
 906
 907                         /* If the name CRC doesn't match, skip */
 908                         if (je32_to_cpu(rd->name_crc) != name_crc)
 909                                 continue;
 910
 911                         /* If the name length doesn't match, or it's another deletion dirent, skip */
 912                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
 913                                 continue;
 914
 915                         /* OK, check the actual name now */
 916                         if (memcmp(rd->name, fd->name, name_len))
 917                                 continue;
 918
 919                         /* OK. The name really does match. There really is still an older node on
 920                            the flash which our deletion dirent obsoletes. So we have to write out
 921                            a new deletion dirent to replace it */
 922                         mutex_unlock(&c->erase_free_sem);
 923
 924                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
 925                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
 926                         kfree(rd);
 927
 928                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
 929                 }
 930
 931                 mutex_unlock(&c->erase_free_sem);
 932                 kfree(rd);
 933         }
 934
 935
 936         /* No need for it any more. Just mark it obsolete and remove it from the list */
 937         while (*fdp) {
 938                 if ((*fdp) == fd) {
 939                         found = 1;
 940                         *fdp = fd->next;
 941                         break;
 942                 }
 943                 fdp = &(*fdp)->next;
 944         }
 945         if (!found) {
 946                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
 947         }
 948         jffs2_mark_node_obsolete(c, fd->raw);
 949         jffs2_free_full_dirent(fd);
 950         return 0;
 951 }
 952
 953 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 954                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
 955                                       uint32_t start, uint32_t end)
 956 {
 957         struct jffs2_raw_inode ri;
 958         struct jffs2_node_frag *frag;
 959         struct jffs2_full_dnode *new_fn;
 960         uint32_t alloclen, ilen;
 961         int ret;
 962
 963         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
 964                   f->inocache->ino, start, end));
 965
 966         memset(&ri, 0, sizeof(ri));
 967
 968         if(fn->frags > 1) {
 969                 size_t readlen;
 970                 uint32_t crc;
 971                 /* It's partially obsoleted by a later write. So we have to
 972                    write it out again with the _same_ version as before */
 973                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
 974                 if (readlen != sizeof(ri) || ret) {
 975                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
 976                         goto fill;
 977                 }
 978                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
 979                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
 980                                ref_offset(fn->raw),
 981                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
 982                         return -EIO;
 983                 }
 984                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
 985                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
 986                                ref_offset(fn->raw),
 987                                je32_to_cpu(ri.totlen), sizeof(ri));
 988                         return -EIO;
 989                 }
 990                 crc = crc32(0, &ri, sizeof(ri)-8);
 991                 if (crc != je32_to_cpu(ri.node_crc)) {
 992                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
 993                                ref_offset(fn->raw),
 994                                je32_to_cpu(ri.node_crc), crc);
 995                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
 996                                start, end, f->inocache->ino);
 997                         goto fill;
 998                 }
 999                 if (ri.compr != JFFS2_COMPR_ZERO) {
1000                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
1001                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1002                                start, end, f->inocache->ino);
1003                         goto fill;
1004                 }
1005         } else {
1006         fill:
1007                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1008                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1009                 ri.totlen = cpu_to_je32(sizeof(ri));
1010                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1011
1012                 ri.ino = cpu_to_je32(f->inocache->ino);
1013                 ri.version = cpu_to_je32(++f->highest_version);
1014                 ri.offset = cpu_to_je32(start);
1015                 ri.dsize = cpu_to_je32(end - start);
1016                 ri.csize = cpu_to_je32(0);
1017                 ri.compr = JFFS2_COMPR_ZERO;
1018         }
1019
1020         frag = frag_last(&f->fragtree);
1021         if (frag)
1022                 /* Fetch the inode length from the fragtree rather then
1023                  * from i_size since i_size may have not been updated yet */
1024                 ilen = frag->ofs + frag->size;
1025         else
1026                 ilen = JFFS2_F_I_SIZE(f);
1027
1028         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1029         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1030         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1031         ri.isize = cpu_to_je32(ilen);
1032         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1033         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1034         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1035         ri.data_crc = cpu_to_je32(0);
1036         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1037
1038         ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1039                                      JFFS2_SUMMARY_INODE_SIZE);
1040         if (ret) {
1041                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1042                        sizeof(ri), ret);
1043                 return ret;
1044         }
1045         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1046
1047         if (IS_ERR(new_fn)) {
1048                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1049                 return PTR_ERR(new_fn);
1050         }
1051         if (je32_to_cpu(ri.version) == f->highest_version) {
1052                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1053                 if (f->metadata) {
1054                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1055                         jffs2_free_full_dnode(f->metadata);
1056                         f->metadata = NULL;
1057                 }
1058                 return 0;
1059         }
1060
1061         /*
1062          * We should only get here in the case where the node we are
1063          * replacing had more than one frag, so we kept the same version
1064          * number as before. (Except in case of error -- see 'goto fill;'
1065          * above.)
1066          */
1067         D1(if(unlikely(fn->frags <= 1)) {
1068                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1069                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1070                        je32_to_cpu(ri.ino));
1071         });
1072
1073         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1074         mark_ref_normal(new_fn->raw);
1075
1076         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1077              frag; frag = frag_next(frag)) {
1078                 if (frag->ofs > fn->size + fn->ofs)
1079                         break;
1080                 if (frag->node == fn) {
1081                         frag->node = new_fn;
1082                         new_fn->frags++;
1083                         fn->frags--;
1084                 }
1085         }
1086         if (fn->frags) {
1087                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1088                 BUG();
1089         }
1090         if (!new_fn->frags) {
1091                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1092                 BUG();
1093         }
1094
1095         jffs2_mark_node_obsolete(c, fn->raw);
1096         jffs2_free_full_dnode(fn);
1097
1098         return 0;
1099 }
1100
1101 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1102                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1103                                        uint32_t start, uint32_t end)
1104 {
1105         struct jffs2_full_dnode *new_fn;
1106         struct jffs2_raw_inode ri;
1107         uint32_t alloclen, offset, orig_end, orig_start;
1108         int ret = 0;
1109         unsigned char *comprbuf = NULL, *writebuf;
1110         unsigned long pg;
1111         unsigned char *pg_ptr;
1112
1113         memset(&ri, 0, sizeof(ri));
1114
1115         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1116                   f->inocache->ino, start, end));
1117
1118         orig_end = end;
1119         orig_start = start;
1120
1121         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1122                 /* Attempt to do some merging. But only expand to cover logically
1123                    adjacent frags if the block containing them is already considered
1124                    to be dirty. Otherwise we end up with GC just going round in
1125                    circles dirtying the nodes it already wrote out, especially
1126                    on NAND where we have small eraseblocks and hence a much higher
1127                    chance of nodes having to be split to cross boundaries. */
1128
1129                 struct jffs2_node_frag *frag;
1130                 uint32_t min, max;
1131
1132                 min = start & ~(PAGE_CACHE_SIZE-1);
1133                 max = min + PAGE_CACHE_SIZE;
1134
1135                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1136
1137                 /* BUG_ON(!frag) but that'll happen anyway... */
1138
1139                 BUG_ON(frag->ofs != start);
1140
1141                 /* First grow down... */
1142                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1143
1144                         /* If the previous frag doesn't even reach the beginning, there's
1145                            excessive fragmentation. Just merge. */
1146                         if (frag->ofs > min) {
1147                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1148                                           frag->ofs, frag->ofs+frag->size));
1149                                 start = frag->ofs;
1150                                 continue;
1151                         }
1152                         /* OK. This frag holds the first byte of the page. */
1153                         if (!frag->node || !frag->node->raw) {
1154                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1155                                           frag->ofs, frag->ofs+frag->size));
1156                                 break;
1157                         } else {
1158
1159                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1160                                    in a block which is still considered clean? If so, don't obsolete it.
1161                                    If not, cover it anyway. */
1162
1163                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1164                                 struct jffs2_eraseblock *jeb;
1165
1166                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1167
1168                                 if (jeb == c->gcblock) {
1169                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1170                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1171                                         start = frag->ofs;
1172                                         break;
1173                                 }
1174                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1175                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1176                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1177                                         break;
1178                                 }
1179
1180                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1181                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1182                                 start = frag->ofs;
1183                                 break;
1184                         }
1185                 }
1186
1187                 /* ... then up */
1188
1189                 /* Find last frag which is actually part of the node we're to GC. */
1190                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1191
1192                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1193
1194                         /* If the previous frag doesn't even reach the beginning, there's lots
1195                            of fragmentation. Just merge. */
1196                         if (frag->ofs+frag->size < max) {
1197                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1198                                           frag->ofs, frag->ofs+frag->size));
1199                                 end = frag->ofs + frag->size;
1200                                 continue;
1201                         }
1202
1203                         if (!frag->node || !frag->node->raw) {
1204                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1205                                           frag->ofs, frag->ofs+frag->size));
1206                                 break;
1207                         } else {
1208
1209                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1210                                    in a block which is still considered clean? If so, don't obsolete it.
1211                                    If not, cover it anyway. */
1212
1213                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1214                                 struct jffs2_eraseblock *jeb;
1215
1216                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1217
1218                                 if (jeb == c->gcblock) {
1219                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1220                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1221                                         end = frag->ofs + frag->size;
1222                                         break;
1223                                 }
1224                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1225                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1226                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1227                                         break;
1228                                 }
1229
1230                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1231                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1232                                 end = frag->ofs + frag->size;
1233                                 break;
1234                         }
1235                 }
1236                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1237                           orig_start, orig_end, start, end));
1238
1239                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1240                 BUG_ON(end < orig_end);
1241                 BUG_ON(start > orig_start);
1242         }
1243
1244         /* First, use readpage() to read the appropriate page into the page cache */
1245         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1246          *    triggered garbage collection in the first place?
1247          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1248          *    page OK. We'll actually write it out again in commit_write, which is a little
1249          *    suboptimal, but at least we're correct.
1250          */
1251         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1252
1253         if (IS_ERR(pg_ptr)) {
1254                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1255                 return PTR_ERR(pg_ptr);
1256         }
1257
1258         offset = start;
1259         while(offset < orig_end) {
1260                 uint32_t datalen;
1261                 uint32_t cdatalen;
1262                 uint16_t comprtype = JFFS2_COMPR_NONE;
1263
1264                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1265                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1266
1267                 if (ret) {
1268                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1269                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1270                         break;
1271                 }
1272                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1273                 datalen = end - offset;
1274
1275                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1276
1277                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1278
1279                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1280                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1281                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1282                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1283
1284                 ri.ino = cpu_to_je32(f->inocache->ino);
1285                 ri.version = cpu_to_je32(++f->highest_version);
1286                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1287                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1288                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1289                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1290                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1291                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1292                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1293                 ri.offset = cpu_to_je32(offset);
1294                 ri.csize = cpu_to_je32(cdatalen);
1295                 ri.dsize = cpu_to_je32(datalen);
1296                 ri.compr = comprtype & 0xff;
1297                 ri.usercompr = (comprtype >> 8) & 0xff;
1298                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1299                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1300
1301                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1302
1303                 jffs2_free_comprbuf(comprbuf, writebuf);
1304
1305                 if (IS_ERR(new_fn)) {
1306                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1307                         ret = PTR_ERR(new_fn);
1308                         break;
1309                 }
1310                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1311                 offset += datalen;
1312                 if (f->metadata) {
1313                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1314                         jffs2_free_full_dnode(f->metadata);
1315                         f->metadata = NULL;
1316                 }
1317         }
1318
1319         jffs2_gc_release_page(c, pg_ptr, &pg);
1320         return ret;
1321 }