net/fsl-pq-mdio: coalesce multiple memory allocations into one
[linux-2.6/libata-dev.git] / fs / ubifs / log.c
blobc80b15d6c8de0a2cc49bd6a591fd482e7b30b456
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 *          Adrian Hunter
 */

/*
 * This file is a part of UBIFS journal implementation and contains various
 * functions which manipulate the log. The log is a fixed area on the flash
 * which does not contain any data but refers to buds. The log is a part of the
 * journal.
 */
30 #include "ubifs.h"
32 static int dbg_check_bud_bytes(struct ubifs_info *c);
34 /**
35 * ubifs_search_bud - search bud LEB.
36 * @c: UBIFS file-system description object
37 * @lnum: logical eraseblock number to search
39 * This function searches bud LEB @lnum. Returns bud description object in case
40 * of success and %NULL if there is no bud with this LEB number.
42 struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum)
44 struct rb_node *p;
45 struct ubifs_bud *bud;
47 spin_lock(&c->buds_lock);
48 p = c->buds.rb_node;
49 while (p) {
50 bud = rb_entry(p, struct ubifs_bud, rb);
51 if (lnum < bud->lnum)
52 p = p->rb_left;
53 else if (lnum > bud->lnum)
54 p = p->rb_right;
55 else {
56 spin_unlock(&c->buds_lock);
57 return bud;
60 spin_unlock(&c->buds_lock);
61 return NULL;
64 /**
65 * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one.
66 * @c: UBIFS file-system description object
67 * @lnum: logical eraseblock number to search
69 * This functions returns the wbuf for @lnum or %NULL if there is not one.
71 struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
73 struct rb_node *p;
74 struct ubifs_bud *bud;
75 int jhead;
77 if (!c->jheads)
78 return NULL;
80 spin_lock(&c->buds_lock);
81 p = c->buds.rb_node;
82 while (p) {
83 bud = rb_entry(p, struct ubifs_bud, rb);
84 if (lnum < bud->lnum)
85 p = p->rb_left;
86 else if (lnum > bud->lnum)
87 p = p->rb_right;
88 else {
89 jhead = bud->jhead;
90 spin_unlock(&c->buds_lock);
91 return &c->jheads[jhead].wbuf;
94 spin_unlock(&c->buds_lock);
95 return NULL;
98 /**
99 * empty_log_bytes - calculate amount of empty space in the log.
100 * @c: UBIFS file-system description object
102 static inline long long empty_log_bytes(const struct ubifs_info *c)
104 long long h, t;
106 h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
107 t = (long long)c->ltail_lnum * c->leb_size;
109 if (h >= t)
110 return c->log_bytes - h + t;
111 else
112 return t - h;
116 * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
117 * @c: UBIFS file-system description object
118 * @bud: the bud to add
120 void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
122 struct rb_node **p, *parent = NULL;
123 struct ubifs_bud *b;
124 struct ubifs_jhead *jhead;
126 spin_lock(&c->buds_lock);
127 p = &c->buds.rb_node;
128 while (*p) {
129 parent = *p;
130 b = rb_entry(parent, struct ubifs_bud, rb);
131 ubifs_assert(bud->lnum != b->lnum);
132 if (bud->lnum < b->lnum)
133 p = &(*p)->rb_left;
134 else
135 p = &(*p)->rb_right;
138 rb_link_node(&bud->rb, parent, p);
139 rb_insert_color(&bud->rb, &c->buds);
140 if (c->jheads) {
141 jhead = &c->jheads[bud->jhead];
142 list_add_tail(&bud->list, &jhead->buds_list);
143 } else
144 ubifs_assert(c->replaying && c->ro_mount);
147 * Note, although this is a new bud, we anyway account this space now,
148 * before any data has been written to it, because this is about to
149 * guarantee fixed mount time, and this bud will anyway be read and
150 * scanned.
152 c->bud_bytes += c->leb_size - bud->start;
154 dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
155 bud->start, dbg_jhead(bud->jhead), c->bud_bytes);
156 spin_unlock(&c->buds_lock);
160 * ubifs_add_bud_to_log - add a new bud to the log.
161 * @c: UBIFS file-system description object
162 * @jhead: journal head the bud belongs to
163 * @lnum: LEB number of the bud
164 * @offs: starting offset of the bud
166 * This function writes reference node for the new bud LEB @lnum it to the log,
167 * and adds it to the buds tress. It also makes sure that log size does not
168 * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
169 * %-EAGAIN if commit is required, and a negative error codes in case of
170 * failure.
172 int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
174 int err;
175 struct ubifs_bud *bud;
176 struct ubifs_ref_node *ref;
178 bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
179 if (!bud)
180 return -ENOMEM;
181 ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
182 if (!ref) {
183 kfree(bud);
184 return -ENOMEM;
187 mutex_lock(&c->log_mutex);
188 ubifs_assert(!c->ro_media && !c->ro_mount);
189 if (c->ro_error) {
190 err = -EROFS;
191 goto out_unlock;
194 /* Make sure we have enough space in the log */
195 if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
196 dbg_log("not enough log space - %lld, required %d",
197 empty_log_bytes(c), c->min_log_bytes);
198 ubifs_commit_required(c);
199 err = -EAGAIN;
200 goto out_unlock;
204 * Make sure the amount of space in buds will not exceed the
205 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
206 * limits.
208 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
209 * because we are holding @c->log_mutex. All @c->bud_bytes take place
210 * when both @c->log_mutex and @c->bud_bytes are locked.
212 if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
213 dbg_log("bud bytes %lld (%lld max), require commit",
214 c->bud_bytes, c->max_bud_bytes);
215 ubifs_commit_required(c);
216 err = -EAGAIN;
217 goto out_unlock;
221 * If the journal is full enough - start background commit. Note, it is
222 * OK to read 'c->cmt_state' without spinlock because integer reads
223 * are atomic in the kernel.
225 if (c->bud_bytes >= c->bg_bud_bytes &&
226 c->cmt_state == COMMIT_RESTING) {
227 dbg_log("bud bytes %lld (%lld max), initiate BG commit",
228 c->bud_bytes, c->max_bud_bytes);
229 ubifs_request_bg_commit(c);
232 bud->lnum = lnum;
233 bud->start = offs;
234 bud->jhead = jhead;
236 ref->ch.node_type = UBIFS_REF_NODE;
237 ref->lnum = cpu_to_le32(bud->lnum);
238 ref->offs = cpu_to_le32(bud->start);
239 ref->jhead = cpu_to_le32(jhead);
241 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
242 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
243 c->lhead_offs = 0;
246 if (c->lhead_offs == 0) {
247 /* Must ensure next log LEB has been unmapped */
248 err = ubifs_leb_unmap(c, c->lhead_lnum);
249 if (err)
250 goto out_unlock;
253 if (bud->start == 0) {
255 * Before writing the LEB reference which refers an empty LEB
256 * to the log, we have to make sure it is mapped, because
257 * otherwise we'd risk to refer an LEB with garbage in case of
258 * an unclean reboot, because the target LEB might have been
259 * unmapped, but not yet physically erased.
261 err = ubifs_leb_map(c, bud->lnum);
262 if (err)
263 goto out_unlock;
266 dbg_log("write ref LEB %d:%d",
267 c->lhead_lnum, c->lhead_offs);
268 err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
269 c->lhead_offs);
270 if (err)
271 goto out_unlock;
273 c->lhead_offs += c->ref_node_alsz;
275 ubifs_add_bud(c, bud);
277 mutex_unlock(&c->log_mutex);
278 kfree(ref);
279 return 0;
281 out_unlock:
282 mutex_unlock(&c->log_mutex);
283 kfree(ref);
284 kfree(bud);
285 return err;
289 * remove_buds - remove used buds.
290 * @c: UBIFS file-system description object
292 * This function removes use buds from the buds tree. It does not remove the
293 * buds which are pointed to by journal heads.
295 static void remove_buds(struct ubifs_info *c)
297 struct rb_node *p;
299 ubifs_assert(list_empty(&c->old_buds));
300 c->cmt_bud_bytes = 0;
301 spin_lock(&c->buds_lock);
302 p = rb_first(&c->buds);
303 while (p) {
304 struct rb_node *p1 = p;
305 struct ubifs_bud *bud;
306 struct ubifs_wbuf *wbuf;
308 p = rb_next(p);
309 bud = rb_entry(p1, struct ubifs_bud, rb);
310 wbuf = &c->jheads[bud->jhead].wbuf;
312 if (wbuf->lnum == bud->lnum) {
314 * Do not remove buds which are pointed to by journal
315 * heads (non-closed buds).
317 c->cmt_bud_bytes += wbuf->offs - bud->start;
318 dbg_log("preserve %d:%d, jhead %s, bud bytes %d, "
319 "cmt_bud_bytes %lld", bud->lnum, bud->start,
320 dbg_jhead(bud->jhead), wbuf->offs - bud->start,
321 c->cmt_bud_bytes);
322 bud->start = wbuf->offs;
323 } else {
324 c->cmt_bud_bytes += c->leb_size - bud->start;
325 dbg_log("remove %d:%d, jhead %s, bud bytes %d, "
326 "cmt_bud_bytes %lld", bud->lnum, bud->start,
327 dbg_jhead(bud->jhead), c->leb_size - bud->start,
328 c->cmt_bud_bytes);
329 rb_erase(p1, &c->buds);
331 * If the commit does not finish, the recovery will need
332 * to replay the journal, in which case the old buds
333 * must be unchanged. Do not release them until post
334 * commit i.e. do not allow them to be garbage
335 * collected.
337 list_move(&bud->list, &c->old_buds);
340 spin_unlock(&c->buds_lock);
344 * ubifs_log_start_commit - start commit.
345 * @c: UBIFS file-system description object
346 * @ltail_lnum: return new log tail LEB number
348 * The commit operation starts with writing "commit start" node to the log and
349 * reference nodes for all journal heads which will define new journal after
350 * the commit has been finished. The commit start and reference nodes are
351 * written in one go to the nearest empty log LEB (hence, when commit is
352 * finished UBIFS may safely unmap all the previous log LEBs). This function
353 * returns zero in case of success and a negative error code in case of
354 * failure.
356 int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
358 void *buf;
359 struct ubifs_cs_node *cs;
360 struct ubifs_ref_node *ref;
361 int err, i, max_len, len;
363 err = dbg_check_bud_bytes(c);
364 if (err)
365 return err;
367 max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
368 max_len = ALIGN(max_len, c->min_io_size);
369 buf = cs = kmalloc(max_len, GFP_NOFS);
370 if (!buf)
371 return -ENOMEM;
373 cs->ch.node_type = UBIFS_CS_NODE;
374 cs->cmt_no = cpu_to_le64(c->cmt_no);
375 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
378 * Note, we do not lock 'c->log_mutex' because this is the commit start
379 * phase and we are exclusively using the log. And we do not lock
380 * write-buffer because nobody can write to the file-system at this
381 * phase.
384 len = UBIFS_CS_NODE_SZ;
385 for (i = 0; i < c->jhead_cnt; i++) {
386 int lnum = c->jheads[i].wbuf.lnum;
387 int offs = c->jheads[i].wbuf.offs;
389 if (lnum == -1 || offs == c->leb_size)
390 continue;
392 dbg_log("add ref to LEB %d:%d for jhead %s",
393 lnum, offs, dbg_jhead(i));
394 ref = buf + len;
395 ref->ch.node_type = UBIFS_REF_NODE;
396 ref->lnum = cpu_to_le32(lnum);
397 ref->offs = cpu_to_le32(offs);
398 ref->jhead = cpu_to_le32(i);
400 ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
401 len += UBIFS_REF_NODE_SZ;
404 ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
406 /* Switch to the next log LEB */
407 if (c->lhead_offs) {
408 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
409 c->lhead_offs = 0;
412 if (c->lhead_offs == 0) {
413 /* Must ensure next LEB has been unmapped */
414 err = ubifs_leb_unmap(c, c->lhead_lnum);
415 if (err)
416 goto out;
419 len = ALIGN(len, c->min_io_size);
420 dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
421 err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len);
422 if (err)
423 goto out;
425 *ltail_lnum = c->lhead_lnum;
427 c->lhead_offs += len;
428 if (c->lhead_offs == c->leb_size) {
429 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
430 c->lhead_offs = 0;
433 remove_buds(c);
436 * We have started the commit and now users may use the rest of the log
437 * for new writes.
439 c->min_log_bytes = 0;
441 out:
442 kfree(buf);
443 return err;
447 * ubifs_log_end_commit - end commit.
448 * @c: UBIFS file-system description object
449 * @ltail_lnum: new log tail LEB number
451 * This function is called on when the commit operation was finished. It
452 * moves log tail to new position and unmaps LEBs which contain obsolete data.
453 * Returns zero in case of success and a negative error code in case of
454 * failure.
456 int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
458 int err;
461 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
462 * writes during commit. Its only short "commit" start phase when
463 * writers are blocked.
465 mutex_lock(&c->log_mutex);
467 dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
468 c->ltail_lnum, ltail_lnum);
470 c->ltail_lnum = ltail_lnum;
472 * The commit is finished and from now on it must be guaranteed that
473 * there is always enough space for the next commit.
475 c->min_log_bytes = c->leb_size;
477 spin_lock(&c->buds_lock);
478 c->bud_bytes -= c->cmt_bud_bytes;
479 spin_unlock(&c->buds_lock);
481 err = dbg_check_bud_bytes(c);
483 mutex_unlock(&c->log_mutex);
484 return err;
488 * ubifs_log_post_commit - things to do after commit is completed.
489 * @c: UBIFS file-system description object
490 * @old_ltail_lnum: old log tail LEB number
492 * Release buds only after commit is completed, because they must be unchanged
493 * if recovery is needed.
495 * Unmap log LEBs only after commit is completed, because they may be needed for
496 * recovery.
498 * This function returns %0 on success and a negative error code on failure.
500 int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
502 int lnum, err = 0;
504 while (!list_empty(&c->old_buds)) {
505 struct ubifs_bud *bud;
507 bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
508 err = ubifs_return_leb(c, bud->lnum);
509 if (err)
510 return err;
511 list_del(&bud->list);
512 kfree(bud);
514 mutex_lock(&c->log_mutex);
515 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
516 lnum = ubifs_next_log_lnum(c, lnum)) {
517 dbg_log("unmap log LEB %d", lnum);
518 err = ubifs_leb_unmap(c, lnum);
519 if (err)
520 goto out;
522 out:
523 mutex_unlock(&c->log_mutex);
524 return err;
528 * struct done_ref - references that have been done.
529 * @rb: rb-tree node
530 * @lnum: LEB number
532 struct done_ref {
533 struct rb_node rb;
534 int lnum;
538 * done_already - determine if a reference has been done already.
539 * @done_tree: rb-tree to store references that have been done
540 * @lnum: LEB number of reference
542 * This function returns %1 if the reference has been done, %0 if not, otherwise
543 * a negative error code is returned.
545 static int done_already(struct rb_root *done_tree, int lnum)
547 struct rb_node **p = &done_tree->rb_node, *parent = NULL;
548 struct done_ref *dr;
550 while (*p) {
551 parent = *p;
552 dr = rb_entry(parent, struct done_ref, rb);
553 if (lnum < dr->lnum)
554 p = &(*p)->rb_left;
555 else if (lnum > dr->lnum)
556 p = &(*p)->rb_right;
557 else
558 return 1;
561 dr = kzalloc(sizeof(struct done_ref), GFP_NOFS);
562 if (!dr)
563 return -ENOMEM;
565 dr->lnum = lnum;
567 rb_link_node(&dr->rb, parent, p);
568 rb_insert_color(&dr->rb, done_tree);
570 return 0;
574 * destroy_done_tree - destroy the done tree.
575 * @done_tree: done tree to destroy
577 static void destroy_done_tree(struct rb_root *done_tree)
579 struct rb_node *this = done_tree->rb_node;
580 struct done_ref *dr;
582 while (this) {
583 if (this->rb_left) {
584 this = this->rb_left;
585 continue;
586 } else if (this->rb_right) {
587 this = this->rb_right;
588 continue;
590 dr = rb_entry(this, struct done_ref, rb);
591 this = rb_parent(this);
592 if (this) {
593 if (this->rb_left == &dr->rb)
594 this->rb_left = NULL;
595 else
596 this->rb_right = NULL;
598 kfree(dr);
603 * add_node - add a node to the consolidated log.
604 * @c: UBIFS file-system description object
605 * @buf: buffer to which to add
606 * @lnum: LEB number to which to write is passed and returned here
607 * @offs: offset to where to write is passed and returned here
608 * @node: node to add
610 * This function returns %0 on success and a negative error code on failure.
612 static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
613 void *node)
615 struct ubifs_ch *ch = node;
616 int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;
618 if (len > remains) {
619 int sz = ALIGN(*offs, c->min_io_size), err;
621 ubifs_pad(c, buf + *offs, sz - *offs);
622 err = ubifs_leb_change(c, *lnum, buf, sz);
623 if (err)
624 return err;
625 *lnum = ubifs_next_log_lnum(c, *lnum);
626 *offs = 0;
628 memcpy(buf + *offs, node, len);
629 *offs += ALIGN(len, 8);
630 return 0;
634 * ubifs_consolidate_log - consolidate the log.
635 * @c: UBIFS file-system description object
637 * Repeated failed commits could cause the log to be full, but at least 1 LEB is
638 * needed for commit. This function rewrites the reference nodes in the log
639 * omitting duplicates, and failed CS nodes, and leaving no gaps.
641 * This function returns %0 on success and a negative error code on failure.
643 int ubifs_consolidate_log(struct ubifs_info *c)
645 struct ubifs_scan_leb *sleb;
646 struct ubifs_scan_node *snod;
647 struct rb_root done_tree = RB_ROOT;
648 int lnum, err, first = 1, write_lnum, offs = 0;
649 void *buf;
651 dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
652 c->lhead_lnum);
653 buf = vmalloc(c->leb_size);
654 if (!buf)
655 return -ENOMEM;
656 lnum = c->ltail_lnum;
657 write_lnum = lnum;
658 while (1) {
659 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
660 if (IS_ERR(sleb)) {
661 err = PTR_ERR(sleb);
662 goto out_free;
664 list_for_each_entry(snod, &sleb->nodes, list) {
665 switch (snod->type) {
666 case UBIFS_REF_NODE: {
667 struct ubifs_ref_node *ref = snod->node;
668 int ref_lnum = le32_to_cpu(ref->lnum);
670 err = done_already(&done_tree, ref_lnum);
671 if (err < 0)
672 goto out_scan;
673 if (err != 1) {
674 err = add_node(c, buf, &write_lnum,
675 &offs, snod->node);
676 if (err)
677 goto out_scan;
679 break;
681 case UBIFS_CS_NODE:
682 if (!first)
683 break;
684 err = add_node(c, buf, &write_lnum, &offs,
685 snod->node);
686 if (err)
687 goto out_scan;
688 first = 0;
689 break;
692 ubifs_scan_destroy(sleb);
693 if (lnum == c->lhead_lnum)
694 break;
695 lnum = ubifs_next_log_lnum(c, lnum);
697 if (offs) {
698 int sz = ALIGN(offs, c->min_io_size);
700 ubifs_pad(c, buf + offs, sz - offs);
701 err = ubifs_leb_change(c, write_lnum, buf, sz);
702 if (err)
703 goto out_free;
704 offs = ALIGN(offs, c->min_io_size);
706 destroy_done_tree(&done_tree);
707 vfree(buf);
708 if (write_lnum == c->lhead_lnum) {
709 ubifs_err("log is too full");
710 return -EINVAL;
712 /* Unmap remaining LEBs */
713 lnum = write_lnum;
714 do {
715 lnum = ubifs_next_log_lnum(c, lnum);
716 err = ubifs_leb_unmap(c, lnum);
717 if (err)
718 return err;
719 } while (lnum != c->lhead_lnum);
720 c->lhead_lnum = write_lnum;
721 c->lhead_offs = offs;
722 dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
723 return 0;
725 out_scan:
726 ubifs_scan_destroy(sleb);
727 out_free:
728 destroy_done_tree(&done_tree);
729 vfree(buf);
730 return err;
734 * dbg_check_bud_bytes - make sure bud bytes calculation are all right.
735 * @c: UBIFS file-system description object
737 * This function makes sure the amount of flash space used by closed buds
738 * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in
739 * case of failure.
741 static int dbg_check_bud_bytes(struct ubifs_info *c)
743 int i, err = 0;
744 struct ubifs_bud *bud;
745 long long bud_bytes = 0;
747 if (!dbg_is_chk_gen(c))
748 return 0;
750 spin_lock(&c->buds_lock);
751 for (i = 0; i < c->jhead_cnt; i++)
752 list_for_each_entry(bud, &c->jheads[i].buds_list, list)
753 bud_bytes += c->leb_size - bud->start;
755 if (c->bud_bytes != bud_bytes) {
756 ubifs_err("bad bud_bytes %lld, calculated %lld",
757 c->bud_bytes, bud_bytes);
758 err = -EINVAL;
760 spin_unlock(&c->buds_lock);
762 return err;