2 * This file is part of UBIFS.
4 * Copyright (C) 2006-2008 Nokia Corporation.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
24 * This file is a part of UBIFS journal implementation and contains various
25 * functions which manipulate the log. The log is a fixed area on the flash
26 * which does not contain any data but refers to buds. The log is a part of the
32 #ifdef CONFIG_UBIFS_FS_DEBUG
33 static int dbg_check_bud_bytes(struct ubifs_info
*c
);
35 #define dbg_check_bud_bytes(c) 0
39 * ubifs_search_bud - search bud LEB.
40 * @c: UBIFS file-system description object
41 * @lnum: logical eraseblock number to search
43 * This function searches bud LEB @lnum. Returns bud description object in case
44 * of success and %NULL if there is no bud with this LEB number.
46 struct ubifs_bud
*ubifs_search_bud(struct ubifs_info
*c
, int lnum
)
49 struct ubifs_bud
*bud
;
51 spin_lock(&c
->buds_lock
);
54 bud
= rb_entry(p
, struct ubifs_bud
, rb
);
57 else if (lnum
> bud
->lnum
)
60 spin_unlock(&c
->buds_lock
);
64 spin_unlock(&c
->buds_lock
);
69 * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one.
70 * @c: UBIFS file-system description object
71 * @lnum: logical eraseblock number to search
73 * This function returns the wbuf for @lnum or %NULL if there is not one.
75 struct ubifs_wbuf
*ubifs_get_wbuf(struct ubifs_info
*c
, int lnum
)
78 struct ubifs_bud
*bud
;
84 spin_lock(&c
->buds_lock
);
87 bud
= rb_entry(p
, struct ubifs_bud
, rb
);
90 else if (lnum
> bud
->lnum
)
94 spin_unlock(&c
->buds_lock
);
95 return &c
->jheads
[jhead
].wbuf
;
98 spin_unlock(&c
->buds_lock
);
103 * next_log_lnum - switch to the next log LEB.
104 * @c: UBIFS file-system description object
105 * @lnum: current log LEB
107 static inline int next_log_lnum(const struct ubifs_info
*c
, int lnum
)
110 if (lnum
> c
->log_last
)
111 lnum
= UBIFS_LOG_LNUM
;
117 * empty_log_bytes - calculate amount of empty space in the log.
118 * @c: UBIFS file-system description object
120 static inline long long empty_log_bytes(const struct ubifs_info
*c
)
124 h
= (long long)c
->lhead_lnum
* c
->leb_size
+ c
->lhead_offs
;
125 t
= (long long)c
->ltail_lnum
* c
->leb_size
;
128 return c
->log_bytes
- h
+ t
;
134 * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
135 * @c: UBIFS file-system description object
136 * @bud: the bud to add
138 void ubifs_add_bud(struct ubifs_info
*c
, struct ubifs_bud
*bud
)
140 struct rb_node
**p
, *parent
= NULL
;
142 struct ubifs_jhead
*jhead
;
144 spin_lock(&c
->buds_lock
);
145 p
= &c
->buds
.rb_node
;
148 b
= rb_entry(parent
, struct ubifs_bud
, rb
);
149 ubifs_assert(bud
->lnum
!= b
->lnum
);
150 if (bud
->lnum
< b
->lnum
)
156 rb_link_node(&bud
->rb
, parent
, p
);
157 rb_insert_color(&bud
->rb
, &c
->buds
);
159 jhead
= &c
->jheads
[bud
->jhead
];
160 list_add_tail(&bud
->list
, &jhead
->buds_list
);
162 ubifs_assert(c
->replaying
&& (c
->vfs_sb
->s_flags
& MS_RDONLY
));
165 * Note, although this is a new bud, we anyway account this space now,
166 * before any data has been written to it, because this is about to
167 * guarantee fixed mount time, and this bud will anyway be read and
170 c
->bud_bytes
+= c
->leb_size
- bud
->start
;
172 dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud
->lnum
,
173 bud
->start
, bud
->jhead
, c
->bud_bytes
);
174 spin_unlock(&c
->buds_lock
);
178 * ubifs_create_buds_lists - create journal head buds lists for remount rw.
179 * @c: UBIFS file-system description object
181 void ubifs_create_buds_lists(struct ubifs_info
*c
)
185 spin_lock(&c
->buds_lock
);
186 p
= rb_first(&c
->buds
);
188 struct ubifs_bud
*bud
= rb_entry(p
, struct ubifs_bud
, rb
);
189 struct ubifs_jhead
*jhead
= &c
->jheads
[bud
->jhead
];
191 list_add_tail(&bud
->list
, &jhead
->buds_list
);
194 spin_unlock(&c
->buds_lock
);
198 * ubifs_add_bud_to_log - add a new bud to the log.
199 * @c: UBIFS file-system description object
200 * @jhead: journal head the bud belongs to
201 * @lnum: LEB number of the bud
202 * @offs: starting offset of the bud
204 * This function writes a reference node for the new bud LEB @lnum to the log,
205 * and adds it to the buds tree. It also makes sure that log size does not
206 * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
207 * %-EAGAIN if commit is required, and a negative error code in case of
210 int ubifs_add_bud_to_log(struct ubifs_info
*c
, int jhead
, int lnum
, int offs
)
213 struct ubifs_bud
*bud
;
214 struct ubifs_ref_node
*ref
;
216 bud
= kmalloc(sizeof(struct ubifs_bud
), GFP_NOFS
);
219 ref
= kzalloc(c
->ref_node_alsz
, GFP_NOFS
);
225 mutex_lock(&c
->log_mutex
);
232 /* Make sure we have enough space in the log */
233 if (empty_log_bytes(c
) - c
->ref_node_alsz
< c
->min_log_bytes
) {
234 dbg_log("not enough log space - %lld, required %d",
235 empty_log_bytes(c
), c
->min_log_bytes
);
236 ubifs_commit_required(c
);
242 * Make sure the amount of space in buds will not exceed
243 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
246 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
247 * because we are holding @c->log_mutex. All @c->bud_bytes changes take place
248 * when both @c->log_mutex and @c->buds_lock are locked.
250 if (c
->bud_bytes
+ c
->leb_size
- offs
> c
->max_bud_bytes
) {
251 dbg_log("bud bytes %lld (%lld max), require commit",
252 c
->bud_bytes
, c
->max_bud_bytes
);
253 ubifs_commit_required(c
);
259 * If the journal is full enough - start background commit. Note, it is
260 * OK to read 'c->cmt_state' without spinlock because integer reads
261 * are atomic in the kernel.
263 if (c
->bud_bytes
>= c
->bg_bud_bytes
&&
264 c
->cmt_state
== COMMIT_RESTING
) {
265 dbg_log("bud bytes %lld (%lld max), initiate BG commit",
266 c
->bud_bytes
, c
->max_bud_bytes
);
267 ubifs_request_bg_commit(c
);
274 ref
->ch
.node_type
= UBIFS_REF_NODE
;
275 ref
->lnum
= cpu_to_le32(bud
->lnum
);
276 ref
->offs
= cpu_to_le32(bud
->start
);
277 ref
->jhead
= cpu_to_le32(jhead
);
279 if (c
->lhead_offs
> c
->leb_size
- c
->ref_node_alsz
) {
280 c
->lhead_lnum
= next_log_lnum(c
, c
->lhead_lnum
);
284 if (c
->lhead_offs
== 0) {
285 /* Must ensure next log LEB has been unmapped */
286 err
= ubifs_leb_unmap(c
, c
->lhead_lnum
);
291 if (bud
->start
== 0) {
293 * Before writing the LEB reference which refers to an empty LEB
294 * to the log, we have to make sure it is mapped, because
295 * otherwise we'd risk referring to an LEB with garbage in case of
296 * an unclean reboot, because the target LEB might have been
297 * unmapped, but not yet physically erased.
299 err
= ubi_leb_map(c
->ubi
, bud
->lnum
, UBI_SHORTTERM
);
304 dbg_log("write ref LEB %d:%d",
305 c
->lhead_lnum
, c
->lhead_offs
);
306 err
= ubifs_write_node(c
, ref
, UBIFS_REF_NODE_SZ
, c
->lhead_lnum
,
307 c
->lhead_offs
, UBI_SHORTTERM
);
311 c
->lhead_offs
+= c
->ref_node_alsz
;
313 ubifs_add_bud(c
, bud
);
315 mutex_unlock(&c
->log_mutex
);
321 ubifs_ro_mode(c
, err
);
322 mutex_unlock(&c
->log_mutex
);
329 * remove_buds - remove used buds.
330 * @c: UBIFS file-system description object
332 * This function removes used buds from the buds tree. It does not remove the
333 * buds which are pointed to by journal heads.
335 static void remove_buds(struct ubifs_info
*c
)
339 ubifs_assert(list_empty(&c
->old_buds
));
340 c
->cmt_bud_bytes
= 0;
341 spin_lock(&c
->buds_lock
);
342 p
= rb_first(&c
->buds
);
344 struct rb_node
*p1
= p
;
345 struct ubifs_bud
*bud
;
346 struct ubifs_wbuf
*wbuf
;
349 bud
= rb_entry(p1
, struct ubifs_bud
, rb
);
350 wbuf
= &c
->jheads
[bud
->jhead
].wbuf
;
352 if (wbuf
->lnum
== bud
->lnum
) {
354 * Do not remove buds which are pointed to by journal
355 * heads (non-closed buds).
357 c
->cmt_bud_bytes
+= wbuf
->offs
- bud
->start
;
358 dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
359 "cmt_bud_bytes %lld", bud
->lnum
, bud
->start
,
360 bud
->jhead
, wbuf
->offs
- bud
->start
,
362 bud
->start
= wbuf
->offs
;
364 c
->cmt_bud_bytes
+= c
->leb_size
- bud
->start
;
365 dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
366 "cmt_bud_bytes %lld", bud
->lnum
, bud
->start
,
367 bud
->jhead
, c
->leb_size
- bud
->start
,
369 rb_erase(p1
, &c
->buds
);
370 list_del(&bud
->list
);
372 * If the commit does not finish, the recovery will need
373 * to replay the journal, in which case the old buds
374 * must be unchanged. Do not release them until post
375 * commit i.e. do not allow them to be garbage
378 list_add(&bud
->list
, &c
->old_buds
);
381 spin_unlock(&c
->buds_lock
);
385 * ubifs_log_start_commit - start commit.
386 * @c: UBIFS file-system description object
387 * @ltail_lnum: return new log tail LEB number
389 * The commit operation starts with writing "commit start" node to the log and
390 * reference nodes for all journal heads which will define new journal after
391 * the commit has been finished. The commit start and reference nodes are
392 * written in one go to the nearest empty log LEB (hence, when commit is
393 * finished UBIFS may safely unmap all the previous log LEBs). This function
394 * returns zero in case of success and a negative error code in case of
397 int ubifs_log_start_commit(struct ubifs_info
*c
, int *ltail_lnum
)
400 struct ubifs_cs_node
*cs
;
401 struct ubifs_ref_node
*ref
;
402 int err
, i
, max_len
, len
;
404 err
= dbg_check_bud_bytes(c
);
408 max_len
= UBIFS_CS_NODE_SZ
+ c
->jhead_cnt
* UBIFS_REF_NODE_SZ
;
409 max_len
= ALIGN(max_len
, c
->min_io_size
);
410 buf
= cs
= kmalloc(max_len
, GFP_NOFS
);
414 cs
->ch
.node_type
= UBIFS_CS_NODE
;
415 cs
->cmt_no
= cpu_to_le64(c
->cmt_no
);
416 ubifs_prepare_node(c
, cs
, UBIFS_CS_NODE_SZ
, 0);
419 * Note, we do not lock 'c->log_mutex' because this is the commit start
420 * phase and we are exclusively using the log. And we do not lock
421 * write-buffer because nobody can write to the file-system at this
425 len
= UBIFS_CS_NODE_SZ
;
426 for (i
= 0; i
< c
->jhead_cnt
; i
++) {
427 int lnum
= c
->jheads
[i
].wbuf
.lnum
;
428 int offs
= c
->jheads
[i
].wbuf
.offs
;
430 if (lnum
== -1 || offs
== c
->leb_size
)
433 dbg_log("add ref to LEB %d:%d for jhead %d", lnum
, offs
, i
);
435 ref
->ch
.node_type
= UBIFS_REF_NODE
;
436 ref
->lnum
= cpu_to_le32(lnum
);
437 ref
->offs
= cpu_to_le32(offs
);
438 ref
->jhead
= cpu_to_le32(i
);
440 ubifs_prepare_node(c
, ref
, UBIFS_REF_NODE_SZ
, 0);
441 len
+= UBIFS_REF_NODE_SZ
;
444 ubifs_pad(c
, buf
+ len
, ALIGN(len
, c
->min_io_size
) - len
);
446 /* Switch to the next log LEB */
448 c
->lhead_lnum
= next_log_lnum(c
, c
->lhead_lnum
);
452 if (c
->lhead_offs
== 0) {
453 /* Must ensure next LEB has been unmapped */
454 err
= ubifs_leb_unmap(c
, c
->lhead_lnum
);
459 len
= ALIGN(len
, c
->min_io_size
);
460 dbg_log("writing commit start at LEB %d:0, len %d", c
->lhead_lnum
, len
);
461 err
= ubifs_leb_write(c
, c
->lhead_lnum
, cs
, 0, len
, UBI_SHORTTERM
);
465 *ltail_lnum
= c
->lhead_lnum
;
467 c
->lhead_offs
+= len
;
468 if (c
->lhead_offs
== c
->leb_size
) {
469 c
->lhead_lnum
= next_log_lnum(c
, c
->lhead_lnum
);
476 * We have started the commit and now users may use the rest of the log
479 c
->min_log_bytes
= 0;
487 * ubifs_log_end_commit - end commit.
488 * @c: UBIFS file-system description object
489 * @ltail_lnum: new log tail LEB number
491 * This function is called when the commit operation has finished. It
492 * moves log tail to new position and unmaps LEBs which contain obsolete data.
493 * Returns zero in case of success and a negative error code in case of
496 int ubifs_log_end_commit(struct ubifs_info
*c
, int ltail_lnum
)
501 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
502 * writes during commit. It's only the short "commit" start phase when
503 * writers are blocked.
505 mutex_lock(&c
->log_mutex
);
507 dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
508 c
->ltail_lnum
, ltail_lnum
);
510 c
->ltail_lnum
= ltail_lnum
;
512 * The commit is finished and from now on it must be guaranteed that
513 * there is always enough space for the next commit.
515 c
->min_log_bytes
= c
->leb_size
;
517 spin_lock(&c
->buds_lock
);
518 c
->bud_bytes
-= c
->cmt_bud_bytes
;
519 spin_unlock(&c
->buds_lock
);
521 err
= dbg_check_bud_bytes(c
);
523 mutex_unlock(&c
->log_mutex
);
528 * ubifs_log_post_commit - things to do after commit is completed.
529 * @c: UBIFS file-system description object
530 * @old_ltail_lnum: old log tail LEB number
532 * Release buds only after commit is completed, because they must be unchanged
533 * if recovery is needed.
535 * Unmap log LEBs only after commit is completed, because they may be needed for
538 * This function returns %0 on success and a negative error code on failure.
540 int ubifs_log_post_commit(struct ubifs_info
*c
, int old_ltail_lnum
)
544 while (!list_empty(&c
->old_buds
)) {
545 struct ubifs_bud
*bud
;
547 bud
= list_entry(c
->old_buds
.next
, struct ubifs_bud
, list
);
548 err
= ubifs_return_leb(c
, bud
->lnum
);
551 list_del(&bud
->list
);
554 mutex_lock(&c
->log_mutex
);
555 for (lnum
= old_ltail_lnum
; lnum
!= c
->ltail_lnum
;
556 lnum
= next_log_lnum(c
, lnum
)) {
557 dbg_log("unmap log LEB %d", lnum
);
558 err
= ubifs_leb_unmap(c
, lnum
);
563 mutex_unlock(&c
->log_mutex
);
568 * struct done_ref - references that have been done.
578 * done_already - determine if a reference has been done already.
579 * @done_tree: rb-tree to store references that have been done
580 * @lnum: LEB number of reference
582 * This function returns %1 if the reference has been done, %0 if not, otherwise
583 * a negative error code is returned.
585 static int done_already(struct rb_root
*done_tree
, int lnum
)
587 struct rb_node
**p
= &done_tree
->rb_node
, *parent
= NULL
;
592 dr
= rb_entry(parent
, struct done_ref
, rb
);
595 else if (lnum
> dr
->lnum
)
601 dr
= kzalloc(sizeof(struct done_ref
), GFP_NOFS
);
607 rb_link_node(&dr
->rb
, parent
, p
);
608 rb_insert_color(&dr
->rb
, done_tree
);
614 * destroy_done_tree - destroy the done tree.
615 * @done_tree: done tree to destroy
617 static void destroy_done_tree(struct rb_root
*done_tree
)
619 struct rb_node
*this = done_tree
->rb_node
;
624 this = this->rb_left
;
626 } else if (this->rb_right
) {
627 this = this->rb_right
;
630 dr
= rb_entry(this, struct done_ref
, rb
);
631 this = rb_parent(this);
633 if (this->rb_left
== &dr
->rb
)
634 this->rb_left
= NULL
;
636 this->rb_right
= NULL
;
643 * add_node - add a node to the consolidated log.
644 * @c: UBIFS file-system description object
645 * @buf: buffer to which to add
646 * @lnum: LEB number to which to write is passed and returned here
647 * @offs: offset to where to write is passed and returned here
650 * This function returns %0 on success and a negative error code on failure.
652 static int add_node(struct ubifs_info
*c
, void *buf
, int *lnum
, int *offs
,
655 struct ubifs_ch
*ch
= node
;
656 int len
= le32_to_cpu(ch
->len
), remains
= c
->leb_size
- *offs
;
659 int sz
= ALIGN(*offs
, c
->min_io_size
), err
;
661 ubifs_pad(c
, buf
+ *offs
, sz
- *offs
);
662 err
= ubifs_leb_change(c
, *lnum
, buf
, sz
, UBI_SHORTTERM
);
665 *lnum
= next_log_lnum(c
, *lnum
);
668 memcpy(buf
+ *offs
, node
, len
);
669 *offs
+= ALIGN(len
, 8);
674 * ubifs_consolidate_log - consolidate the log.
675 * @c: UBIFS file-system description object
677 * Repeated failed commits could cause the log to be full, but at least 1 LEB is
678 * needed for commit. This function rewrites the reference nodes in the log
679 * omitting duplicates, and failed CS nodes, and leaving no gaps.
681 * This function returns %0 on success and a negative error code on failure.
683 int ubifs_consolidate_log(struct ubifs_info
*c
)
685 struct ubifs_scan_leb
*sleb
;
686 struct ubifs_scan_node
*snod
;
687 struct rb_root done_tree
= RB_ROOT
;
688 int lnum
, err
, first
= 1, write_lnum
, offs
= 0;
691 dbg_rcvry("log tail LEB %d, log head LEB %d", c
->ltail_lnum
,
693 buf
= vmalloc(c
->leb_size
);
696 lnum
= c
->ltail_lnum
;
699 sleb
= ubifs_scan(c
, lnum
, 0, c
->sbuf
);
704 list_for_each_entry(snod
, &sleb
->nodes
, list
) {
705 switch (snod
->type
) {
706 case UBIFS_REF_NODE
: {
707 struct ubifs_ref_node
*ref
= snod
->node
;
708 int ref_lnum
= le32_to_cpu(ref
->lnum
);
710 err
= done_already(&done_tree
, ref_lnum
);
714 err
= add_node(c
, buf
, &write_lnum
,
724 err
= add_node(c
, buf
, &write_lnum
, &offs
,
732 ubifs_scan_destroy(sleb
);
733 if (lnum
== c
->lhead_lnum
)
735 lnum
= next_log_lnum(c
, lnum
);
738 int sz
= ALIGN(offs
, c
->min_io_size
);
740 ubifs_pad(c
, buf
+ offs
, sz
- offs
);
741 err
= ubifs_leb_change(c
, write_lnum
, buf
, sz
, UBI_SHORTTERM
);
744 offs
= ALIGN(offs
, c
->min_io_size
);
746 destroy_done_tree(&done_tree
);
748 if (write_lnum
== c
->lhead_lnum
) {
749 ubifs_err("log is too full");
752 /* Unmap remaining LEBs */
755 lnum
= next_log_lnum(c
, lnum
);
756 err
= ubifs_leb_unmap(c
, lnum
);
759 } while (lnum
!= c
->lhead_lnum
);
760 c
->lhead_lnum
= write_lnum
;
761 c
->lhead_offs
= offs
;
762 dbg_rcvry("new log head at %d:%d", c
->lhead_lnum
, c
->lhead_offs
);
766 ubifs_scan_destroy(sleb
);
768 destroy_done_tree(&done_tree
);
773 #ifdef CONFIG_UBIFS_FS_DEBUG
776 * dbg_check_bud_bytes - make sure bud bytes calculations are all right.
777 * @c: UBIFS file-system description object
779 * This function makes sure the amount of flash space used by closed buds
780 * ('c->bud_bytes') is correct. Returns zero in case of success and %-EINVAL in
783 static int dbg_check_bud_bytes(struct ubifs_info
*c
)
786 struct ubifs_bud
*bud
;
787 long long bud_bytes
= 0;
789 if (!(ubifs_chk_flags
& UBIFS_CHK_GEN
))
792 spin_lock(&c
->buds_lock
);
793 for (i
= 0; i
< c
->jhead_cnt
; i
++)
794 list_for_each_entry(bud
, &c
->jheads
[i
].buds_list
, list
)
795 bud_bytes
+= c
->leb_size
- bud
->start
;
797 if (c
->bud_bytes
!= bud_bytes
) {
798 ubifs_err("bad bud_bytes %lld, calculated %lld",
799 c
->bud_bytes
, bud_bytes
);
802 spin_unlock(&c
->buds_lock
);
807 #endif /* CONFIG_UBIFS_FS_DEBUG */