2 * fs/logfs/journal.c - journal handling code
4 * As should be obvious for Linux kernel code, license is GPLv2
6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
9 #include <linux/slab.h>
11 static void logfs_calc_free(struct super_block
*sb
)
13 struct logfs_super
*super
= logfs_super(sb
);
14 u64 reserve
, no_segs
= super
->s_no_segs
;
18 /* superblock segments */
20 super
->s_no_journal_segs
= 0;
23 if (super
->s_journal_seg
[i
]) {
25 super
->s_no_journal_segs
++;
28 /* open segments plus one extra per level for GC */
29 no_segs
-= 2 * super
->s_total_levels
;
31 free
= no_segs
* (super
->s_segsize
- LOGFS_SEGMENT_RESERVE
);
32 free
-= super
->s_used_bytes
;
33 /* just a bit extra */
34 free
-= super
->s_total_levels
* 4096;
36 /* Bad blocks are 'paid' for with speed reserve - the filesystem
37 * simply gets slower as bad blocks accumulate. Until the bad blocks
38 * exceed the speed reserve - then the filesystem gets smaller.
40 reserve
= super
->s_bad_segments
+ super
->s_bad_seg_reserve
;
41 reserve
*= super
->s_segsize
- LOGFS_SEGMENT_RESERVE
;
42 reserve
= max(reserve
, super
->s_speed_reserve
);
47 super
->s_free_bytes
= free
;
50 static void reserve_sb_and_journal(struct super_block
*sb
)
52 struct logfs_super
*super
= logfs_super(sb
);
53 struct btree_head32
*head
= &super
->s_reserved_segments
;
56 err
= btree_insert32(head
, seg_no(sb
, super
->s_sb_ofs
[0]), (void *)1,
60 err
= btree_insert32(head
, seg_no(sb
, super
->s_sb_ofs
[1]), (void *)1,
65 if (!super
->s_journal_seg
[i
])
67 err
= btree_insert32(head
, super
->s_journal_seg
[i
], (void *)1,
73 static void read_dynsb(struct super_block
*sb
,
74 struct logfs_je_dynsb
*dynsb
)
76 struct logfs_super
*super
= logfs_super(sb
);
78 super
->s_gec
= be64_to_cpu(dynsb
->ds_gec
);
79 super
->s_sweeper
= be64_to_cpu(dynsb
->ds_sweeper
);
80 super
->s_victim_ino
= be64_to_cpu(dynsb
->ds_victim_ino
);
81 super
->s_rename_dir
= be64_to_cpu(dynsb
->ds_rename_dir
);
82 super
->s_rename_pos
= be64_to_cpu(dynsb
->ds_rename_pos
);
83 super
->s_used_bytes
= be64_to_cpu(dynsb
->ds_used_bytes
);
84 super
->s_generation
= be32_to_cpu(dynsb
->ds_generation
);
87 static void read_anchor(struct super_block
*sb
,
88 struct logfs_je_anchor
*da
)
90 struct logfs_super
*super
= logfs_super(sb
);
91 struct inode
*inode
= super
->s_master_inode
;
92 struct logfs_inode
*li
= logfs_inode(inode
);
95 super
->s_last_ino
= be64_to_cpu(da
->da_last_ino
);
97 li
->li_height
= da
->da_height
;
98 i_size_write(inode
, be64_to_cpu(da
->da_size
));
99 li
->li_used_bytes
= be64_to_cpu(da
->da_used_bytes
);
101 for (i
= 0; i
< LOGFS_EMBEDDED_FIELDS
; i
++)
102 li
->li_data
[i
] = be64_to_cpu(da
->da_data
[i
]);
105 static void read_erasecount(struct super_block
*sb
,
106 struct logfs_je_journal_ec
*ec
)
108 struct logfs_super
*super
= logfs_super(sb
);
112 super
->s_journal_ec
[i
] = be32_to_cpu(ec
->ec
[i
]);
115 static int read_area(struct super_block
*sb
, struct logfs_je_area
*a
)
117 struct logfs_super
*super
= logfs_super(sb
);
118 struct logfs_area
*area
= super
->s_area
[a
->gc_level
];
120 u32 writemask
= ~(super
->s_writesize
- 1);
122 if (a
->gc_level
>= LOGFS_NO_AREAS
)
124 if (a
->vim
!= VIM_DEFAULT
)
125 return -EIO
; /* TODO: close area and continue */
127 area
->a_used_bytes
= be32_to_cpu(a
->used_bytes
);
128 area
->a_written_bytes
= area
->a_used_bytes
& writemask
;
129 area
->a_segno
= be32_to_cpu(a
->segno
);
133 ofs
= dev_ofs(sb
, area
->a_segno
, area
->a_written_bytes
);
134 if (super
->s_writesize
> 1)
135 return logfs_buf_recover(area
, ofs
, a
+ 1, super
->s_writesize
);
137 return logfs_buf_recover(area
, ofs
, NULL
, 0);
140 static void *unpack(void *from
, void *to
)
142 struct logfs_journal_header
*jh
= from
;
143 void *data
= from
+ sizeof(struct logfs_journal_header
);
145 size_t inlen
, outlen
;
147 inlen
= be16_to_cpu(jh
->h_len
);
148 outlen
= be16_to_cpu(jh
->h_datalen
);
150 if (jh
->h_compr
== COMPR_NONE
)
151 memcpy(to
, data
, inlen
);
153 err
= logfs_uncompress(data
, to
, inlen
, outlen
);
159 static int __read_je_header(struct super_block
*sb
, u64 ofs
,
160 struct logfs_journal_header
*jh
)
162 struct logfs_super
*super
= logfs_super(sb
);
163 size_t bufsize
= max_t(size_t, sb
->s_blocksize
, super
->s_writesize
)
164 + MAX_JOURNAL_HEADER
;
165 u16 type
, len
, datalen
;
168 /* read header only */
169 err
= wbuf_read(sb
, ofs
, sizeof(*jh
), jh
);
172 type
= be16_to_cpu(jh
->h_type
);
173 len
= be16_to_cpu(jh
->h_len
);
174 datalen
= be16_to_cpu(jh
->h_datalen
);
175 if (len
> sb
->s_blocksize
)
177 if ((type
< JE_FIRST
) || (type
> JE_LAST
))
179 if (datalen
> bufsize
)
184 static int __read_je_payload(struct super_block
*sb
, u64 ofs
,
185 struct logfs_journal_header
*jh
)
190 len
= be16_to_cpu(jh
->h_len
);
191 err
= wbuf_read(sb
, ofs
+ sizeof(*jh
), len
, jh
+ 1);
194 if (jh
->h_crc
!= logfs_crc32(jh
, len
+ sizeof(*jh
), 4)) {
195 /* Old code was confused. It forgot about the header length
196 * and stopped calculating the crc 16 bytes before the end
198 * FIXME: Remove this hack once the old code is fixed.
200 if (jh
->h_crc
== logfs_crc32(jh
, len
, 4))
209 * jh needs to be large enough to hold the complete entry, not just the header
211 static int __read_je(struct super_block
*sb
, u64 ofs
,
212 struct logfs_journal_header
*jh
)
216 err
= __read_je_header(sb
, ofs
, jh
);
219 return __read_je_payload(sb
, ofs
, jh
);
222 static int read_je(struct super_block
*sb
, u64 ofs
)
224 struct logfs_super
*super
= logfs_super(sb
);
225 struct logfs_journal_header
*jh
= super
->s_compressed_je
;
226 void *scratch
= super
->s_je
;
230 err
= __read_je(sb
, ofs
, jh
);
233 type
= be16_to_cpu(jh
->h_type
);
234 datalen
= be16_to_cpu(jh
->h_datalen
);
238 read_dynsb(sb
, unpack(jh
, scratch
));
241 read_anchor(sb
, unpack(jh
, scratch
));
244 read_erasecount(sb
, unpack(jh
, scratch
));
247 err
= read_area(sb
, unpack(jh
, scratch
));
250 err
= logfs_load_object_aliases(sb
, unpack(jh
, scratch
),
260 static int logfs_read_segment(struct super_block
*sb
, u32 segno
)
262 struct logfs_super
*super
= logfs_super(sb
);
263 struct logfs_journal_header
*jh
= super
->s_compressed_je
;
264 u64 ofs
, seg_ofs
= dev_ofs(sb
, segno
, 0);
265 u32 h_ofs
, last_ofs
= 0;
266 u16 len
, datalen
, last_len
= 0;
269 /* search for most recent commit */
270 for (h_ofs
= 0; h_ofs
< super
->s_segsize
; h_ofs
+= sizeof(*jh
)) {
271 ofs
= seg_ofs
+ h_ofs
;
272 err
= __read_je_header(sb
, ofs
, jh
);
275 if (jh
->h_type
!= cpu_to_be16(JE_COMMIT
))
277 err
= __read_je_payload(sb
, ofs
, jh
);
280 len
= be16_to_cpu(jh
->h_len
);
281 datalen
= be16_to_cpu(jh
->h_datalen
);
282 if ((datalen
> sizeof(super
->s_je_array
)) ||
283 (datalen
% sizeof(__be64
)))
287 h_ofs
+= ALIGN(len
, sizeof(*jh
)) - sizeof(*jh
);
292 ofs
= seg_ofs
+ last_ofs
;
293 log_journal("Read commit from %llx\n", ofs
);
294 err
= __read_je(sb
, ofs
, jh
);
295 BUG_ON(err
); /* We should have caught it in the scan loop already */
299 unpack(jh
, super
->s_je_array
);
300 super
->s_no_je
= last_len
/ sizeof(__be64
);
301 /* iterate over array */
302 for (i
= 0; i
< super
->s_no_je
; i
++) {
303 err
= read_je(sb
, be64_to_cpu(super
->s_je_array
[i
]));
307 super
->s_journal_area
->a_segno
= segno
;
311 static u64
read_gec(struct super_block
*sb
, u32 segno
)
313 struct logfs_segment_header sh
;
319 err
= wbuf_read(sb
, dev_ofs(sb
, segno
, 0), sizeof(sh
), &sh
);
322 crc
= logfs_crc32(&sh
, sizeof(sh
), 4);
324 WARN_ON(sh
.gec
!= cpu_to_be64(0xffffffffffffffffull
));
325 /* Most likely it was just erased */
328 return be64_to_cpu(sh
.gec
);
331 static int logfs_read_journal(struct super_block
*sb
)
333 struct logfs_super
*super
= logfs_super(sb
);
334 u64 gec
[LOGFS_JOURNAL_SEGS
], max
;
340 journal_for_each(i
) {
341 segno
= super
->s_journal_seg
[i
];
342 gec
[i
] = read_gec(sb
, super
->s_journal_seg
[i
]);
350 /* FIXME: Try older segments in case of error */
351 return logfs_read_segment(sb
, super
->s_journal_seg
[max_i
]);
355 * First search the current segment (outer loop), then pick the next segment
356 * in the array, skipping any zero entries (inner loop).
358 static void journal_get_free_segment(struct logfs_area
*area
)
360 struct logfs_super
*super
= logfs_super(area
->a_sb
);
363 journal_for_each(i
) {
364 if (area
->a_segno
!= super
->s_journal_seg
[i
])
369 if (i
== LOGFS_JOURNAL_SEGS
)
371 } while (!super
->s_journal_seg
[i
]);
373 area
->a_segno
= super
->s_journal_seg
[i
];
374 area
->a_erase_count
= ++(super
->s_journal_ec
[i
]);
375 log_journal("Journal now at %x (ec %x)\n", area
->a_segno
,
376 area
->a_erase_count
);
/*
 * get_erase_count hook for the journal area.
 *
 * Deliberately a no-op: journal erase counts are kept globally in the
 * superblock, and journal_get_free_segment() already increments the
 * count and stores it in the area when it picks a segment.
 */
static void journal_get_erase_count(struct logfs_area *area)
{
	/* nothing to do here */
}
388 static int journal_erase_segment(struct logfs_area
*area
)
390 struct super_block
*sb
= area
->a_sb
;
392 struct logfs_segment_header sh
;
393 unsigned char c
[ALIGN(sizeof(struct logfs_segment_header
), 16)];
398 err
= logfs_erase_segment(sb
, area
->a_segno
, 1);
402 memset(&u
, 0, sizeof(u
));
404 u
.sh
.type
= SEG_JOURNAL
;
406 u
.sh
.segno
= cpu_to_be32(area
->a_segno
);
407 u
.sh
.ec
= cpu_to_be32(area
->a_erase_count
);
408 u
.sh
.gec
= cpu_to_be64(logfs_super(sb
)->s_gec
);
409 u
.sh
.crc
= logfs_crc32(&u
.sh
, sizeof(u
.sh
), 4);
411 /* This causes a bug in segment.c. Not yet. */
412 //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
414 ofs
= dev_ofs(sb
, area
->a_segno
, 0);
415 area
->a_used_bytes
= sizeof(u
);
416 logfs_buf_write(area
, ofs
, &u
, sizeof(u
));
420 static size_t __logfs_write_header(struct logfs_super
*super
,
421 struct logfs_journal_header
*jh
, size_t len
, size_t datalen
,
424 jh
->h_len
= cpu_to_be16(len
);
425 jh
->h_type
= cpu_to_be16(type
);
426 jh
->h_datalen
= cpu_to_be16(datalen
);
433 jh
->h_crc
= logfs_crc32(jh
, len
+ sizeof(*jh
), 4);
434 return ALIGN(len
, 16) + sizeof(*jh
);
437 static size_t logfs_write_header(struct logfs_super
*super
,
438 struct logfs_journal_header
*jh
, size_t datalen
, u16 type
)
440 size_t len
= datalen
;
442 return __logfs_write_header(super
, jh
, len
, datalen
, type
, COMPR_NONE
);
445 static inline size_t logfs_journal_erasecount_size(struct logfs_super
*super
)
447 return LOGFS_JOURNAL_SEGS
* sizeof(__be32
);
450 static void *logfs_write_erasecount(struct super_block
*sb
, void *_ec
,
451 u16
*type
, size_t *len
)
453 struct logfs_super
*super
= logfs_super(sb
);
454 struct logfs_je_journal_ec
*ec
= _ec
;
458 ec
->ec
[i
] = cpu_to_be32(super
->s_journal_ec
[i
]);
459 *type
= JE_ERASECOUNT
;
460 *len
= logfs_journal_erasecount_size(super
);
464 static void account_shadow(void *_shadow
, unsigned long _sb
, u64 ignore
,
467 struct logfs_shadow
*shadow
= _shadow
;
468 struct super_block
*sb
= (void *)_sb
;
469 struct logfs_super
*super
= logfs_super(sb
);
471 /* consume new space */
472 super
->s_free_bytes
-= shadow
->new_len
;
473 super
->s_used_bytes
+= shadow
->new_len
;
474 super
->s_dirty_used_bytes
-= shadow
->new_len
;
476 /* free up old space */
477 super
->s_free_bytes
+= shadow
->old_len
;
478 super
->s_used_bytes
-= shadow
->old_len
;
479 super
->s_dirty_free_bytes
-= shadow
->old_len
;
481 logfs_set_segment_used(sb
, shadow
->old_ofs
, -shadow
->old_len
);
482 logfs_set_segment_used(sb
, shadow
->new_ofs
, shadow
->new_len
);
484 log_journal("account_shadow(%llx, %llx, %x) %llx->%llx %x->%x\n",
485 shadow
->ino
, shadow
->bix
, shadow
->gc_level
,
486 shadow
->old_ofs
, shadow
->new_ofs
,
487 shadow
->old_len
, shadow
->new_len
);
488 mempool_free(shadow
, super
->s_shadow_pool
);
491 static void account_shadows(struct super_block
*sb
)
493 struct logfs_super
*super
= logfs_super(sb
);
494 struct inode
*inode
= super
->s_master_inode
;
495 struct logfs_inode
*li
= logfs_inode(inode
);
496 struct shadow_tree
*tree
= &super
->s_shadow_tree
;
498 btree_grim_visitor64(&tree
->new, (unsigned long)sb
, account_shadow
);
499 btree_grim_visitor64(&tree
->old
, (unsigned long)sb
, account_shadow
);
500 btree_grim_visitor32(&tree
->segment_map
, 0, NULL
);
501 tree
->no_shadowed_segments
= 0;
505 * We never actually use the structure, when attached to the
506 * master inode. But it is easier to always free it here than
507 * to have checks in several places elsewhere when allocating
510 li
->li_block
->ops
->free_block(sb
, li
->li_block
);
512 BUG_ON((s64
)li
->li_used_bytes
< 0);
515 static void *__logfs_write_anchor(struct super_block
*sb
, void *_da
,
516 u16
*type
, size_t *len
)
518 struct logfs_super
*super
= logfs_super(sb
);
519 struct logfs_je_anchor
*da
= _da
;
520 struct inode
*inode
= super
->s_master_inode
;
521 struct logfs_inode
*li
= logfs_inode(inode
);
524 da
->da_height
= li
->li_height
;
525 da
->da_last_ino
= cpu_to_be64(super
->s_last_ino
);
526 da
->da_size
= cpu_to_be64(i_size_read(inode
));
527 da
->da_used_bytes
= cpu_to_be64(li
->li_used_bytes
);
528 for (i
= 0; i
< LOGFS_EMBEDDED_FIELDS
; i
++)
529 da
->da_data
[i
] = cpu_to_be64(li
->li_data
[i
]);
535 static void *logfs_write_dynsb(struct super_block
*sb
, void *_dynsb
,
536 u16
*type
, size_t *len
)
538 struct logfs_super
*super
= logfs_super(sb
);
539 struct logfs_je_dynsb
*dynsb
= _dynsb
;
541 dynsb
->ds_gec
= cpu_to_be64(super
->s_gec
);
542 dynsb
->ds_sweeper
= cpu_to_be64(super
->s_sweeper
);
543 dynsb
->ds_victim_ino
= cpu_to_be64(super
->s_victim_ino
);
544 dynsb
->ds_rename_dir
= cpu_to_be64(super
->s_rename_dir
);
545 dynsb
->ds_rename_pos
= cpu_to_be64(super
->s_rename_pos
);
546 dynsb
->ds_used_bytes
= cpu_to_be64(super
->s_used_bytes
);
547 dynsb
->ds_generation
= cpu_to_be32(super
->s_generation
);
549 *len
= sizeof(*dynsb
);
553 static void write_wbuf(struct super_block
*sb
, struct logfs_area
*area
,
556 struct logfs_super
*super
= logfs_super(sb
);
557 struct address_space
*mapping
= super
->s_mapping_inode
->i_mapping
;
563 ofs
= dev_ofs(sb
, area
->a_segno
,
564 area
->a_used_bytes
& ~(super
->s_writesize
- 1));
565 index
= ofs
>> PAGE_SHIFT
;
566 page_ofs
= ofs
& (PAGE_SIZE
- 1);
568 page
= find_or_create_page(mapping
, index
, GFP_NOFS
);
570 memcpy(wbuf
, page_address(page
) + page_ofs
, super
->s_writesize
);
574 static void *logfs_write_area(struct super_block
*sb
, void *_a
,
575 u16
*type
, size_t *len
)
577 struct logfs_super
*super
= logfs_super(sb
);
578 struct logfs_area
*area
= super
->s_area
[super
->s_sum_index
];
579 struct logfs_je_area
*a
= _a
;
581 a
->vim
= VIM_DEFAULT
;
582 a
->gc_level
= super
->s_sum_index
;
583 a
->used_bytes
= cpu_to_be32(area
->a_used_bytes
);
584 a
->segno
= cpu_to_be32(area
->a_segno
);
585 if (super
->s_writesize
> 1)
586 write_wbuf(sb
, area
, a
+ 1);
589 *len
= sizeof(*a
) + super
->s_writesize
;
593 static void *logfs_write_commit(struct super_block
*sb
, void *h
,
594 u16
*type
, size_t *len
)
596 struct logfs_super
*super
= logfs_super(sb
);
599 *len
= super
->s_no_je
* sizeof(__be64
);
600 return super
->s_je_array
;
603 static size_t __logfs_write_je(struct super_block
*sb
, void *buf
, u16 type
,
606 struct logfs_super
*super
= logfs_super(sb
);
607 void *header
= super
->s_compressed_je
;
608 void *data
= header
+ sizeof(struct logfs_journal_header
);
609 ssize_t compr_len
, pad_len
;
610 u8 compr
= COMPR_ZLIB
;
613 return logfs_write_header(super
, header
, 0, type
);
615 compr_len
= logfs_compress(buf
, data
, len
, sb
->s_blocksize
);
616 if (compr_len
< 0 || type
== JE_ANCHOR
) {
617 memcpy(data
, buf
, len
);
622 pad_len
= ALIGN(compr_len
, 16);
623 memset(data
+ compr_len
, 0, pad_len
- compr_len
);
625 return __logfs_write_header(super
, header
, compr_len
, len
, type
, compr
);
628 static s64
logfs_get_free_bytes(struct logfs_area
*area
, size_t *bytes
,
631 u32 writesize
= logfs_super(area
->a_sb
)->s_writesize
;
635 ret
= logfs_open_area(area
, *bytes
);
639 ofs
= area
->a_used_bytes
;
640 area
->a_used_bytes
+= *bytes
;
643 area
->a_used_bytes
= ALIGN(area
->a_used_bytes
, writesize
);
644 *bytes
= area
->a_used_bytes
- ofs
;
647 return dev_ofs(area
->a_sb
, area
->a_segno
, ofs
);
650 static int logfs_write_je_buf(struct super_block
*sb
, void *buf
, u16 type
,
653 struct logfs_super
*super
= logfs_super(sb
);
654 struct logfs_area
*area
= super
->s_journal_area
;
655 struct logfs_journal_header
*jh
= super
->s_compressed_je
;
660 len
= __logfs_write_je(sb
, buf
, type
, buf_len
);
661 if (jh
->h_type
== cpu_to_be16(JE_COMMIT
))
664 ofs
= logfs_get_free_bytes(area
, &len
, must_pad
);
667 logfs_buf_write(area
, ofs
, super
->s_compressed_je
, len
);
668 BUG_ON(super
->s_no_je
>= MAX_JOURNAL_ENTRIES
);
669 super
->s_je_array
[super
->s_no_je
++] = cpu_to_be64(ofs
);
673 static int logfs_write_je(struct super_block
*sb
,
674 void* (*write
)(struct super_block
*sb
, void *scratch
,
675 u16
*type
, size_t *len
))
681 buf
= write(sb
, logfs_super(sb
)->s_je
, &type
, &len
);
682 return logfs_write_je_buf(sb
, buf
, type
, len
);
685 int write_alias_journal(struct super_block
*sb
, u64 ino
, u64 bix
,
686 level_t level
, int child_no
, __be64 val
)
688 struct logfs_super
*super
= logfs_super(sb
);
689 struct logfs_obj_alias
*oa
= super
->s_je
;
690 int err
= 0, fill
= super
->s_je_fill
;
692 log_aliases("logfs_write_obj_aliases #%x(%llx, %llx, %x, %x) %llx\n",
693 fill
, ino
, bix
, level
, child_no
, be64_to_cpu(val
));
694 oa
[fill
].ino
= cpu_to_be64(ino
);
695 oa
[fill
].bix
= cpu_to_be64(bix
);
697 oa
[fill
].level
= (__force u8
)level
;
698 oa
[fill
].child_no
= cpu_to_be16(child_no
);
700 if (fill
>= sb
->s_blocksize
/ sizeof(*oa
)) {
701 err
= logfs_write_je_buf(sb
, oa
, JE_OBJ_ALIAS
, sb
->s_blocksize
);
705 super
->s_je_fill
= fill
;
709 static int logfs_write_obj_aliases(struct super_block
*sb
)
711 struct logfs_super
*super
= logfs_super(sb
);
714 log_journal("logfs_write_obj_aliases: %d aliases to write\n",
715 super
->s_no_object_aliases
);
716 super
->s_je_fill
= 0;
717 err
= logfs_write_obj_aliases_pagecache(sb
);
721 if (super
->s_je_fill
)
722 err
= logfs_write_je_buf(sb
, super
->s_je
, JE_OBJ_ALIAS
,
724 * sizeof(struct logfs_obj_alias
));
729 * Write all journal entries. The goto logic ensures that all journal entries
730 * are written whenever a new segment is used. It is ugly and potentially a
731 * bit wasteful, but robustness is more important. With this we can *always*
732 * erase all journal segments except the one containing the most recent commit.
734 void logfs_write_anchor(struct super_block
*sb
)
736 struct logfs_super
*super
= logfs_super(sb
);
737 struct logfs_area
*area
= super
->s_journal_area
;
740 if (!(super
->s_flags
& LOGFS_SB_FLAG_DIRTY
))
742 super
->s_flags
&= ~LOGFS_SB_FLAG_DIRTY
;
744 BUG_ON(super
->s_flags
& LOGFS_SB_FLAG_SHUTDOWN
);
745 mutex_lock(&super
->s_journal_mutex
);
747 /* Do this first or suffer corruption */
748 logfs_sync_segments(sb
);
754 if (!super
->s_area
[i
]->a_is_open
)
756 super
->s_sum_index
= i
;
757 err
= logfs_write_je(sb
, logfs_write_area
);
761 err
= logfs_write_obj_aliases(sb
);
764 err
= logfs_write_je(sb
, logfs_write_erasecount
);
767 err
= logfs_write_je(sb
, __logfs_write_anchor
);
770 err
= logfs_write_je(sb
, logfs_write_dynsb
);
774 * Order is imperative. First we sync all writes, including the
775 * non-committed journal writes. Then we write the final commit and
776 * sync the current journal segment.
777 * There is a theoretical bug here. Syncing the journal segment will
778 * write a number of journal entries and the final commit. All these
779 * are written in a single operation. If the device layer writes the
780 * data back-to-front, the commit will precede the other journal
781 * entries, leaving a race window.
782 * Two fixes are possible. Preferred is to fix the device layer to
783 * ensure writes happen front-to-back. Alternatively we can insert
784 * another logfs_sync_area() super->s_devops->sync() combo before
785 * writing the commit.
788 * On another subject, super->s_devops->sync is usually not necessary.
789 * Unless called from sys_sync or friends, a barrier would suffice.
791 super
->s_devops
->sync(sb
);
792 err
= logfs_write_je(sb
, logfs_write_commit
);
795 log_journal("Write commit to %llx\n",
796 be64_to_cpu(super
->s_je_array
[super
->s_no_je
- 1]));
797 logfs_sync_area(area
);
798 BUG_ON(area
->a_used_bytes
!= area
->a_written_bytes
);
799 super
->s_devops
->sync(sb
);
801 mutex_unlock(&super
->s_journal_mutex
);
805 void do_logfs_journal_wl_pass(struct super_block
*sb
)
807 struct logfs_super
*super
= logfs_super(sb
);
808 struct logfs_area
*area
= super
->s_journal_area
;
809 struct btree_head32
*head
= &super
->s_reserved_segments
;
813 log_journal("Journal requires wear-leveling.\n");
814 /* Drop old segments */
816 if (super
->s_journal_seg
[i
]) {
817 btree_remove32(head
, super
->s_journal_seg
[i
]);
818 logfs_set_segment_unreserved(sb
,
819 super
->s_journal_seg
[i
],
820 super
->s_journal_ec
[i
]);
821 super
->s_journal_seg
[i
] = 0;
822 super
->s_journal_ec
[i
] = 0;
824 /* Get new segments */
825 for (i
= 0; i
< super
->s_no_journal_segs
; i
++) {
826 segno
= get_best_cand(sb
, &super
->s_reserve_list
, &ec
);
827 super
->s_journal_seg
[i
] = segno
;
828 super
->s_journal_ec
[i
] = ec
;
829 logfs_set_segment_reserved(sb
, segno
);
830 err
= btree_insert32(head
, segno
, (void *)1, GFP_NOFS
);
831 BUG_ON(err
); /* mempool should prevent this */
832 err
= logfs_erase_segment(sb
, segno
, 1);
833 BUG_ON(err
); /* FIXME: remount-ro would be nicer */
835 /* Manually move journal_area */
836 freeseg(sb
, area
->a_segno
);
837 area
->a_segno
= super
->s_journal_seg
[0];
839 area
->a_used_bytes
= 0;
841 logfs_write_anchor(sb
);
842 /* Write superblocks */
843 err
= logfs_write_sb(sb
);
847 static const struct logfs_area_ops journal_area_ops
= {
848 .get_free_segment
= journal_get_free_segment
,
849 .get_erase_count
= journal_get_erase_count
,
850 .erase_segment
= journal_erase_segment
,
853 int logfs_init_journal(struct super_block
*sb
)
855 struct logfs_super
*super
= logfs_super(sb
);
856 size_t bufsize
= max_t(size_t, sb
->s_blocksize
, super
->s_writesize
)
857 + MAX_JOURNAL_HEADER
;
860 mutex_init(&super
->s_journal_mutex
);
861 btree_init_mempool32(&super
->s_reserved_segments
, super
->s_btree_pool
);
863 super
->s_je
= kzalloc(bufsize
, GFP_KERNEL
);
867 super
->s_compressed_je
= kzalloc(bufsize
, GFP_KERNEL
);
868 if (!super
->s_compressed_je
)
871 super
->s_master_inode
= logfs_new_meta_inode(sb
, LOGFS_INO_MASTER
);
872 if (IS_ERR(super
->s_master_inode
))
873 return PTR_ERR(super
->s_master_inode
);
875 ret
= logfs_read_journal(sb
);
879 reserve_sb_and_journal(sb
);
882 super
->s_journal_area
->a_ops
= &journal_area_ops
;
886 void logfs_cleanup_journal(struct super_block
*sb
)
888 struct logfs_super
*super
= logfs_super(sb
);
890 btree_grim_visitor32(&super
->s_reserved_segments
, 0, NULL
);
892 kfree(super
->s_compressed_je
);