2 * fs/logfs/journal.c - journal handling code
4 * As should be obvious for Linux kernel code, license is GPLv2
6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
10 static void logfs_calc_free(struct super_block
*sb
)
12 struct logfs_super
*super
= logfs_super(sb
);
13 u64 reserve
, no_segs
= super
->s_no_segs
;
17 /* superblock segments */
19 super
->s_no_journal_segs
= 0;
22 if (super
->s_journal_seg
[i
]) {
24 super
->s_no_journal_segs
++;
27 /* open segments plus one extra per level for GC */
28 no_segs
-= 2 * super
->s_total_levels
;
30 free
= no_segs
* (super
->s_segsize
- LOGFS_SEGMENT_RESERVE
);
31 free
-= super
->s_used_bytes
;
32 /* just a bit extra */
33 free
-= super
->s_total_levels
* 4096;
35 /* Bad blocks are 'paid' for with speed reserve - the filesystem
36 * simply gets slower as bad blocks accumulate. Until the bad blocks
37 * exceed the speed reserve - then the filesystem gets smaller.
39 reserve
= super
->s_bad_segments
+ super
->s_bad_seg_reserve
;
40 reserve
*= super
->s_segsize
- LOGFS_SEGMENT_RESERVE
;
41 reserve
= max(reserve
, super
->s_speed_reserve
);
46 super
->s_free_bytes
= free
;
49 static void reserve_sb_and_journal(struct super_block
*sb
)
51 struct logfs_super
*super
= logfs_super(sb
);
52 struct btree_head32
*head
= &super
->s_reserved_segments
;
55 err
= btree_insert32(head
, seg_no(sb
, super
->s_sb_ofs
[0]), (void *)1,
59 err
= btree_insert32(head
, seg_no(sb
, super
->s_sb_ofs
[1]), (void *)1,
64 if (!super
->s_journal_seg
[i
])
66 err
= btree_insert32(head
, super
->s_journal_seg
[i
], (void *)1,
72 static void read_dynsb(struct super_block
*sb
,
73 struct logfs_je_dynsb
*dynsb
)
75 struct logfs_super
*super
= logfs_super(sb
);
77 super
->s_gec
= be64_to_cpu(dynsb
->ds_gec
);
78 super
->s_sweeper
= be64_to_cpu(dynsb
->ds_sweeper
);
79 super
->s_victim_ino
= be64_to_cpu(dynsb
->ds_victim_ino
);
80 super
->s_rename_dir
= be64_to_cpu(dynsb
->ds_rename_dir
);
81 super
->s_rename_pos
= be64_to_cpu(dynsb
->ds_rename_pos
);
82 super
->s_used_bytes
= be64_to_cpu(dynsb
->ds_used_bytes
);
83 super
->s_generation
= be32_to_cpu(dynsb
->ds_generation
);
86 static void read_anchor(struct super_block
*sb
,
87 struct logfs_je_anchor
*da
)
89 struct logfs_super
*super
= logfs_super(sb
);
90 struct inode
*inode
= super
->s_master_inode
;
91 struct logfs_inode
*li
= logfs_inode(inode
);
94 super
->s_last_ino
= be64_to_cpu(da
->da_last_ino
);
96 li
->li_height
= da
->da_height
;
97 i_size_write(inode
, be64_to_cpu(da
->da_size
));
98 li
->li_used_bytes
= be64_to_cpu(da
->da_used_bytes
);
100 for (i
= 0; i
< LOGFS_EMBEDDED_FIELDS
; i
++)
101 li
->li_data
[i
] = be64_to_cpu(da
->da_data
[i
]);
104 static void read_erasecount(struct super_block
*sb
,
105 struct logfs_je_journal_ec
*ec
)
107 struct logfs_super
*super
= logfs_super(sb
);
111 super
->s_journal_ec
[i
] = be32_to_cpu(ec
->ec
[i
]);
/*
 * read_area - restore one area's state from a journal area entry.
 *
 * Copies used_bytes and segno from the entry (big-endian on the medium)
 * into the area selected by a->gc_level, derives a_written_bytes by
 * masking a_used_bytes with the write-size mask, and then calls
 * logfs_buf_recover().  When s_writesize > 1 the bytes directly following
 * the entry (a + 1) are handed over with length s_writesize; otherwise
 * NULL/0 is passed.  An entry whose vim is not VIM_DEFAULT yields -EIO.
 */
114 static int read_area(struct super_block
*sb
, struct logfs_je_area
*a
)
116 struct logfs_super
*super
= logfs_super(sb
);
/*
 * NOTE(review): s_area[] is indexed with a->gc_level here, in the
 * initializer, which runs before the gc_level range check further down.
 * An out-of-range gc_level therefore reads past the array before it can
 * be rejected - consider assigning area only after the check.
 */
117 struct logfs_area
*area
= super
->s_area
[a
->gc_level
];
/* NOTE(review): the mask presumably assumes s_writesize is a power of two - confirm */
119 u32 writemask
= ~(super
->s_writesize
- 1);
/* range check on the area index taken from the journal entry */
121 if (a
->gc_level
>= LOGFS_NO_AREAS
)
123 if (a
->vim
!= VIM_DEFAULT
)
124 return -EIO
; /* TODO: close area and continue */
126 area
->a_used_bytes
= be32_to_cpu(a
->used_bytes
);
/* a_used_bytes with the low (sub-writesize) bits masked off */
127 area
->a_written_bytes
= area
->a_used_bytes
& writemask
;
128 area
->a_segno
= be32_to_cpu(a
->segno
);
/* device offset of the written-bytes position inside the area's segment */
132 ofs
= dev_ofs(sb
, area
->a_segno
, area
->a_written_bytes
);
133 if (super
->s_writesize
> 1)
134 logfs_buf_recover(area
, ofs
, a
+ 1, super
->s_writesize
);
136 logfs_buf_recover(area
, ofs
, NULL
, 0);
140 static void *unpack(void *from
, void *to
)
142 struct logfs_journal_header
*jh
= from
;
143 void *data
= from
+ sizeof(struct logfs_journal_header
);
145 size_t inlen
, outlen
;
147 inlen
= be16_to_cpu(jh
->h_len
);
148 outlen
= be16_to_cpu(jh
->h_datalen
);
150 if (jh
->h_compr
== COMPR_NONE
)
151 memcpy(to
, data
, inlen
);
153 err
= logfs_uncompress(data
, to
, inlen
, outlen
);
159 static int __read_je_header(struct super_block
*sb
, u64 ofs
,
160 struct logfs_journal_header
*jh
)
162 struct logfs_super
*super
= logfs_super(sb
);
163 size_t bufsize
= max_t(size_t, sb
->s_blocksize
, super
->s_writesize
)
164 + MAX_JOURNAL_HEADER
;
165 u16 type
, len
, datalen
;
168 /* read header only */
169 err
= wbuf_read(sb
, ofs
, sizeof(*jh
), jh
);
172 type
= be16_to_cpu(jh
->h_type
);
173 len
= be16_to_cpu(jh
->h_len
);
174 datalen
= be16_to_cpu(jh
->h_datalen
);
175 if (len
> sb
->s_blocksize
)
177 if ((type
< JE_FIRST
) || (type
> JE_LAST
))
179 if (datalen
> bufsize
)
184 static int __read_je_payload(struct super_block
*sb
, u64 ofs
,
185 struct logfs_journal_header
*jh
)
190 len
= be16_to_cpu(jh
->h_len
);
191 err
= wbuf_read(sb
, ofs
+ sizeof(*jh
), len
, jh
+ 1);
194 if (jh
->h_crc
!= logfs_crc32(jh
, len
+ sizeof(*jh
), 4)) {
195 /* Old code was confused. It forgot about the header length
196 * and stopped calculating the crc 16 bytes before the end
198 * FIXME: Remove this hack once the old code is fixed.
200 if (jh
->h_crc
== logfs_crc32(jh
, len
, 4))
209 * jh needs to be large enough to hold the complete entry, not just the header
211 static int __read_je(struct super_block
*sb
, u64 ofs
,
212 struct logfs_journal_header
*jh
)
216 err
= __read_je_header(sb
, ofs
, jh
);
219 return __read_je_payload(sb
, ofs
, jh
);
222 static int read_je(struct super_block
*sb
, u64 ofs
)
224 struct logfs_super
*super
= logfs_super(sb
);
225 struct logfs_journal_header
*jh
= super
->s_compressed_je
;
226 void *scratch
= super
->s_je
;
230 err
= __read_je(sb
, ofs
, jh
);
233 type
= be16_to_cpu(jh
->h_type
);
234 datalen
= be16_to_cpu(jh
->h_datalen
);
238 read_dynsb(sb
, unpack(jh
, scratch
));
241 read_anchor(sb
, unpack(jh
, scratch
));
244 read_erasecount(sb
, unpack(jh
, scratch
));
247 read_area(sb
, unpack(jh
, scratch
));
250 err
= logfs_load_object_aliases(sb
, unpack(jh
, scratch
),
260 static int logfs_read_segment(struct super_block
*sb
, u32 segno
)
262 struct logfs_super
*super
= logfs_super(sb
);
263 struct logfs_journal_header
*jh
= super
->s_compressed_je
;
264 u64 ofs
, seg_ofs
= dev_ofs(sb
, segno
, 0);
265 u32 h_ofs
, last_ofs
= 0;
266 u16 len
, datalen
, last_len
= 0;
269 /* search for most recent commit */
270 for (h_ofs
= 0; h_ofs
< super
->s_segsize
; h_ofs
+= sizeof(*jh
)) {
271 ofs
= seg_ofs
+ h_ofs
;
272 err
= __read_je_header(sb
, ofs
, jh
);
275 if (jh
->h_type
!= cpu_to_be16(JE_COMMIT
))
277 err
= __read_je_payload(sb
, ofs
, jh
);
280 len
= be16_to_cpu(jh
->h_len
);
281 datalen
= be16_to_cpu(jh
->h_datalen
);
282 if ((datalen
> sizeof(super
->s_je_array
)) ||
283 (datalen
% sizeof(__be64
)))
287 h_ofs
+= ALIGN(len
, sizeof(*jh
)) - sizeof(*jh
);
292 ofs
= seg_ofs
+ last_ofs
;
293 log_journal("Read commit from %llx\n", ofs
);
294 err
= __read_je(sb
, ofs
, jh
);
295 BUG_ON(err
); /* We should have caught it in the scan loop already */
299 unpack(jh
, super
->s_je_array
);
300 super
->s_no_je
= last_len
/ sizeof(__be64
);
301 /* iterate over array */
302 for (i
= 0; i
< super
->s_no_je
; i
++) {
303 err
= read_je(sb
, be64_to_cpu(super
->s_je_array
[i
]));
307 super
->s_journal_area
->a_segno
= segno
;
311 static u64
read_gec(struct super_block
*sb
, u32 segno
)
313 struct logfs_segment_header sh
;
319 err
= wbuf_read(sb
, dev_ofs(sb
, segno
, 0), sizeof(sh
), &sh
);
322 crc
= logfs_crc32(&sh
, sizeof(sh
), 4);
324 WARN_ON(sh
.gec
!= cpu_to_be64(0xffffffffffffffffull
));
325 /* Most likely it was just erased */
328 return be64_to_cpu(sh
.gec
);
331 static int logfs_read_journal(struct super_block
*sb
)
333 struct logfs_super
*super
= logfs_super(sb
);
334 u64 gec
[LOGFS_JOURNAL_SEGS
], max
;
340 journal_for_each(i
) {
341 segno
= super
->s_journal_seg
[i
];
342 gec
[i
] = read_gec(sb
, super
->s_journal_seg
[i
]);
350 /* FIXME: Try older segments in case of error */
351 return logfs_read_segment(sb
, super
->s_journal_seg
[max_i
]);
355 * First search the current segment (outer loop), then pick the next segment
356 * in the array, skipping any zero entries (inner loop).
358 static void journal_get_free_segment(struct logfs_area
*area
)
360 struct logfs_super
*super
= logfs_super(area
->a_sb
);
363 journal_for_each(i
) {
364 if (area
->a_segno
!= super
->s_journal_seg
[i
])
369 if (i
== LOGFS_JOURNAL_SEGS
)
371 } while (!super
->s_journal_seg
[i
]);
373 area
->a_segno
= super
->s_journal_seg
[i
];
374 area
->a_erase_count
= ++(super
->s_journal_ec
[i
]);
375 log_journal("Journal now at %x (ec %x)\n", area
->a_segno
,
376 area
->a_erase_count
);
/*
 * Intentionally empty.  The journal keeps its erase counts globally and
 * journal_get_free_segment() already increments the count and stores it
 * in the area, so there is nothing left to look up here.
 */
static void journal_get_erase_count(struct logfs_area *area)
{
}
388 static int journal_erase_segment(struct logfs_area
*area
)
390 struct super_block
*sb
= area
->a_sb
;
391 struct logfs_segment_header sh
;
395 err
= logfs_erase_segment(sb
, area
->a_segno
, 1);
400 sh
.type
= SEG_JOURNAL
;
402 sh
.segno
= cpu_to_be32(area
->a_segno
);
403 sh
.ec
= cpu_to_be32(area
->a_erase_count
);
404 sh
.gec
= cpu_to_be64(logfs_super(sb
)->s_gec
);
405 sh
.crc
= logfs_crc32(&sh
, sizeof(sh
), 4);
407 /* This causes a bug in segment.c. Not yet. */
408 //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
410 ofs
= dev_ofs(sb
, area
->a_segno
, 0);
411 area
->a_used_bytes
= ALIGN(sizeof(sh
), 16);
412 logfs_buf_write(area
, ofs
, &sh
, sizeof(sh
));
416 static size_t __logfs_write_header(struct logfs_super
*super
,
417 struct logfs_journal_header
*jh
, size_t len
, size_t datalen
,
420 jh
->h_len
= cpu_to_be16(len
);
421 jh
->h_type
= cpu_to_be16(type
);
422 jh
->h_datalen
= cpu_to_be16(datalen
);
429 jh
->h_crc
= logfs_crc32(jh
, len
+ sizeof(*jh
), 4);
430 return ALIGN(len
, 16) + sizeof(*jh
);
433 static size_t logfs_write_header(struct logfs_super
*super
,
434 struct logfs_journal_header
*jh
, size_t datalen
, u16 type
)
436 size_t len
= datalen
;
438 return __logfs_write_header(super
, jh
, len
, datalen
, type
, COMPR_NONE
);
441 static inline size_t logfs_journal_erasecount_size(struct logfs_super
*super
)
443 return LOGFS_JOURNAL_SEGS
* sizeof(__be32
);
446 static void *logfs_write_erasecount(struct super_block
*sb
, void *_ec
,
447 u16
*type
, size_t *len
)
449 struct logfs_super
*super
= logfs_super(sb
);
450 struct logfs_je_journal_ec
*ec
= _ec
;
454 ec
->ec
[i
] = cpu_to_be32(super
->s_journal_ec
[i
]);
455 *type
= JE_ERASECOUNT
;
456 *len
= logfs_journal_erasecount_size(super
);
460 static void account_shadow(void *_shadow
, unsigned long _sb
, u64 ignore
,
463 struct logfs_shadow
*shadow
= _shadow
;
464 struct super_block
*sb
= (void *)_sb
;
465 struct logfs_super
*super
= logfs_super(sb
);
467 /* consume new space */
468 super
->s_free_bytes
-= shadow
->new_len
;
469 super
->s_used_bytes
+= shadow
->new_len
;
470 super
->s_dirty_used_bytes
-= shadow
->new_len
;
472 /* free up old space */
473 super
->s_free_bytes
+= shadow
->old_len
;
474 super
->s_used_bytes
-= shadow
->old_len
;
475 super
->s_dirty_free_bytes
-= shadow
->old_len
;
477 logfs_set_segment_used(sb
, shadow
->old_ofs
, -shadow
->old_len
);
478 logfs_set_segment_used(sb
, shadow
->new_ofs
, shadow
->new_len
);
480 log_journal("account_shadow(%llx, %llx, %x) %llx->%llx %x->%x\n",
481 shadow
->ino
, shadow
->bix
, shadow
->gc_level
,
482 shadow
->old_ofs
, shadow
->new_ofs
,
483 shadow
->old_len
, shadow
->new_len
);
484 mempool_free(shadow
, super
->s_shadow_pool
);
487 static void account_shadows(struct super_block
*sb
)
489 struct logfs_super
*super
= logfs_super(sb
);
490 struct inode
*inode
= super
->s_master_inode
;
491 struct logfs_inode
*li
= logfs_inode(inode
);
492 struct shadow_tree
*tree
= &super
->s_shadow_tree
;
494 btree_grim_visitor64(&tree
->new, (unsigned long)sb
, account_shadow
);
495 btree_grim_visitor64(&tree
->old
, (unsigned long)sb
, account_shadow
);
499 * We never actually use the structure, when attached to the
500 * master inode. But it is easier to always free it here than
501 * to have checks in several places elsewhere when allocating
504 li
->li_block
->ops
->free_block(sb
, li
->li_block
);
506 BUG_ON((s64
)li
->li_used_bytes
< 0);
509 static void *__logfs_write_anchor(struct super_block
*sb
, void *_da
,
510 u16
*type
, size_t *len
)
512 struct logfs_super
*super
= logfs_super(sb
);
513 struct logfs_je_anchor
*da
= _da
;
514 struct inode
*inode
= super
->s_master_inode
;
515 struct logfs_inode
*li
= logfs_inode(inode
);
518 da
->da_height
= li
->li_height
;
519 da
->da_last_ino
= cpu_to_be64(super
->s_last_ino
);
520 da
->da_size
= cpu_to_be64(i_size_read(inode
));
521 da
->da_used_bytes
= cpu_to_be64(li
->li_used_bytes
);
522 for (i
= 0; i
< LOGFS_EMBEDDED_FIELDS
; i
++)
523 da
->da_data
[i
] = cpu_to_be64(li
->li_data
[i
]);
529 static void *logfs_write_dynsb(struct super_block
*sb
, void *_dynsb
,
530 u16
*type
, size_t *len
)
532 struct logfs_super
*super
= logfs_super(sb
);
533 struct logfs_je_dynsb
*dynsb
= _dynsb
;
535 dynsb
->ds_gec
= cpu_to_be64(super
->s_gec
);
536 dynsb
->ds_sweeper
= cpu_to_be64(super
->s_sweeper
);
537 dynsb
->ds_victim_ino
= cpu_to_be64(super
->s_victim_ino
);
538 dynsb
->ds_rename_dir
= cpu_to_be64(super
->s_rename_dir
);
539 dynsb
->ds_rename_pos
= cpu_to_be64(super
->s_rename_pos
);
540 dynsb
->ds_used_bytes
= cpu_to_be64(super
->s_used_bytes
);
541 dynsb
->ds_generation
= cpu_to_be32(super
->s_generation
);
543 *len
= sizeof(*dynsb
);
547 static void write_wbuf(struct super_block
*sb
, struct logfs_area
*area
,
550 struct logfs_super
*super
= logfs_super(sb
);
551 struct address_space
*mapping
= super
->s_mapping_inode
->i_mapping
;
557 ofs
= dev_ofs(sb
, area
->a_segno
,
558 area
->a_used_bytes
& ~(super
->s_writesize
- 1));
559 index
= ofs
>> PAGE_SHIFT
;
560 page_ofs
= ofs
& (PAGE_SIZE
- 1);
562 page
= find_lock_page(mapping
, index
);
564 memcpy(wbuf
, page_address(page
) + page_ofs
, super
->s_writesize
);
568 static void *logfs_write_area(struct super_block
*sb
, void *_a
,
569 u16
*type
, size_t *len
)
571 struct logfs_super
*super
= logfs_super(sb
);
572 struct logfs_area
*area
= super
->s_area
[super
->s_sum_index
];
573 struct logfs_je_area
*a
= _a
;
575 a
->vim
= VIM_DEFAULT
;
576 a
->gc_level
= super
->s_sum_index
;
577 a
->used_bytes
= cpu_to_be32(area
->a_used_bytes
);
578 a
->segno
= cpu_to_be32(area
->a_segno
);
579 if (super
->s_writesize
> 1)
580 write_wbuf(sb
, area
, a
+ 1);
583 *len
= sizeof(*a
) + super
->s_writesize
;
587 static void *logfs_write_commit(struct super_block
*sb
, void *h
,
588 u16
*type
, size_t *len
)
590 struct logfs_super
*super
= logfs_super(sb
);
593 *len
= super
->s_no_je
* sizeof(__be64
);
594 return super
->s_je_array
;
597 static size_t __logfs_write_je(struct super_block
*sb
, void *buf
, u16 type
,
600 struct logfs_super
*super
= logfs_super(sb
);
601 void *header
= super
->s_compressed_je
;
602 void *data
= header
+ sizeof(struct logfs_journal_header
);
603 ssize_t compr_len
, pad_len
;
604 u8 compr
= COMPR_ZLIB
;
607 return logfs_write_header(super
, header
, 0, type
);
609 compr_len
= logfs_compress(buf
, data
, len
, sb
->s_blocksize
);
610 if (compr_len
< 0 || type
== JE_ANCHOR
) {
611 BUG_ON(len
> sb
->s_blocksize
);
612 memcpy(data
, buf
, len
);
617 pad_len
= ALIGN(compr_len
, 16);
618 memset(data
+ compr_len
, 0, pad_len
- compr_len
);
620 return __logfs_write_header(super
, header
, compr_len
, len
, type
, compr
);
623 static s64
logfs_get_free_bytes(struct logfs_area
*area
, size_t *bytes
,
626 u32 writesize
= logfs_super(area
->a_sb
)->s_writesize
;
630 ret
= logfs_open_area(area
, *bytes
);
634 ofs
= area
->a_used_bytes
;
635 area
->a_used_bytes
+= *bytes
;
638 area
->a_used_bytes
= ALIGN(area
->a_used_bytes
, writesize
);
639 *bytes
= area
->a_used_bytes
- ofs
;
642 return dev_ofs(area
->a_sb
, area
->a_segno
, ofs
);
645 static int logfs_write_je_buf(struct super_block
*sb
, void *buf
, u16 type
,
648 struct logfs_super
*super
= logfs_super(sb
);
649 struct logfs_area
*area
= super
->s_journal_area
;
650 struct logfs_journal_header
*jh
= super
->s_compressed_je
;
655 len
= __logfs_write_je(sb
, buf
, type
, buf_len
);
656 if (jh
->h_type
== cpu_to_be16(JE_COMMIT
))
659 ofs
= logfs_get_free_bytes(area
, &len
, must_pad
);
662 logfs_buf_write(area
, ofs
, super
->s_compressed_je
, len
);
663 super
->s_je_array
[super
->s_no_je
++] = cpu_to_be64(ofs
);
667 static int logfs_write_je(struct super_block
*sb
,
668 void* (*write
)(struct super_block
*sb
, void *scratch
,
669 u16
*type
, size_t *len
))
675 buf
= write(sb
, logfs_super(sb
)->s_je
, &type
, &len
);
676 return logfs_write_je_buf(sb
, buf
, type
, len
);
679 int write_alias_journal(struct super_block
*sb
, u64 ino
, u64 bix
,
680 level_t level
, int child_no
, __be64 val
)
682 struct logfs_super
*super
= logfs_super(sb
);
683 struct logfs_obj_alias
*oa
= super
->s_je
;
684 int err
= 0, fill
= super
->s_je_fill
;
686 log_aliases("logfs_write_obj_aliases #%x(%llx, %llx, %x, %x) %llx\n",
687 fill
, ino
, bix
, level
, child_no
, be64_to_cpu(val
));
688 oa
[fill
].ino
= cpu_to_be64(ino
);
689 oa
[fill
].bix
= cpu_to_be64(bix
);
691 oa
[fill
].level
= (__force u8
)level
;
692 oa
[fill
].child_no
= cpu_to_be16(child_no
);
694 if (fill
>= sb
->s_blocksize
/ sizeof(*oa
)) {
695 err
= logfs_write_je_buf(sb
, oa
, JE_OBJ_ALIAS
, sb
->s_blocksize
);
699 super
->s_je_fill
= fill
;
703 static int logfs_write_obj_aliases(struct super_block
*sb
)
705 struct logfs_super
*super
= logfs_super(sb
);
708 log_journal("logfs_write_obj_aliases: %d aliases to write\n",
709 super
->s_no_object_aliases
);
710 super
->s_je_fill
= 0;
711 err
= logfs_write_obj_aliases_pagecache(sb
);
715 if (super
->s_je_fill
)
716 err
= logfs_write_je_buf(sb
, super
->s_je
, JE_OBJ_ALIAS
,
718 * sizeof(struct logfs_obj_alias
));
723 * Write all journal entries. The goto logic ensures that all journal entries
724 * are written whenever a new segment is used. It is ugly and potentially a
725 * bit wasteful, but robustness is more important. With this we can *always*
726 * erase all journal segments except the one containing the most recent commit.
728 void logfs_write_anchor(struct super_block
*sb
)
730 struct logfs_super
*super
= logfs_super(sb
);
731 struct logfs_area
*area
= super
->s_journal_area
;
734 if (!(super
->s_flags
& LOGFS_SB_FLAG_DIRTY
))
736 super
->s_flags
&= ~LOGFS_SB_FLAG_DIRTY
;
738 BUG_ON(super
->s_flags
& LOGFS_SB_FLAG_SHUTDOWN
);
739 mutex_lock(&super
->s_journal_mutex
);
741 /* Do this first or suffer corruption */
742 logfs_sync_segments(sb
);
748 if (!super
->s_area
[i
]->a_is_open
)
750 super
->s_sum_index
= i
;
751 err
= logfs_write_je(sb
, logfs_write_area
);
755 err
= logfs_write_obj_aliases(sb
);
758 err
= logfs_write_je(sb
, logfs_write_erasecount
);
761 err
= logfs_write_je(sb
, __logfs_write_anchor
);
764 err
= logfs_write_je(sb
, logfs_write_dynsb
);
768 * Order is imperative. First we sync all writes, including the
769 * non-committed journal writes. Then we write the final commit and
770 * sync the current journal segment.
771 * There is a theoretical bug here. Syncing the journal segment will
772 * write a number of journal entries and the final commit. All these
773 * are written in a single operation. If the device layer writes the
774 * data back-to-front, the commit will precede the other journal
775 * entries, leaving a race window.
776 * Two fixes are possible. Preferred is to fix the device layer to
777 * ensure writes happen front-to-back. Alternatively we can insert
778 * another logfs_sync_area() super->s_devops->sync() combo before
779 * writing the commit.
782 * On another subject, super->s_devops->sync is usually not necessary.
783 * Unless called from sys_sync or friends, a barrier would suffice.
785 super
->s_devops
->sync(sb
);
786 err
= logfs_write_je(sb
, logfs_write_commit
);
789 log_journal("Write commit to %llx\n",
790 be64_to_cpu(super
->s_je_array
[super
->s_no_je
- 1]));
791 logfs_sync_area(area
);
792 BUG_ON(area
->a_used_bytes
!= area
->a_written_bytes
);
793 super
->s_devops
->sync(sb
);
795 mutex_unlock(&super
->s_journal_mutex
);
799 void do_logfs_journal_wl_pass(struct super_block
*sb
)
801 struct logfs_super
*super
= logfs_super(sb
);
802 struct logfs_area
*area
= super
->s_journal_area
;
806 log_journal("Journal requires wear-leveling.\n");
807 /* Drop old segments */
809 if (super
->s_journal_seg
[i
]) {
810 logfs_set_segment_unreserved(sb
,
811 super
->s_journal_seg
[i
],
812 super
->s_journal_ec
[i
]);
813 super
->s_journal_seg
[i
] = 0;
814 super
->s_journal_ec
[i
] = 0;
816 /* Get new segments */
817 for (i
= 0; i
< super
->s_no_journal_segs
; i
++) {
818 segno
= get_best_cand(sb
, &super
->s_reserve_list
, &ec
);
819 super
->s_journal_seg
[i
] = segno
;
820 super
->s_journal_ec
[i
] = ec
;
821 logfs_set_segment_reserved(sb
, segno
);
823 /* Manually move journal_area */
824 area
->a_segno
= super
->s_journal_seg
[0];
826 area
->a_used_bytes
= 0;
828 logfs_write_anchor(sb
);
829 /* Write superblocks */
830 err
= logfs_write_sb(sb
);
834 static const struct logfs_area_ops journal_area_ops
= {
835 .get_free_segment
= journal_get_free_segment
,
836 .get_erase_count
= journal_get_erase_count
,
837 .erase_segment
= journal_erase_segment
,
840 int logfs_init_journal(struct super_block
*sb
)
842 struct logfs_super
*super
= logfs_super(sb
);
843 size_t bufsize
= max_t(size_t, sb
->s_blocksize
, super
->s_writesize
)
844 + MAX_JOURNAL_HEADER
;
847 mutex_init(&super
->s_journal_mutex
);
848 btree_init_mempool32(&super
->s_reserved_segments
, super
->s_btree_pool
);
850 super
->s_je
= kzalloc(bufsize
, GFP_KERNEL
);
854 super
->s_compressed_je
= kzalloc(bufsize
, GFP_KERNEL
);
855 if (!super
->s_compressed_je
)
858 super
->s_master_inode
= logfs_new_meta_inode(sb
, LOGFS_INO_MASTER
);
859 if (IS_ERR(super
->s_master_inode
))
860 return PTR_ERR(super
->s_master_inode
);
862 ret
= logfs_read_journal(sb
);
866 reserve_sb_and_journal(sb
);
869 super
->s_journal_area
->a_ops
= &journal_area_ops
;
873 void logfs_cleanup_journal(struct super_block
*sb
)
875 struct logfs_super
*super
= logfs_super(sb
);
877 btree_grim_visitor32(&super
->s_reserved_segments
, 0, NULL
);
878 destroy_meta_inode(super
->s_master_inode
);
879 super
->s_master_inode
= NULL
;
881 kfree(super
->s_compressed_je
);