1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
5 * Copyright (C) 2000 Red Hat, Inc.
6 * Copyright (C) 2000 Los Alamos National Laboratory
7 * Copyright (C) 2000 TurboLinux, Inc.
8 * Copyright (C) 2001 Mountain View Data, Inc.
9 * Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
11 * This file is part of InterMezzo, http://www.inter-mezzo.org.
13 * InterMezzo is free software; you can redistribute it and/or
14 * modify it under the terms of version 2 of the GNU General Public
15 * License as published by the Free Software Foundation.
17 * InterMezzo is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with InterMezzo; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #include <linux/types.h>
28 #include <linux/param.h>
29 #include <linux/kernel.h>
30 #include <linux/sched.h>
32 #include <linux/slab.h>
33 #include <linux/vmalloc.h>
34 #include <linux/stat.h>
35 #include <linux/errno.h>
36 #include <linux/locks.h>
37 #include <asm/segment.h>
38 #include <asm/uaccess.h>
39 #include <linux/string.h>
40 #include <linux/smp_lock.h>
41 #if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
42 #include <linux/jbd.h>
43 #include <linux/ext3_fs.h>
44 #include <linux/ext3_jbd.h>
47 #include <linux/intermezzo_fs.h>
48 #include <linux/intermezzo_psdev.h>
50 #if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
/*
 * Number of whole filesystem blocks spanned by the longest path (PATH_MAX)
 * and the longest name (NAME_MAX) on the filesystem that holds @inode.
 * The right shift discards any partial block, so both values round down.
 */
#define PRESTO_E3_BYTES_TO_BLOCKS(inode, nbytes) \
        ((nbytes) >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
#define MAX_PATH_BLOCKS(inode) PRESTO_E3_BYTES_TO_BLOCKS(inode, PATH_MAX)
#define MAX_NAME_BLOCKS(inode) PRESTO_E3_BYTES_TO_BLOCKS(inode, NAME_MAX)
/* space requirements:
   truncation:
        used to truncate the KML forward to the next fset->chunksize boundary
   writing a record:
        write header (< one block)
        write one path (< MAX_PATHLEN)
        possibly write another path (< MAX_PATHLEN)
        write suffix (< one block)
   presto_update_last_rcvd
*/
69 static loff_t
presto_e3_freespace(struct presto_cache
*cache
,
70 struct super_block
*sb
)
72 loff_t freebl
= le32_to_cpu(sb
->u
.ext3_sb
.s_es
->s_free_blocks_count
);
73 loff_t avail
= freebl
-
74 le32_to_cpu(sb
->u
.ext3_sb
.s_es
->s_r_blocks_count
);
75 return (avail
<< EXT3_BLOCK_SIZE_BITS(sb
));
78 /* start the filesystem journal operations */
79 static void *presto_e3_trans_start(struct presto_file_set
*fset
,
84 int trunc_blks
, one_path_blks
, extra_path_blks
,
85 extra_name_blks
, lml_blks
;
86 __u32 avail_kmlblocks
;
89 if ( presto_no_journal(fset
) ||
90 strcmp(fset
->fset_cache
->cache_type
, "ext3"))
92 CDEBUG(D_JOURNAL
, "got cache_type \"%s\"\n",
93 fset
->fset_cache
->cache_type
);
97 avail_kmlblocks
= inode
->i_sb
->u
.ext3_sb
.s_es
->s_free_blocks_count
;
99 if ( avail_kmlblocks
< 3 ) {
100 return ERR_PTR(-ENOSPC
);
103 if ( (op
!= KML_OPCODE_UNLINK
&& op
!= KML_OPCODE_RMDIR
)
104 && avail_kmlblocks
< 6 ) {
105 return ERR_PTR(-ENOSPC
);
108 /* Need journal space for:
109 at least three writes to KML (two one block writes, one a path)
110 possibly a second name (unlink, rmdir)
111 possibly a second path (symlink, rename)
112 a one block write to the last rcvd file
115 trunc_blks
= EXT3_DATA_TRANS_BLOCKS
+ 1;
116 one_path_blks
= 4*EXT3_DATA_TRANS_BLOCKS
+ MAX_PATH_BLOCKS(inode
) + 3;
117 lml_blks
= 4*EXT3_DATA_TRANS_BLOCKS
+ MAX_PATH_BLOCKS(inode
) + 2;
118 extra_path_blks
= EXT3_DATA_TRANS_BLOCKS
+ MAX_PATH_BLOCKS(inode
);
119 extra_name_blks
= EXT3_DATA_TRANS_BLOCKS
+ MAX_NAME_BLOCKS(inode
);
121 /* additional blocks appear for "two pathname" operations
122 and operations involving the LML records
125 case KML_OPCODE_TRUNC
:
126 jblocks
= one_path_blks
+ extra_name_blks
+ trunc_blks
127 + EXT3_DELETE_TRANS_BLOCKS
;
129 case KML_OPCODE_KML_TRUNC
:
130 /* Hopefully this is a little better, but I'm still mostly
133 jblocks
= extra_name_blks
+ trunc_blks
+
134 EXT3_DELETE_TRANS_BLOCKS
+ 2;
137 jblocks
+= extra_name_blks
+ trunc_blks
+
138 EXT3_DELETE_TRANS_BLOCKS
+ 2;
141 jblocks
+= 2 * extra_path_blks
+ trunc_blks
+
142 2 * EXT3_DATA_TRANS_BLOCKS
+ 2 + 3;
145 jblocks
+= 2 * extra_path_blks
+ trunc_blks
+
146 2 * EXT3_DATA_TRANS_BLOCKS
+ 2 + 3;
148 case KML_OPCODE_RELEASE
:
150 jblocks = one_path_blks + lml_blks + 2*trunc_blks;
152 jblocks
= one_path_blks
;
154 case KML_OPCODE_SETATTR
:
155 jblocks
= one_path_blks
+ trunc_blks
+ 1 ;
157 case KML_OPCODE_CREATE
:
158 jblocks
= one_path_blks
+ trunc_blks
159 + EXT3_DATA_TRANS_BLOCKS
+ 3 + 2;
161 case KML_OPCODE_LINK
:
162 jblocks
= one_path_blks
+ trunc_blks
163 + EXT3_DATA_TRANS_BLOCKS
+ 2;
165 case KML_OPCODE_UNLINK
:
166 jblocks
= one_path_blks
+ extra_name_blks
+ trunc_blks
167 + EXT3_DELETE_TRANS_BLOCKS
+ 2;
169 case KML_OPCODE_SYMLINK
:
170 jblocks
= one_path_blks
+ extra_path_blks
+ trunc_blks
171 + EXT3_DATA_TRANS_BLOCKS
+ 5;
173 case KML_OPCODE_MKDIR
:
174 jblocks
= one_path_blks
+ trunc_blks
175 + EXT3_DATA_TRANS_BLOCKS
+ 4 + 2;
177 case KML_OPCODE_RMDIR
:
178 jblocks
= one_path_blks
+ extra_name_blks
+ trunc_blks
179 + EXT3_DELETE_TRANS_BLOCKS
+ 1;
181 case KML_OPCODE_MKNOD
:
182 jblocks
= one_path_blks
+ trunc_blks
+
183 EXT3_DATA_TRANS_BLOCKS
+ 3 + 2;
185 case KML_OPCODE_RENAME
:
186 jblocks
= one_path_blks
+ extra_path_blks
+ trunc_blks
+
187 2 * EXT3_DATA_TRANS_BLOCKS
+ 2 + 3;
189 case KML_OPCODE_WRITE
:
190 jblocks
= one_path_blks
;
191 /* add this when we can wrap our transaction with
192 that of ext3_file_write (ordered writes)
193 + EXT3_DATA_TRANS_BLOCKS;
197 CDEBUG(D_JOURNAL
, "invalid operation %d for journal\n", op
);
201 CDEBUG(D_JOURNAL
, "creating journal handle (%d blocks) for op %d\n",
203 /* journal_start/stop does not do its own locking while updating
204 * the handle/transaction information. Hence we create our own
205 * critical section to protect these calls. -SHP
208 handle
= journal_start(EXT3_JOURNAL(inode
), jblocks
);
/*
 * presto_e3_trans_commit - finish a journal handle via journal_stop().
 *
 * @fset:   file set the handle was started for
 * @handle: journal handle, passed as an opaque pointer; may be NULL
 *
 * Does nothing when journaling is disabled for @fset or @handle is NULL.
 */
static void presto_e3_trans_commit(struct presto_file_set *fset, void *handle)
{
        if (!presto_no_journal(fset) && handle) {
                /*
                 * journal_stop() does not lock the handle/transaction
                 * information itself, so it runs inside the same critical
                 * section that is used around journal_start(). -SHP
                 */
                lock_kernel();
                journal_stop(handle);
                unlock_kernel();
        }
}
/*
 * presto_e3_journal_file_data - set EXT3_JOURNAL_DATA_FL on @inode.
 *
 * The flag is OR-ed into the inode's ext3 i_flags. If the ext3 headers do
 * not define EXT3_JOURNAL_DATA_FL, the build emits the warning below and
 * this function leaves the inode untouched.
 */
static void presto_e3_journal_file_data(struct inode *inode)
{
#if defined(EXT3_JOURNAL_DATA_FL)
        inode->u.ext3_i.i_flags =
                inode->u.ext3_i.i_flags | EXT3_JOURNAL_DATA_FL;
#else
#warning You must have a facility to enable journaled writes for recovery!
#endif
}
/* The logic here is a slightly modified version of
 * ext3/inode.c:block_to_path.
 */
/*
 * presto_e3_has_all_data - compare the number of blocks a file of
 * inode->i_size would occupy with inode->i_blocks.
 *
 * `block` starts as i_size rounded up to whole filesystem blocks. The
 * if / else-if ladder classifies that count against the direct,
 * single-indirect, double-indirect and triple-indirect ranges of an ext3
 * inode. The count is then multiplied by s_blocksize / 512 before being
 * logged and compared with inode->i_blocks.
 *
 * NOTE(review): in this listing the bodies of the i_size == 0 branch, of the
 * three indirect branches and of the final comparison are missing, as are
 * the braces and every return statement. Presumably each indirect branch
 * adds the index blocks needed for that range and the function returns
 * non-zero when all blocks are present -- confirm against a pristine copy.
 * NOTE(review): i_size is narrowed into a `long`; verify that this cannot
 * truncate for large files on 32-bit builds.
 */
235 static int presto_e3_has_all_data(struct inode
*inode
)
237 int ptrs
= EXT3_ADDR_PER_BLOCK(inode
->i_sb
);
238 int ptrs_bits
= EXT3_ADDR_PER_BLOCK_BITS(inode
->i_sb
);
239 const long direct_blocks
= EXT3_NDIR_BLOCKS
,
240 indirect_blocks
= ptrs
,
241 double_blocks
= (1 << (ptrs_bits
* 2));
/* File size rounded up to whole filesystem blocks. */
242 long block
= (inode
->i_size
+ inode
->i_sb
->s_blocksize
- 1) >>
243 inode
->i_sb
->s_blocksize_bits
;
247 if (inode
->i_size
== 0) {
252 if (block
< direct_blocks
) {
253 /* No indirect blocks, no problem. */
254 } else if (block
< indirect_blocks
+ direct_blocks
) {
256 } else if (block
< double_blocks
+ indirect_blocks
+ direct_blocks
) {
258 } else if (((block
- double_blocks
- indirect_blocks
- direct_blocks
)
259 >> (ptrs_bits
* 2)) < ptrs
) {
/* Scale from filesystem blocks to 512-byte units before comparing. */
263 block
*= (inode
->i_sb
->s_blocksize
/ 512);
265 CDEBUG(D_CACHE
, "Need %ld blocks, have %ld.\n", block
, inode
->i_blocks
);
267 if (block
> inode
->i_blocks
) {
276 struct journal_ops presto_ext3_journal_ops
= {
277 .tr_all_data
= presto_e3_has_all_data
,
278 .tr_avail
= presto_e3_freespace
,
279 .tr_start
= presto_e3_trans_start
,
280 .tr_commit
= presto_e3_trans_commit
,
281 .tr_journal_data
= presto_e3_journal_file_data
,
282 .tr_ilookup
= presto_iget_ilookup
285 #endif /* CONFIG_EXT3_FS */