add patch add-i_raw_lock
[ext4-patch-queue.git] / mballoc-allocate-larger-extents
blobfd2bafb1e2857f8d8486781fe64ff38bb51a3ca6
1 ext4: allocate larger contiguous extents using mballoc
3 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
5 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
6 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
7 ---
8 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
9 index ed8482e..9745b84 100644
10 --- a/fs/ext4/mballoc.c
11 +++ b/fs/ext4/mballoc.c
12 @@ -633,7 +633,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
14         BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
16 -       border = 2 << sb->s_blocksize_bits;
17 +       border = 1 << (sb->s_blocksize_bits + 1);
19         while (len > 0) {
20                 /* find how many blocks can be covered since this position */
21 @@ -3063,8 +3063,10 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
22  ext4_mb_normalize_request(struct ext4_allocation_context *ac,
23                                 struct ext4_allocation_request *ar)
24  {
25 -       int bsbits, max;
26 +       loff_t max;
27         ext4_lblk_t end;
28 +       int bsbits, chunk_blks;
29 +       unsigned int s_mb_stream_request;
30         loff_t size, orig_size, start_off;
31         ext4_lblk_t start, orig_start;
32         struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
33 @@ -3090,54 +3092,61 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
34         }
36         bsbits = ac->ac_sb->s_blocksize_bits;
37 +       s_mb_stream_request = EXT4_SB(ac->ac_sb)->s_mb_stream_request;
38 +       /* make sure this is a power of 2 */
39 +       s_mb_stream_request =
40 +               roundup_pow_of_two((unsigned long)s_mb_stream_request);
42         /* first, let's learn actual file size
43          * given current request is allocated */
44         size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
45 -       size = size << bsbits;
46 -       if (size < i_size_read(ac->ac_inode))
47 -               size = i_size_read(ac->ac_inode);
49 -       /* max size of free chunks */
50 -       max = 2 << bsbits;
51 +       if (size < (i_size_read(ac->ac_inode) >> bsbits))
52 +               size = i_size_read(ac->ac_inode) >> bsbits;
53 +       /*
54 +        * max free chunk blocks.
55 +        * (max buddy cache order is (bsbits + 1)).
56 +        */
57 +       max = 1 << (bsbits + 1);
59 -#define NRL_CHECK_SIZE(req, size, max, chunk_size)     \
60 -               (req <= (size) || max <= (chunk_size))
61 +       /*
62 +        * If the buddy cache order allows more blocks than a
63 +        * group holds, then limit to blocks per group.
64 +        */
65 +       if (max > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
66 +               max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
68         /* first, try to predict filesize */
69         /* XXX: should this table be tunable? */
70 -       start_off = 0;
71 -       if (size <= 16 * 1024) {
72 -               size = 16 * 1024;
73 -       } else if (size <= 32 * 1024) {
74 -               size = 32 * 1024;
75 -       } else if (size <= 64 * 1024) {
76 -               size = 64 * 1024;
77 -       } else if (size <= 128 * 1024) {
78 -               size = 128 * 1024;
79 -       } else if (size <= 256 * 1024) {
80 -               size = 256 * 1024;
81 -       } else if (size <= 512 * 1024) {
82 -               size = 512 * 1024;
83 -       } else if (size <= 1024 * 1024) {
84 -               size = 1024 * 1024;
85 -       } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
86 -               start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
87 -                                               (21 - bsbits)) << 21;
88 -               size = 2 * 1024 * 1024;
89 -       } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
90 -               start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
91 -                                                       (22 - bsbits)) << 22;
92 -               size = 4 * 1024 * 1024;
93 -       } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
94 -                                       (8<<20)>>bsbits, max, 8 * 1024)) {
95 -               start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
96 -                                                       (23 - bsbits)) << 23;
97 -               size = 8 * 1024 * 1024;
98 -       } else {
99 -               start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
100 -               size      = ac->ac_o_ex.fe_len << bsbits;
101 +       /*
102 +        * A size of at most s_mb_stream_request blocks is
103 +        * handled by locality group preallocation.
104 +        */
105 +       if (size <= s_mb_stream_request) {
106 +               size = s_mb_stream_request << bsbits;
107 +               goto found_size;
108 +       }
109 +       chunk_blks = s_mb_stream_request << 1;
110 +       while (1) {
111 +               if (size <= chunk_blks) {
112 +                       if (max <= chunk_blks)
113 +                               size = max << bsbits;
114 +                       else
115 +                               size = chunk_blks << bsbits;
116 +                       break;
117 +               }
118 +               chunk_blks = chunk_blks << 1;
119         }
121 +found_size:
122 +#if 0
123 +       /* Will I end up requesting less than what I asked for? */
124 +       start_off = (loff_t)(ac->ac_o_ex.fe_logical << bsbits) & ~(size - 1);
125 +       start_off = start_off * size;
126 +#else
127 +       start_off = (loff_t)(ac->ac_o_ex.fe_logical << bsbits);
128 +#endif
130 +       /* convert into blocks */
131         orig_size = size = size >> bsbits;
132         orig_start = start = start_off >> bsbits;
134 @@ -3216,6 +3225,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
135         }
136         BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
137                         start > ac->ac_o_ex.fe_logical);
139 +       if (size <= 0 ||  size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
140 +               printk(KERN_ALERT "size is %ld orig size is %ld\n", (long)size, (long)orig_size);
142         BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
144         /* now prepare goal request */
146 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
147 the body of a message to majordomo@vger.kernel.org
148 More majordomo info at  http://vger.kernel.org/majordomo-info.html