2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
16 #include <linux/bio.h>
17 #include <linux/dst.h>
18 #include <linux/slab.h>
19 #include <linux/mempool.h>
22 * Transaction memory pool size.
24 static int dst_mempool_num
= 32;
25 module_param(dst_mempool_num
, int, 0644);
28 * Transaction tree management.
30 static inline int dst_trans_cmp(dst_gen_t gen
, dst_gen_t
new)
39 struct dst_trans
*dst_trans_search(struct dst_node
*node
, dst_gen_t gen
)
41 struct rb_root
*root
= &node
->trans_root
;
42 struct rb_node
*n
= root
->rb_node
;
43 struct dst_trans
*t
, *ret
= NULL
;
47 t
= rb_entry(n
, struct dst_trans
, trans_entry
);
49 cmp
= dst_trans_cmp(t
->gen
, gen
);
60 dprintk("%s: %s transaction: id: %llu.\n", __func__
,
61 (ret
) ? "found" : "not found", gen
);
66 static int dst_trans_insert(struct dst_trans
*new)
68 struct rb_root
*root
= &new->n
->trans_root
;
69 struct rb_node
**n
= &root
->rb_node
, *parent
= NULL
;
70 struct dst_trans
*ret
= NULL
, *t
;
76 t
= rb_entry(parent
, struct dst_trans
, trans_entry
);
78 cmp
= dst_trans_cmp(t
->gen
, new->gen
);
82 n
= &parent
->rb_right
;
89 new->send_time
= jiffies
;
91 printk(KERN_DEBUG
"%s: exist: old: gen: %llu, bio: %llu/%u, "
92 "send_time: %lu, new: gen: %llu, bio: %llu/%u, "
93 "send_time: %lu.\n", __func__
,
94 ret
->gen
, (u64
)ret
->bio
->bi_sector
,
95 ret
->bio
->bi_size
, ret
->send_time
,
96 new->gen
, (u64
)new->bio
->bi_sector
,
97 new->bio
->bi_size
, new->send_time
);
101 rb_link_node(&new->trans_entry
, parent
, n
);
102 rb_insert_color(&new->trans_entry
, root
);
104 dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n",
105 __func__
, new->gen
, (u64
)new->bio
->bi_sector
,
106 new->bio
->bi_size
, new->send_time
);
111 int dst_trans_remove_nolock(struct dst_trans
*t
)
113 struct dst_node
*n
= t
->n
;
115 if (t
->trans_entry
.rb_parent_color
) {
116 rb_erase(&t
->trans_entry
, &n
->trans_root
);
117 t
->trans_entry
.rb_parent_color
= 0;
122 int dst_trans_remove(struct dst_trans
*t
)
125 struct dst_node
*n
= t
->n
;
127 mutex_lock(&n
->trans_lock
);
128 ret
= dst_trans_remove_nolock(t
);
129 mutex_unlock(&n
->trans_lock
);
135 * When transaction is completed and there are no more users,
136 * we complete the appropriate block IO request with the given error status.
138 void dst_trans_put(struct dst_trans
*t
)
140 if (atomic_dec_and_test(&t
->refcnt
)) {
141 struct bio
*bio
= t
->bio
;
143 dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n",
144 __func__
, t
, t
->gen
, bio
);
146 bio_endio(bio
, t
->error
);
150 mempool_free(t
, t
->n
->trans_pool
);
155 * Process given block IO request: allocate transaction, insert it into the tree
156 * and send/schedule crypto processing.
158 int dst_process_bio(struct dst_node
*n
, struct bio
*bio
)
163 t
= mempool_alloc(n
->trans_pool
, GFP_NOFS
);
167 t
->n
= dst_node_get(n
);
171 atomic_set(&t
->refcnt
, 1);
172 t
->gen
= atomic_long_inc_return(&n
->gen
);
174 t
->enc
= bio_data_dir(bio
);
175 dst_bio_to_cmd(bio
, &t
->cmd
, DST_IO
, t
->gen
);
177 mutex_lock(&n
->trans_lock
);
178 err
= dst_trans_insert(t
);
179 mutex_unlock(&n
->trans_lock
);
183 dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n",
184 __func__
, t
->gen
, (u64
)bio
->bi_sector
,
185 bio
->bi_size
, t
->enc
, dst_need_crypto(n
));
187 if (dst_need_crypto(n
) && t
->enc
)
196 mempool_free(t
, n
->trans_pool
);
204 * Scan for timeout/stale transactions.
205 * Each transaction is resent multiple times before it is completed with an error.
207 static void dst_trans_scan(struct work_struct
*work
)
209 struct dst_node
*n
= container_of(work
, struct dst_node
,
211 struct rb_node
*rb_node
;
213 unsigned long timeout
= n
->trans_scan_timeout
;
214 int num
= 10 * n
->trans_max_retries
;
216 mutex_lock(&n
->trans_lock
);
218 for (rb_node
= rb_first(&n
->trans_root
); rb_node
; ) {
219 t
= rb_entry(rb_node
, struct dst_trans
, trans_entry
);
221 if (timeout
&& time_after(t
->send_time
+ timeout
, jiffies
)
225 dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n",
226 __func__
, t
, t
->gen
, n
->name
,
227 t
->retries
, n
->trans_max_retries
);
234 rb_node
= rb_next(rb_node
);
236 if (timeout
&& (++t
->retries
< n
->trans_max_retries
)) {
239 t
->error
= -ETIMEDOUT
;
240 dst_trans_remove_nolock(t
);
247 mutex_unlock(&n
->trans_lock
);
250 * If no timeout is specified, the system is in the middle of exiting,
251 * so there is no need to reschedule the scan again.
256 schedule_delayed_work(&n
->trans_work
, timeout
);
261 * Flush all transactions and mark them as timed out.
262 * Destroy transaction pools.
264 void dst_node_trans_exit(struct dst_node
*n
)
267 struct rb_node
*rb_node
;
272 dprintk("%s: n: %p, cancelling the work.\n", __func__
, n
);
273 cancel_delayed_work_sync(&n
->trans_work
);
274 flush_scheduled_work();
275 dprintk("%s: n: %p, work has been cancelled.\n", __func__
, n
);
277 for (rb_node
= rb_first(&n
->trans_root
); rb_node
; ) {
278 t
= rb_entry(rb_node
, struct dst_trans
, trans_entry
);
280 dprintk("%s: t: %p, gen: %llu, n: %s.\n",
281 __func__
, t
, t
->gen
, n
->name
);
283 rb_node
= rb_next(rb_node
);
285 t
->error
= -ETIMEDOUT
;
286 dst_trans_remove_nolock(t
);
290 mempool_destroy(n
->trans_pool
);
291 kmem_cache_destroy(n
->trans_cache
);
295 * Initialize transaction storage for given node.
296 * Transaction stores not only control information,
297 * but also network command and crypto data (if needed)
298 * to reduce number of allocations. Thus transaction size
299 * differs from node to node.
301 int dst_node_trans_init(struct dst_node
*n
, unsigned int size
)
304 * We need this because a node with a given name can be dropped from the
305 * hash table while still being alive, so subsequent creation of a node
306 * with the same name may collide with the existing cache name.
309 snprintf(n
->cache_name
, sizeof(n
->cache_name
), "%s-%p", n
->name
, n
);
311 n
->trans_cache
= kmem_cache_create(n
->cache_name
,
312 size
+ n
->crypto
.crypto_attached_size
,
317 n
->trans_pool
= mempool_create_slab_pool(dst_mempool_num
,
320 goto err_out_cache_destroy
;
322 mutex_init(&n
->trans_lock
);
323 n
->trans_root
= RB_ROOT
;
325 INIT_DELAYED_WORK(&n
->trans_work
, dst_trans_scan
);
326 schedule_delayed_work(&n
->trans_work
, n
->trans_scan_timeout
);
328 dprintk("%s: n: %p, size: %u, crypto: %u.\n",
329 __func__
, n
, size
, n
->crypto
.crypto_attached_size
);
333 err_out_cache_destroy
:
334 kmem_cache_destroy(n
->trans_cache
);