2 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
3 * Copyright(c) 2009 Intel Corporation
5 * based on raid6recov.c:
6 * Copyright 2002 H. Peter Anvin
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 51
20 * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
23 #include <linux/kernel.h>
24 #include <linux/interrupt.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/raid/pq.h>
27 #include <linux/async_tx.h>
// async_sum_product - combine the two pages in srcs into dest using the two
// byte coefficients in coef. The visible code tries a DMA_PQ channel first
// and otherwise sets up raid6_gfmul table lookups for a CPU fallback.
// NOTE(review): this listing carries embedded original line numbers and
// skips several of them (32, 37-40, 46, 53-54, 56-58, 61, 65-66, 74-83),
// so braces, any guard around the DMA path, the tail of the prep call,
// the return statements and the CPU loop are not visible here. Presumably
// the result is dest = coef[0]*srcs[0] + coef[1]*srcs[1] over GF(2^8) -
// confirm against the complete file.
29 static struct dma_async_tx_descriptor
*
30 async_sum_product(struct page
*dest
, struct page
**srcs
, unsigned char *coef
,
31 size_t len
, struct async_submit_ctl
*submit
)
// Ask async_tx for a channel that can do DMA_PQ with one destination page
// and two source pages of len bytes; dma stays NULL when no channel is found.
33 struct dma_chan
*chan
= async_tx_find_channel(submit
, DMA_PQ
,
34 &dest
, 1, srcs
, 2, len
);
35 struct dma_device
*dma
= chan
? chan
->device
: NULL
;
// Multiplication tables used by the CPU fallback further down.
36 const u8
*amul
, *bmul
;
// State for the DMA attempt.
// NOTE(review): dev reads dma->dev while dma may be NULL per the line above;
// original lines 37-40 are absent from this listing, so a check on dma is
// presumably among them - verify before relying on this.
41 dma_addr_t dma_dest
[2];
42 dma_addr_t dma_src
[2];
43 struct device
*dev
= dma
->dev
;
44 struct dma_async_tx_descriptor
*tx
;
// The P output is disabled for this operation; only slot 1 of dma_dest is
// filled in below.
45 enum dma_ctrl_flags dma_flags
= DMA_PREP_PQ_DISABLE_P
;
// Carry a fence request from the submit flags over to the descriptor flags.
47 if (submit
->flags
& ASYNC_TX_FENCE
)
48 dma_flags
|= DMA_PREP_FENCE
;
// Map dest bidirectionally and both sources to-device, from page offset 0
// for len bytes.
49 dma_dest
[1] = dma_map_page(dev
, dest
, 0, len
, DMA_BIDIRECTIONAL
);
50 dma_src
[0] = dma_map_page(dev
, srcs
[0], 0, len
, DMA_TO_DEVICE
);
51 dma_src
[1] = dma_map_page(dev
, srcs
[1], 0, len
, DMA_TO_DEVICE
);
// Prepare a two-source PQ descriptor with the caller coefficients.
// The remaining arguments of this call are on lines missing from the listing.
52 tx
= dma
->device_prep_dma_pq(chan
, dma_dest
, dma_src
, 2, coef
,
// Hand the descriptor to async_tx.
// NOTE(review): the test on tx and the return that presumably surround this
// call are not visible here.
55 async_tx_submit(chan
, tx
, submit
);
59 /* could not get a descriptor, unmap and fall through to
60 * the synchronous path
// The three unmaps below mirror the three mappings above, with the same
// lengths and directions.
62 dma_unmap_page(dev
, dma_dest
[1], len
, DMA_BIDIRECTIONAL
);
63 dma_unmap_page(dev
, dma_src
[0], len
, DMA_TO_DEVICE
);
64 dma_unmap_page(dev
, dma_src
[1], len
, DMA_TO_DEVICE
);
67 /* run the operation synchronously */
// async_tx_quiesce is called on the dependency before the CPU touches the
// pages (presumably it waits for that dependency to finish - confirm).
68 async_tx_quiesce(&submit
->depend_tx
);
// Pick one multiplication table per coefficient, then take the CPU
// addresses of both sources and of the destination.
69 amul
= raid6_gfmul
[coef
[0]];
70 bmul
= raid6_gfmul
[coef
[1]];
71 a
= page_address(srcs
[0]);
72 b
= page_address(srcs
[1]);
73 c
= page_address(dest
);
// NOTE(review): the declarations of a, b and c, the byte loop that uses
// amul and bmul, and the end of the function are not part of this listing.
// async_mult - multiply the page src by the single byte coefficient coef
// into dest. As visible here it mirrors async_sum_product with one source:
// try a DMA_PQ channel, otherwise set up a raid6_gfmul table lookup for a
// CPU fallback.
// NOTE(review): embedded original line numbers skip 87, 92-94, 100,
// 106-107, 109-111, 114, 117-118, 121 and 126 onward, so braces, any guard
// around the DMA path, the tail of the prep call, the return statements and
// the CPU loop are not visible. Presumably dest = coef * src over GF(2^8) -
// confirm against the complete file.
84 static struct dma_async_tx_descriptor
*
85 async_mult(struct page
*dest
, struct page
*src
, u8 coef
, size_t len
,
86 struct async_submit_ctl
*submit
)
// Look for a DMA_PQ capable channel for one destination and one source page.
88 struct dma_chan
*chan
= async_tx_find_channel(submit
, DMA_PQ
,
89 &dest
, 1, &src
, 1, len
);
90 struct dma_device
*dma
= chan
? chan
->device
: NULL
;
91 const u8
*qmul
; /* Q multiplier table */
// State for the DMA attempt.
// NOTE(review): dev reads dma->dev while dma may be NULL; original lines
// 92-94 are absent here, so a check on dma is presumably among them.
95 dma_addr_t dma_dest
[2];
96 dma_addr_t dma_src
[1];
97 struct device
*dev
= dma
->dev
;
98 struct dma_async_tx_descriptor
*tx
;
// The P output is disabled; only slot 1 of dma_dest is filled in below.
99 enum dma_ctrl_flags dma_flags
= DMA_PREP_PQ_DISABLE_P
;
// Carry a fence request from the submit flags over to the descriptor flags.
101 if (submit
->flags
& ASYNC_TX_FENCE
)
102 dma_flags
|= DMA_PREP_FENCE
;
// Map dest bidirectionally and src to-device, offset 0, len bytes.
103 dma_dest
[1] = dma_map_page(dev
, dest
, 0, len
, DMA_BIDIRECTIONAL
);
104 dma_src
[0] = dma_map_page(dev
, src
, 0, len
, DMA_TO_DEVICE
);
// Prepare a single-source PQ descriptor; coef is passed by address as a
// one-element coefficient list. The rest of the call is not in the listing.
105 tx
= dma
->device_prep_dma_pq(chan
, dma_dest
, dma_src
, 1, &coef
,
// Hand the descriptor to async_tx.
// NOTE(review): the test on tx and the return are not visible here.
108 async_tx_submit(chan
, tx
, submit
);
112 /* could not get a descriptor, unmap and fall through to
113 * the synchronous path
// The two unmaps below mirror the two mappings above.
115 dma_unmap_page(dev
, dma_dest
[1], len
, DMA_BIDIRECTIONAL
);
116 dma_unmap_page(dev
, dma_src
[0], len
, DMA_TO_DEVICE
);
119 /* no channel available, or failed to allocate a descriptor, so
120 * perform the operation synchronously
// async_tx_quiesce is called on the dependency before the CPU touches the
// pages (presumably it waits for that dependency to finish - confirm).
122 async_tx_quiesce(&submit
->depend_tx
);
// Select the multiplication table for coef and take the CPU addresses of
// the destination and source pages.
123 qmul
= raid6_gfmul
[coef
];
124 d
= page_address(dest
);
125 s
= page_address(src
);
// NOTE(review): the declarations of d and s, the byte loop that uses qmul,
// and the end of the function are not part of this listing.
// __2data_recov_4 - two-failed-data-block recovery for the 4 disk special
// case. The visible steps are one async_sum_product into b followed by one
// async_xor into a.
// NOTE(review): embedded original line numbers skip 136, 145-151, 154-155,
// 160-163, 165 and 167 onward, so the assignments of p, q, a, b and srcs,
// the tail of the second init_async_submit call and the return statement
// are not visible in this listing.
133 static struct dma_async_tx_descriptor
*
134 __2data_recov_4(int disks
, size_t bytes
, int faila
, int failb
,
135 struct page
**blocks
, struct async_submit_ctl
*submit
)
137 struct dma_async_tx_descriptor
*tx
= NULL
;
138 struct page
*p
, *q
, *a
, *b
;
139 struct page
*srcs
[2];
140 unsigned char coef
[2];
// Remember the caller flags, callback, callback argument and scribble
// pointer: submit is re-initialised for each step below.
141 enum async_tx_flags flags
= submit
->flags
;
142 dma_async_tx_callback cb_fn
= submit
->cb_fn
;
143 void *cb_param
= submit
->cb_param
;
144 void *scribble
= submit
->scribble
;
152 /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
153 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
// Two coefficients are looked up from the raid6_gfexi, raid6_gfinv and
// raid6_gfexp tables using the failed indices (presumably the A and B
// factors of the formula above - confirm).
156 coef
[0] = raid6_gfexi
[failb
-faila
];
157 coef
[1] = raid6_gfinv
[raid6_gfexp
[faila
]^raid6_gfexp
[failb
]];
// Intermediate step: fenced, chained on tx, no completion callback.
158 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
159 tx
= async_sum_product(b
, srcs
, coef
, bytes
, submit
);
// Last step: the caller flags and callback are restored here, with
// ASYNC_TX_XOR_ZERO_DST added for the xor into a. The remaining arguments
// of this init call are on a line missing from the listing.
164 init_async_submit(submit
, flags
| ASYNC_TX_XOR_ZERO_DST
, tx
, cb_fn
,
166 tx
= async_xor(a
, srcs
, 0, 2, bytes
, submit
);
// __2data_recov_5 - two-failed-data-block recovery for the case where a
// single good data block remains. Visible chain of operations: copy the
// good page g into dp, multiply g by raid6_gfexp[good] into dq, xor into
// dp, xor into dq, async_sum_product into dq, then a final xor into dp.
// NOTE(review): embedded original line numbers skip many lines (175,
// 185-187, 190, 192-195, 197-201, 204-208, 213, 215-216, 220, 222-223, 227,
// 229-230, 235-238, 240, 242 onward), so the loop bodies, the assignments
// of p, q, g, dp, dq and srcs, and the return statement are not visible.
172 static struct dma_async_tx_descriptor
*
173 __2data_recov_5(int disks
, size_t bytes
, int faila
, int failb
,
174 struct page
**blocks
, struct async_submit_ctl
*submit
)
176 struct dma_async_tx_descriptor
*tx
= NULL
;
177 struct page
*p
, *q
, *g
, *dp
, *dq
;
178 struct page
*srcs
[2];
179 unsigned char coef
[2];
// Remember the caller flags, callback, callback argument and scribble
// pointer: submit is re-initialised for each step below.
180 enum async_tx_flags flags
= submit
->flags
;
181 dma_async_tx_callback cb_fn
= submit
->cb_fn
;
182 void *cb_param
= submit
->cb_param
;
183 void *scribble
= submit
->scribble
;
184 int good_srcs
, good
, i
;
// Scan the data slots (the last two entries are excluded), testing for
// NULL entries and for the two failed indices. The statements controlled
// by these tests are missing from the listing; presumably they skip the
// slot and the remainder records good and counts good_srcs - confirm.
188 for (i
= 0; i
< disks
-2; i
++) {
189 if (blocks
[i
] == NULL
)
191 if (i
== faila
|| i
== failb
)
// More than one good source is treated as a fatal programming error.
196 BUG_ON(good_srcs
> 1);
202 /* Compute syndrome with zero for the missing data pages
203 * Use the dead data pages as temporary storage for delta p and
// Intermediate steps: each one is fenced, chained on the previous tx and
// has no completion callback.
209 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
210 tx
= async_memcpy(dp
, g
, 0, 0, bytes
, submit
);
211 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
212 tx
= async_mult(dq
, g
, raid6_gfexp
[good
], bytes
, submit
);
214 /* compute P + Pxy */
// NOTE(review): the lines that fill srcs before each xor are not visible.
217 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
218 NULL
, NULL
, scribble
);
219 tx
= async_xor(dp
, srcs
, 0, 2, bytes
, submit
);
221 /* compute Q + Qxy */
224 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
225 NULL
, NULL
, scribble
);
226 tx
= async_xor(dq
, srcs
, 0, 2, bytes
, submit
);
228 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
// Two coefficients are looked up from the raid6_gfexi, raid6_gfinv and
// raid6_gfexp tables using the failed indices (presumably the A and B
// factors of the formula above - confirm).
231 coef
[0] = raid6_gfexi
[failb
-faila
];
232 coef
[1] = raid6_gfinv
[raid6_gfexp
[faila
]^raid6_gfexp
[failb
]];
233 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
234 tx
= async_sum_product(dq
, srcs
, coef
, bytes
, submit
);
// Last step: the caller flags and callback are restored here, with
// ASYNC_TX_XOR_DROP_DST added. The remaining arguments of this init call
// are on a line missing from the listing.
239 init_async_submit(submit
, flags
| ASYNC_TX_XOR_DROP_DST
, tx
, cb_fn
,
241 tx
= async_xor(dp
, srcs
, 0, 2, bytes
, submit
);
// __2data_recov_n - general two-failed-data-block recovery. Visible chain
// of operations: blank the two failed slots and point the last two table
// entries at dp and dq, run async_gen_syndrome over the table, xor into dp,
// xor into dq, async_sum_product into dq, then a final xor into dp.
// NOTE(review): embedded original line numbers skip many lines (249,
// 258-261, 265-266, 269, 272, 275, 277-281, 283-284, 288, 290-291, 295,
// 297-298, 303-306, 308, 310 onward), so the assignments of p, q, dp, dq and
// srcs, the statements that restore the pointer table, and the return
// statement are not visible in this listing.
246 static struct dma_async_tx_descriptor
*
247 __2data_recov_n(int disks
, size_t bytes
, int faila
, int failb
,
248 struct page
**blocks
, struct async_submit_ctl
*submit
)
250 struct dma_async_tx_descriptor
*tx
= NULL
;
251 struct page
*p
, *q
, *dp
, *dq
;
252 struct page
*srcs
[2];
253 unsigned char coef
[2];
// Remember the caller flags, callback, callback argument and scribble
// pointer: submit is re-initialised for each step below.
254 enum async_tx_flags flags
= submit
->flags
;
255 dma_async_tx_callback cb_fn
= submit
->cb_fn
;
256 void *cb_param
= submit
->cb_param
;
257 void *scribble
= submit
->scribble
;
262 /* Compute syndrome with zero for the missing data pages
263 * Use the dead data pages as temporary storage for
264 * delta p and delta q
// The caller table is edited in place: the failed slots become NULL and
// the last two entries are replaced by dp and dq for the syndrome call.
267 blocks
[faila
] = NULL
;
268 blocks
[disks
-2] = dp
;
270 blocks
[failb
] = NULL
;
271 blocks
[disks
-1] = dq
;
// Intermediate step: fenced, chained on tx, no completion callback.
273 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
274 tx
= async_gen_syndrome(blocks
, 0, disks
, bytes
, submit
);
276 /* Restore pointer table */
// NOTE(review): the restoring assignments themselves are missing here.
282 /* compute P + Pxy */
285 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
286 NULL
, NULL
, scribble
);
287 tx
= async_xor(dp
, srcs
, 0, 2, bytes
, submit
);
289 /* compute Q + Qxy */
292 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
293 NULL
, NULL
, scribble
);
294 tx
= async_xor(dq
, srcs
, 0, 2, bytes
, submit
);
296 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
// Two coefficients are looked up from the raid6_gfexi, raid6_gfinv and
// raid6_gfexp tables using the failed indices (presumably the A and B
// factors of the formula above - confirm).
299 coef
[0] = raid6_gfexi
[failb
-faila
];
300 coef
[1] = raid6_gfinv
[raid6_gfexp
[faila
]^raid6_gfexp
[failb
]];
301 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
302 tx
= async_sum_product(dq
, srcs
, coef
, bytes
, submit
);
// Last step: the caller flags and callback are restored here, with
// ASYNC_TX_XOR_DROP_DST added. The remaining arguments of this init call
// are on a line missing from the listing.
307 init_async_submit(submit
, flags
| ASYNC_TX_XOR_DROP_DST
, tx
, cb_fn
,
309 tx
= async_xor(dp
, srcs
, 0, 2, bytes
, submit
);
// Exported entry point for recovering two failed data blocks. It either
// runs the synchronous raid6_2data_recov on CPU addresses or dispatches to
// one of the __2data_recov_4, __2data_recov_5 or __2data_recov_n helpers
// depending on how many non-NULL data sources are counted.
// NOTE(review): this listing dropped the comment opener and closer around
// the kernel-doc lines below, the bytes parameter line (original 317), and
// many code lines (326, 329, 331-333, 335, 340, 343, 348, 350, 352, 354-358,
// 360-361, 363-364, 366-368, 373, 375, 380, 382, 384-385): braces, the else
// keyword, the loop body that counts sources, the case labels and the
// return of the synchronous branch are not visible.
315 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
316 * @disks: number of disks in the RAID-6 array
318 * @faila: first failed drive index
319 * @failb: second failed drive index
320 * @blocks: array of source pointers where the last two entries are p and q
321 * @submit: submission/completion modifiers
323 struct dma_async_tx_descriptor
*
324 async_raid6_2data_recov(int disks
, size_t bytes
, int faila
, int failb
,
325 struct page
**blocks
, struct async_submit_ctl
*submit
)
327 void *scribble
= submit
->scribble
;
328 int non_zero_srcs
, i
;
// The two failed indices must differ.
330 BUG_ON(faila
== failb
);
334 pr_debug("%s: disks: %d len: %zu\n", __func__
, disks
, bytes
);
336 /* if a dma resource is not available or a scribble buffer is not
337 * available punt to the synchronous path. In the 'dma not
338 * available' case be sure to use the scribble buffer to
339 * preserve the content of 'blocks' as the caller intended.
// Synchronous branch: taken when no DMA_PQ channel exists or when no
// scribble buffer was supplied. Without a scribble buffer the blocks array
// itself is reused to hold the converted addresses, so it is overwritten.
341 if (!async_dma_find_channel(DMA_PQ
) || !scribble
) {
342 void **ptrs
= scribble
? scribble
: (void **) blocks
;
344 async_tx_quiesce(&submit
->depend_tx
);
// Convert every page pointer to a CPU address; NULL entries are replaced
// by the shared zero page. NOTE(review): the else keyword between the two
// assignments is on a line missing from this listing.
345 for (i
= 0; i
< disks
; i
++)
346 if (blocks
[i
] == NULL
)
347 ptrs
[i
] = (void *) raid6_empty_zero_page
;
349 ptrs
[i
] = page_address(blocks
[i
]);
351 raid6_2data_recov(disks
, bytes
, faila
, failb
, ptrs
);
353 async_tx_sync_epilog(submit
);
// Count sources among the data slots, stopping early once the count
// reaches 4. The loop body and the initialisation of non_zero_srcs are
// not visible in this listing.
359 for (i
= 0; i
< disks
-2 && non_zero_srcs
< 4; i
++)
// Dispatch on the count. The case labels are missing from the listing;
// the comments below describe which helper serves which array shape.
362 switch (non_zero_srcs
) {
365 /* There must be at least 2 sources - the failed devices. */
369 /* dma devices do not uniformly understand a zero source pq
370 * operation (in contrast to the synchronous case), so
371 * explicitly handle the special case of a 4 disk array with
372 * both data disks missing.
374 return __2data_recov_4(disks
, bytes
, faila
, failb
, blocks
, submit
);
376 /* dma devices do not uniformly understand a single
377 * source pq operation (in contrast to the synchronous
378 * case), so explicitly handle the special case of a 5 disk
379 * array with 2 of 3 data disks missing.
381 return __2data_recov_5(disks
, bytes
, faila
, failb
, blocks
, submit
);
383 return __2data_recov_n(disks
, bytes
, faila
, failb
, blocks
, submit
);
386 EXPORT_SYMBOL_GPL(async_raid6_2data_recov
);
// Exported entry point for recovering one failed data block together with
// the p block. It either runs the synchronous raid6_datap_recov on CPU
// addresses or builds a chain of async operations: regenerate a syndrome
// with the failed slot blanked (or copy and multiply when only one good
// source exists), xor into dq, multiply dq by coef, then a final xor into p.
// NOTE(review): this listing dropped the comment opener and closer around
// the kernel-doc lines below, the bytes parameter line (original 391), and
// many code lines (399, 402, 409, 411, 416, 419, 424, 426, 428, 430-435,
// 437-445, 447-450, 453-454, 457, 460, 463, 465, 467, 469, 471, 473,
// 475-476, 478-480, 483-485, 489, 492-494, 496, 498-500): the declaration
// of coef, braces, else keywords, the loop body that finds good sources,
// the assignments of p, q, dq and srcs, the pointer-table restore and the
// return statements are not visible.
389 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
390 * @disks: number of disks in the RAID-6 array
392 * @faila: failed drive index
393 * @blocks: array of source pointers where the last two entries are p and q
394 * @submit: submission/completion modifiers
396 struct dma_async_tx_descriptor
*
397 async_raid6_datap_recov(int disks
, size_t bytes
, int faila
,
398 struct page
**blocks
, struct async_submit_ctl
*submit
)
400 struct dma_async_tx_descriptor
*tx
= NULL
;
401 struct page
*p
, *q
, *dq
;
// Remember the caller flags, callback, callback argument and scribble
// pointer: submit is re-initialised for each step below.
403 enum async_tx_flags flags
= submit
->flags
;
404 dma_async_tx_callback cb_fn
= submit
->cb_fn
;
405 void *cb_param
= submit
->cb_param
;
406 void *scribble
= submit
->scribble
;
407 int good_srcs
, good
, i
;
408 struct page
*srcs
[2];
410 pr_debug("%s: disks: %d len: %zu\n", __func__
, disks
, bytes
);
412 /* if a dma resource is not available or a scribble buffer is not
413 * available punt to the synchronous path. In the 'dma not
414 * available' case be sure to use the scribble buffer to
415 * preserve the content of 'blocks' as the caller intended.
// Synchronous branch: taken when no DMA_PQ channel exists or when no
// scribble buffer was supplied. Without a scribble buffer the blocks array
// itself is reused to hold the converted addresses, so it is overwritten.
417 if (!async_dma_find_channel(DMA_PQ
) || !scribble
) {
418 void **ptrs
= scribble
? scribble
: (void **) blocks
;
420 async_tx_quiesce(&submit
->depend_tx
);
// Convert every page pointer to a CPU address; NULL entries are replaced
// by the shared zero page. NOTE(review): the else keyword between the two
// assignments is on a line missing from this listing.
421 for (i
= 0; i
< disks
; i
++)
422 if (blocks
[i
] == NULL
)
423 ptrs
[i
] = (void*)raid6_empty_zero_page
;
425 ptrs
[i
] = page_address(blocks
[i
]);
427 raid6_datap_recov(disks
, bytes
, faila
, ptrs
);
429 async_tx_sync_epilog(submit
);
// Scan the data slots (the last two entries are excluded). The loop body
// is missing from the listing; presumably it records good and counts
// good_srcs - confirm against the complete file.
436 for (i
= 0; i
< disks
-2; i
++) {
// Having no good source at all is treated as a fatal programming error.
446 BUG_ON(good_srcs
== 0);
451 /* Compute syndrome with zero for the missing data page
452 * Use the dead data page as temporary storage for delta q
// The caller table is edited in place: the failed slot becomes NULL and
// the last entry is replaced by dq.
455 blocks
[faila
] = NULL
;
456 blocks
[disks
-1] = dq
;
458 /* in the 4-disk case we only need to perform a single source
459 * multiplication with the one good data block.
// Single good source: copy g into p and multiply g by raid6_gfexp[good]
// into dq. The trailing arguments of the init calls below are on lines
// missing from the listing.
461 if (good_srcs
== 1) {
462 struct page
*g
= blocks
[good
];
464 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
,
466 tx
= async_memcpy(p
, g
, 0, 0, bytes
, submit
);
468 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
,
470 tx
= async_mult(dq
, g
, raid6_gfexp
[good
], bytes
, submit
);
// NOTE(review): presumably an else branch starts here (line not visible),
// making the syndrome generation the alternative to the copy and multiply.
472 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
,
474 tx
= async_gen_syndrome(blocks
, 0, disks
, bytes
, submit
);
477 /* Restore pointer table */
// NOTE(review): the restoring assignments themselves are missing here.
481 /* calculate g^{-faila} */
482 coef
= raid6_gfinv
[raid6_gfexp
[faila
]];
// Intermediate steps: fenced, chained on the previous tx, no callback.
486 init_async_submit(submit
, ASYNC_TX_FENCE
|ASYNC_TX_XOR_DROP_DST
, tx
,
487 NULL
, NULL
, scribble
);
488 tx
= async_xor(dq
, srcs
, 0, 2, bytes
, submit
);
// dq is multiplied by coef in place (same page as source and destination).
490 init_async_submit(submit
, ASYNC_TX_FENCE
, tx
, NULL
, NULL
, scribble
);
491 tx
= async_mult(dq
, dq
, coef
, bytes
, submit
);
// Last step: the caller flags and callback are restored here, with
// ASYNC_TX_XOR_DROP_DST added. The remaining arguments of this init call
// are on a line missing from the listing.
495 init_async_submit(submit
, flags
| ASYNC_TX_XOR_DROP_DST
, tx
, cb_fn
,
497 tx
= async_xor(p
, srcs
, 0, 2, bytes
, submit
);
501 EXPORT_SYMBOL_GPL(async_raid6_datap_recov
);
// Module metadata: author, one-line description and licence tag.
503 MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
504 MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
505 MODULE_LICENSE("GPL");