/*
 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
 * Copyright(c) 2009 Intel Corporation
 *
 * based on raid6recov.c:
 * Copyright 2002 H. Peter Anvin
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>

static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
		  size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, srcs, 2, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	const u8 *amul, *bmul;
	u8 ax, bx;
	u8 *a, *b, *c;

	if (dma) {
		dma_addr_t dma_dest[2];
		dma_addr_t dma_src[2];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
					     len, dma_flags);
		if (tx) {
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
		dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
	}

	/* run the operation synchronously */
	async_tx_quiesce(&submit->depend_tx);
	amul = raid6_gfmul[coef[0]];
	bmul = raid6_gfmul[coef[1]];
	a = page_address(srcs[0]);
	b = page_address(srcs[1]);
	c = page_address(dest);

	while (len--) {
		ax = amul[*a++];
		bx = bmul[*b++];
		*c++ = ax ^ bx;
	}

	return NULL;
}
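
/*
 * async_mult - GF(2^8) multiply of a single source into @dest
 *
 * A PQ-capable DMA engine is asked for the Q result only
 * (DMA_PREP_PQ_DISABLE_P) of a one-source operation, which reduces the
 * syndrome calculation to dest = src * coef.  If no channel or descriptor
 * is available the multiply falls back to the raid6_gfmul lookup table.
 * This mirrors async_sum_product() above, which computes the two-source
 * form dest = srcs[0]*coef[0] + srcs[1]*coef[1].
 */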
static struct dma_async_tx_descriptor *
async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
	   struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, &src, 1, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	const u8 *qmul; /* Q multiplier table */
	u8 *d, *s;

	if (dma) {
		dma_addr_t dma_dest[2];
		dma_addr_t dma_src[1];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
					     len, dma_flags);
		if (tx) {
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
	}

	/* no channel available, or failed to allocate a descriptor, so
	 * perform the operation synchronously
	 */
	async_tx_quiesce(&submit->depend_tx);
	qmul = raid6_gfmul[coef];
	d = page_address(dest);
	s = page_address(src);

	while (len--)
		*d++ = qmul[*s++];

	return NULL;
}

static struct dma_async_tx_descriptor *
__2data_recov_4(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *a, *b;
	struct page *srcs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;

	p = blocks[disks-2];
	q = blocks[disks-1];

	a = blocks[faila];
	b = blocks[failb];

	/* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = p;
	srcs[1] = q;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(b, srcs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = p;
	srcs[1] = b;
	init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(a, srcs, 0, 2, bytes, submit);

	return tx;
}
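
/*
 * __2data_recov_5 - recover two data blocks when only one data disk survives
 *
 * Some PQ engines cannot generate a syndrome from a single source (see the
 * dispatch in async_raid6_2data_recov()), so the partial syndromes are built
 * by hand: Pxy is a copy of the lone good block and Qxy is that block
 * multiplied by g^{good}.  The remainder of the recovery is identical to the
 * general case in __2data_recov_n().
 */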
static struct dma_async_tx_descriptor *
__2data_recov_5(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *g, *dp, *dq;
	struct page *srcs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;
	int good_srcs, good, i;

	good_srcs = 0;
	good = -1;
	for (i = 0; i < disks-2; i++) {
		if (blocks[i] == NULL)
			continue;
		if (i == faila || i == failb)
			continue;
		good = i;
		good_srcs++;
	}
	BUG_ON(good_srcs > 1);

	p = blocks[disks-2];
	q = blocks[disks-1];
	g = blocks[good];

	/* Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for delta p and
	 * delta q
	 */
	dp = blocks[faila];
	dq = blocks[failb];

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_memcpy(dp, g, 0, 0, bytes, submit);
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);

	/* compute P + Pxy */
	srcs[0] = dp;
	srcs[1] = p;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dp, srcs, 0, 2, bytes, submit);

	/* compute Q + Qxy */
	srcs[0] = dq;
	srcs[1] = q;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dq, srcs, 0, 2, bytes, submit);

	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = dp;
	srcs[1] = dq;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(dq, srcs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = dp;
	srcs[1] = dq;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(dp, srcs, 0, 2, bytes, submit);

	return tx;
}
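
/*
 * __2data_recov_n - general case: recover the data blocks at faila and failb
 *
 * The syndrome is regenerated with the failed blocks treated as zero,
 * leaving the partial results Pxy and Qxy in the dead pages.  With
 * a = faila and b = failb (see H. Peter Anvin, "The mathematics of RAID-6"):
 *
 *	P + Pxy = Da + Db
 *	Q + Qxy = g^a * Da + g^b * Db
 *
 * which solves to
 *
 *	Db = (g^(b-a) + 1)^-1 * (P+Pxy) + (g^a + g^b)^-1 * (Q+Qxy)
 *	Da = (P+Pxy) + Db
 *
 * The two coefficients are raid6_gfexi[failb-faila] and
 * raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]] below, and the
 * multiply-accumulate is performed by async_sum_product().
 */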
static struct dma_async_tx_descriptor *
__2data_recov_n(int disks, size_t bytes, int faila, int failb,
		struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *dp, *dq;
	struct page *srcs[2];
	unsigned char coef[2];
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;

	p = blocks[disks-2];
	q = blocks[disks-1];

	/* Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = blocks[faila];
	blocks[faila] = NULL;
	blocks[disks-2] = dp;
	dq = blocks[failb];
	blocks[failb] = NULL;
	blocks[disks-1] = dq;

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);

	/* Restore pointer table */
	blocks[faila] = dp;
	blocks[failb] = dq;
	blocks[disks-2] = p;
	blocks[disks-1] = q;

	/* compute P + Pxy */
	srcs[0] = dp;
	srcs[1] = p;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dp, srcs, 0, 2, bytes, submit);

	/* compute Q + Qxy */
	srcs[0] = dq;
	srcs[1] = q;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dq, srcs, 0, 2, bytes, submit);

	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
	srcs[0] = dp;
	srcs[1] = dq;
	coef[0] = raid6_gfexi[failb-faila];
	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_sum_product(dq, srcs, coef, bytes, submit);

	/* Dy = P+Pxy+Dx */
	srcs[0] = dp;
	srcs[1] = dq;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(dp, srcs, 0, 2, bytes, submit);

	return tx;
}

/**
 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: first failed drive index
 * @failb: second failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @submit: submission/completion modifiers
 */
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
			struct page **blocks, struct async_submit_ctl *submit)
{
	void *scribble = submit->scribble;
	int non_zero_srcs, i;

	BUG_ON(faila == failb);
	if (failb < faila)
		swap(faila, failb);

	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

	/* if a dma resource is not available or a scribble buffer is not
	 * available punt to the synchronous path.  In the 'dma not
	 * available' case be sure to use the scribble buffer to
	 * preserve the content of 'blocks' as the caller intended.
	 */
	if (!async_dma_find_channel(DMA_PQ) || !scribble) {
		void **ptrs = scribble ? scribble : (void **) blocks;

		async_tx_quiesce(&submit->depend_tx);
		for (i = 0; i < disks; i++)
			if (blocks[i] == NULL)
				ptrs[i] = (void *) raid6_empty_zero_page;
			else
				ptrs[i] = page_address(blocks[i]);

		raid6_2data_recov(disks, bytes, faila, failb, ptrs);

		async_tx_sync_epilog(submit);

		return NULL;
	}

	non_zero_srcs = 0;
	for (i = 0; i < disks-2 && non_zero_srcs < 4; i++)
		if (blocks[i])
			non_zero_srcs++;
	switch (non_zero_srcs) {
	case 0:
	case 1:
		/* There must be at least 2 sources - the failed devices. */
		BUG();

	case 2:
		/* dma devices do not uniformly understand a zero source pq
		 * operation (in contrast to the synchronous case), so
		 * explicitly handle the special case of a 4 disk array with
		 * both data disks missing.
		 */
		return __2data_recov_4(disks, bytes, faila, failb, blocks, submit);
	case 3:
		/* dma devices do not uniformly understand a single
		 * source pq operation (in contrast to the synchronous
		 * case), so explicitly handle the special case of a 5 disk
		 * array with 2 of 3 data disks missing.
		 */
		return __2data_recov_5(disks, bytes, faila, failb, blocks, submit);
	default:
		return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
	}
}
EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
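
/*
 * Recovering one data block together with the P block relies on the Q
 * syndrome alone: regenerating Q with the failed data block treated as
 * zero yields Qx, and since Q + Qx = g^{faila} * Dx the missing block is
 *
 *	Dx = (Q + Qx) * g^{-faila}
 *
 * after which P is rebuilt by xor-ing Dx into the regenerated partial
 * parity.  async_raid6_datap_recov() below does exactly this, using
 * async_gen_syndrome() (or a memcpy/mult pair when only one good data
 * block exists), an async_mult() by g^{-faila}, and two async_xor() steps.
 */
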
/**
 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @submit: submission/completion modifiers
 */
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
			struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *dq;
	u8 coef;
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;
	int good_srcs, good, i;
	struct page *srcs[2];

	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

	/* if a dma resource is not available or a scribble buffer is not
	 * available punt to the synchronous path.  In the 'dma not
	 * available' case be sure to use the scribble buffer to
	 * preserve the content of 'blocks' as the caller intended.
	 */
	if (!async_dma_find_channel(DMA_PQ) || !scribble) {
		void **ptrs = scribble ? scribble : (void **) blocks;

		async_tx_quiesce(&submit->depend_tx);
		for (i = 0; i < disks; i++)
			if (blocks[i] == NULL)
				ptrs[i] = (void *) raid6_empty_zero_page;
			else
				ptrs[i] = page_address(blocks[i]);

		raid6_datap_recov(disks, bytes, faila, ptrs);

		async_tx_sync_epilog(submit);

		return NULL;
	}

	good_srcs = 0;
	good = -1;
	for (i = 0; i < disks-2; i++) {
		if (i == faila)
			continue;
		if (blocks[i]) {
			good = i;
			good_srcs++;
			if (good_srcs > 1)
				break;
		}
	}
	BUG_ON(good_srcs == 0);

	p = blocks[disks-2];
	q = blocks[disks-1];

	/* Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = blocks[faila];
	blocks[faila] = NULL;
	blocks[disks-1] = dq;

	/* in the 4-disk case we only need to perform a single source
	 * multiplication with the one good data block.
	 */
	if (good_srcs == 1) {
		struct page *g = blocks[good];

		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_memcpy(p, g, 0, 0, bytes, submit);

		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
	} else {
		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
	}

	/* Restore pointer table */
	blocks[faila] = dq;
	blocks[disks-1] = q;

	/* calculate g^{-faila} */
	coef = raid6_gfinv[raid6_gfexp[faila]];

	srcs[0] = dq;
	srcs[1] = q;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dq, srcs, 0, 2, bytes, submit);

	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_mult(dq, dq, coef, bytes, submit);

	srcs[0] = p;
	srcs[1] = dq;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(p, srcs, 0, 2, bytes, submit);

	return tx;
}
EXPORT_SYMBOL_GPL(async_raid6_datap_recov);

MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
MODULE_LICENSE("GPL");