/*
 * xor offload engine api
 *
 * Copyright © 2006, Intel Corporation.
 *
 * Dan Williams <dan.j.williams@intel.com>
 *
 * with architecture considerations by:
 * Neil Brown <neilb@suse.de>
 * Jeff Garzik <jeff@garzik.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/raid/xor.h>
#include <linux/async_tx.h>

/* do_async_xor - dma map the pages and perform the xor with an engine */
static __async_inline struct dma_async_tx_descriptor *
do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
	     unsigned int offset, int src_cnt, size_t len,
	     enum async_tx_flags flags,
	     struct dma_async_tx_descriptor *depend_tx,
	     dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_device *dma = chan->device;
	dma_addr_t *dma_src = (dma_addr_t *) src_list;
	struct dma_async_tx_descriptor *tx = NULL;
	int src_off = 0;
	int i;
	dma_async_tx_callback _cb_fn;
	void *_cb_param;
	enum async_tx_flags async_flags;
	enum dma_ctrl_flags dma_flags;
	int xor_src_cnt;
	dma_addr_t dma_dest;

	/* map the dest bidirectional in case it is re-used as a source */
	dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL);
	for (i = 0; i < src_cnt; i++) {
		/* only map the dest once */
		if (unlikely(src_list[i] == dest)) {
			dma_src[i] = dma_dest;
			continue;
		}
		dma_src[i] = dma_map_page(dma->dev, src_list[i], offset,
					  len, DMA_TO_DEVICE);
	}

	while (src_cnt) {
		async_flags = flags;
		dma_flags = 0;
		xor_src_cnt = min(src_cnt, dma->max_xor);
		/* if we are submitting additional xors, leave the chain open,
		 * clear the callback parameters, and leave the destination
		 * buffer mapped
		 */
		if (src_cnt > xor_src_cnt) {
			async_flags &= ~ASYNC_TX_ACK;
			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
			_cb_fn = NULL;
			_cb_param = NULL;
		} else {
			_cb_fn = cb_fn;
			_cb_param = cb_param;
		}
		if (_cb_fn)
			dma_flags |= DMA_PREP_INTERRUPT;

		/* Since we have clobbered the src_list we are committed
		 * to doing this asynchronously.  Drivers force forward
		 * progress in case they cannot provide a descriptor
		 */
		tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
					      xor_src_cnt, len, dma_flags);

		if (unlikely(!tx))
			async_tx_quiesce(&depend_tx);

		/* spin wait for the preceding transactions to complete */
		while (unlikely(!tx)) {
			dma_async_issue_pending(chan);
			tx = dma->device_prep_dma_xor(chan, dma_dest,
						      &dma_src[src_off],
						      xor_src_cnt, len,
						      dma_flags);
		}

		async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
				_cb_param);

		depend_tx = tx;
		flags |= ASYNC_TX_DEP_ACK;

		if (src_cnt > xor_src_cnt) {
			/* drop completed sources */
			src_cnt -= xor_src_cnt;
			src_off += xor_src_cnt;

			/* use the intermediate result as a source */
			dma_src[--src_off] = dma_dest;
			src_cnt++;
		} else
			break;
	}

	return tx;
}
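
/*
 * Illustrative note on the chaining above (hypothetical numbers, for
 * concreteness): with dma->max_xor == 4 and src_cnt == 9 the operation is
 * split into three chained descriptors:
 *
 *	xor(dest, src[0..3])		4 new sources
 *	xor(dest, dest, src[4..6])	3 new sources + intermediate result
 *	xor(dest, dest, src[7..8])	2 new sources + intermediate result
 *
 * After the first pass each descriptor consumes at most max_xor - 1 new
 * sources, because one source slot is occupied by the running result.
 */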

static void
do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
	    int src_cnt, size_t len, enum async_tx_flags flags,
	    dma_async_tx_callback cb_fn, void *cb_param)
{
	int i;
	int xor_src_cnt;
	int src_off = 0;
	void *dest_buf;
	void **srcs = (void **) src_list;

	/* reuse the 'src_list' array to convert to buffer pointers */
	for (i = 0; i < src_cnt; i++)
		srcs[i] = page_address(src_list[i]) + offset;

	/* set destination address */
	dest_buf = page_address(dest) + offset;

	if (flags & ASYNC_TX_XOR_ZERO_DST)
		memset(dest_buf, 0, len);

	while (src_cnt > 0) {
		/* process up to 'MAX_XOR_BLOCKS' sources */
		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
		xor_blocks(xor_src_cnt, len, dest_buf, &srcs[src_off]);

		/* drop completed sources */
		src_cnt -= xor_src_cnt;
		src_off += xor_src_cnt;
	}

	async_tx_sync_epilog(cb_fn, cb_param);
}

/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 *	xor_blocks always uses the dest as a source, so the
 *	ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
 *	the calculation.  The assumption with dma engines is that they only
 *	use the destination buffer as a source when it is explicitly
 *	specified in the source list.
 * @dest: destination page
 * @src_list: array of source pages (if the dest is also a source it must be
 *	at index zero).  The contents of this array may be overwritten.
 * @offset: common offset into each page at which the transaction starts
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST,
 *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: xor depends on the result of this transaction.
 * @cb_fn: function to call when the xor completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	  int src_cnt, size_t len, enum async_tx_flags flags,
	  struct dma_async_tx_descriptor *depend_tx,
	  dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	BUG_ON(src_cnt <= 1);

	if (chan) {
		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);

		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
				    flags, depend_tx, cb_fn, cb_param);
	} else {
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
		if (flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
		async_tx_quiesce(&depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len,
			    flags, cb_fn, cb_param);

		return NULL;
	}
}
EXPORT_SYMBOL_GPL(async_xor);
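
/*
 * Minimal usage sketch (illustrative only, not compiled): computing parity
 * over a set of data pages in the style of a raid5 caller.  The example_*
 * names and the completion-based wait are hypothetical, not part of this
 * api.  Note the callback fires in both paths: from the dma interrupt when
 * a channel is used, or inline via async_tx_sync_epilog() otherwise, so
 * waiting unconditionally is safe.
 */
#if 0
static void example_xor_done(void *param)
{
	complete(param);
}

static void example_compute_parity(struct page *parity, struct page **blocks,
				   int src_cnt, size_t len)
{
	DECLARE_COMPLETION_ONSTACK(done);

	/* 'blocks' holds data pages only; the destination is not among the
	 * sources, so ask the sync path to zero it before accumulating
	 */
	async_xor(parity, blocks, 0, src_cnt, len,
		  ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK,
		  NULL, example_xor_done, &done);
	wait_for_completion(&done);
}
#endif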

/* a region is all zeroes if its first word is zero and the buffer equals
 * itself shifted by four bytes
 */
static int page_is_zero(struct page *p, unsigned int offset, size_t len)
{
	char *a = page_address(p) + offset;
	return ((*(u32 *) a) == 0 &&
		memcmp(a, a + 4, len - 4) == 0);
}

/**
 * async_xor_zero_sum - attempt an xor parity check with a dma engine.
 * @dest: destination page used if the xor is performed synchronously
 * @src_list: array of source pages.  The dest page must be listed as a source
 *	at index zero.  The contents of this array may be overwritten.
 * @offset: common offset into each page at which the transaction starts
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @result: 0 if sum == 0 else non-zero
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: xor depends on the result of this transaction.
 * @cb_fn: function to call when the xor completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list,
		   unsigned int offset, int src_cnt, size_t len,
		   u32 *result, enum async_tx_flags flags,
		   struct dma_async_tx_descriptor *depend_tx,
		   dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
						      &dest, 1, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	BUG_ON(src_cnt <= 1);

	if (device && src_cnt <= device->max_xor) {
		dma_addr_t *dma_src = (dma_addr_t *) src_list;
		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
		int i;

		pr_debug("%s: (async) len: %zu\n", __func__, len);

		for (i = 0; i < src_cnt; i++)
			dma_src[i] = dma_map_page(device->dev, src_list[i],
						  offset, len, DMA_TO_DEVICE);

		tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
						      len, result,
						      dma_prep_flags);
		if (unlikely(!tx)) {
			async_tx_quiesce(&depend_tx);

			while (!tx) {
				dma_async_issue_pending(chan);
				tx = device->device_prep_dma_zero_sum(chan,
					dma_src, src_cnt, len, result,
					dma_prep_flags);
			}
		}

		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else {
		unsigned long xor_flags = flags;

		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		xor_flags |= ASYNC_TX_XOR_DROP_DST;
		xor_flags &= ~ASYNC_TX_ACK;

		tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
			       depend_tx, NULL, NULL);

		async_tx_quiesce(&tx);

		*result = page_is_zero(dest, offset, len) ? 0 : 1;

		async_tx_sync_epilog(cb_fn, cb_param);
	}

	return tx;
}
EXPORT_SYMBOL_GPL(async_xor_zero_sum);
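
/*
 * Minimal usage sketch (illustrative only, not compiled): verifying that a
 * set of blocks xors to zero, e.g. a raid5-style parity check.  The parity
 * page sits at blocks[0], doubling as the sync-path destination, as this
 * api requires.  The example_* names and the error code choice are
 * hypothetical, not part of this api.
 */
#if 0
static void example_check_done(void *param)
{
	complete(param);
}

static int example_check_parity(struct page **blocks, int src_cnt, size_t len)
{
	DECLARE_COMPLETION_ONSTACK(done);
	u32 result;

	async_xor_zero_sum(blocks[0], blocks, 0, src_cnt, len, &result,
			   ASYNC_TX_ACK, NULL, example_check_done, &done);
	wait_for_completion(&done);

	/* a non-zero sum means the blocks do not xor to zero */
	return result ? -EILSEQ : 0;
}
#endif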

static int __init async_xor_init(void)
{
#ifdef CONFIG_ASYNC_TX_DMA
	/* To conserve stack space the input src_list (array of page pointers)
	 * is reused to hold the array of dma addresses passed to the driver.
	 * This conversion is only possible when dma_addr_t is no larger than
	 * the size of a pointer.  HIGHMEM64G is known to violate this
	 * assumption.
	 */
	BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
#endif

	return 0;
}

static void __exit async_xor_exit(void)
{
	do { } while (0);
}

module_init(async_xor_init);
module_exit(async_xor_exit);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
MODULE_LICENSE("GPL");
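
/*
 * Illustrative note on async_xor_init()'s BUILD_BUG_ON (assumed
 * configuration, for concreteness): on i386 with CONFIG_HIGHMEM64G (PAE),
 * dma_addr_t is a 64-bit quantity while struct page * is 32 bits, so a dma
 * address cannot be stored in place of a page pointer and the src_list
 * reuse in do_async_xor() would overrun adjacent array entries.
 */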