/*
 * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>

#include "qib.h"
#include "qib_user_sdma.h"

/* minimum size of header */
#define QIB_USER_SDMA_MIN_HEADER_LENGTH	64
/* expected size of headers (for dma_pool) */
#define QIB_USER_SDMA_EXP_HEADER_LENGTH	64
/* attempt to drain the queue for 5secs */
#define QIB_USER_SDMA_DRAIN_TIMEOUT	500

struct qib_user_sdma_pkt {
	struct list_head list;	/* list element */

	u8  tiddma;		/* if this is NEW tid-sdma */
	u8  largepkt;		/* this is large pkt from kmalloc */
	u16 frag_size;		/* frag size used by PSM */
	u16 index;		/* last header index or push index */
	u16 naddr;		/* dimension of addr (1..3) ... */
	u16 addrlimit;		/* addr array size */
	u16 tidsmidx;		/* current tidsm index */
	u16 tidsmcount;		/* tidsm array item count */
	u16 payload_size;	/* payload size so far for header */
	u32 bytes_togo;		/* bytes for processing */
	u32 counter;		/* sdma pkts queued counter for this entry */
	struct qib_tid_session_member *tidsm;	/* tid session member array */
	struct qib_user_sdma_queue *pq;	/* which pq this pkt belongs to */
	u64 added;		/* global descq number of entries */

	struct {
		u16 offset;		/* offset for kvaddr, addr */
		u16 length;		/* length in page */
		u16 first_desc;		/* first desc */
		u16 last_desc;		/* last desc */
		u16 put_page;		/* should we put_page? */
		u16 dma_mapped;		/* is page dma_mapped? */
		u16 dma_length;		/* for dma_unmap_page() */
		u16 padding;
		struct page *page;	/* may be NULL (coherent mem) */
		void *kvaddr;		/* FIXME: only for pio hack */
		dma_addr_t addr;
	} addr[4];	/* max pages, any more and we coalesce */
};

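/*
 * Life cycle of a user sdma packet: it is built from a user writev() by
 * qib_user_sdma_queue_pkts(), pushed onto the hardware descriptor ring by
 * qib_user_sdma_push_pkts()/qib_user_sdma_send_desc(), parked on pq->sent
 * once all of its descriptors are on the ring, and finally unmapped and
 * freed by qib_user_sdma_queue_clean() after the hardware has consumed it.
 */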
struct qib_user_sdma_queue {
	/*
	 * pkts sent to dma engine are queued on this
	 * list head.  the type of the elements of this
	 * list are struct qib_user_sdma_pkt...
	 */
	struct list_head sent;

	/*
	 * Because above list will be accessed by both process and
	 * signal handler, we need a spinlock for it.
	 */
	spinlock_t sent_lock ____cacheline_aligned_in_smp;

	/* headers with expected length are allocated from here... */
	char header_cache_name[64];
	struct dma_pool *header_cache;

	/* packets are allocated from the slab cache... */
	char pkt_slab_name[64];
	struct kmem_cache *pkt_slab;

	/* as packets go on the queued queue, they are counted... */
	u32 counter;
	u32 sent_counter;
	/* pending packets, not sending yet */
	u32 num_pending;
	/* sending packets, not complete yet */
	u32 num_sending;
	/* global descq number of entry of last sending packet */
	u64 added;

	/* dma page table */
	struct rb_root dma_pages_root;

	/* protect everything above... */
	struct mutex lock;
};

struct qib_user_sdma_queue *
qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
{
	struct qib_user_sdma_queue *pq =
		kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);

	if (!pq)
		goto done;

	pq->counter = 0;
	pq->sent_counter = 0;
	pq->num_pending = 0;
	pq->num_sending = 0;
	pq->added = 0;

	INIT_LIST_HEAD(&pq->sent);
	spin_lock_init(&pq->sent_lock);
	mutex_init(&pq->lock);

	snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
		 "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt);
	pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
					 sizeof(struct qib_user_sdma_pkt),
					 0, 0, NULL);

	if (!pq->pkt_slab)
		goto err_kfree;

	snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
		 "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt);
	pq->header_cache = dma_pool_create(pq->header_cache_name,
					   dev,
					   QIB_USER_SDMA_EXP_HEADER_LENGTH,
					   4, 0);
	if (!pq->header_cache)
		goto err_slab;

	pq->dma_pages_root = RB_ROOT;

	goto done;

err_slab:
	kmem_cache_destroy(pq->pkt_slab);
err_kfree:
	kfree(pq);
	pq = NULL;

done:
	return pq;
}

static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
				    int i, u16 offset, u16 len,
				    u16 first_desc, u16 last_desc,
				    u16 put_page, u16 dma_mapped,
				    struct page *page, void *kvaddr,
				    dma_addr_t dma_addr, u16 dma_length)
{
	pkt->addr[i].offset = offset;
	pkt->addr[i].length = len;
	pkt->addr[i].first_desc = first_desc;
	pkt->addr[i].last_desc = last_desc;
	pkt->addr[i].put_page = put_page;
	pkt->addr[i].dma_mapped = dma_mapped;
	pkt->addr[i].page = page;
	pkt->addr[i].kvaddr = kvaddr;
	pkt->addr[i].addr = dma_addr;
	pkt->addr[i].dma_length = dma_length;
}

static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
					size_t len, dma_addr_t *dma_addr)
{
	void *hdr;

	if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
		hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
				     dma_addr);
	else
		hdr = NULL;

	if (!hdr) {
		hdr = kmalloc(len, GFP_KERNEL);
		if (!hdr)
			return NULL;

		*dma_addr = 0;
	}

	return hdr;
}

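/*
 * Add one user page to the packet as a run of descriptors: the page is
 * DMA-mapped once, then chopped into fragments bounded by the PSM
 * frag_size and, for tid-sdma, by the current tid entry length.  Whenever
 * a fragment completes a wire packet while bytes remain, the current sdma
 * header is finalized and a modified copy is queued as the header of the
 * next packet.  On a mapping error the page is released in here, so the
 * caller must not touch it again.
 */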
static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
				       struct qib_user_sdma_queue *pq,
				       struct qib_user_sdma_pkt *pkt,
				       struct page *page, u16 put,
				       u16 offset, u16 len, void *kvaddr)
{
	__le16 *pbc16;
	void *pbcvaddr;
	struct qib_message_header *hdr;
	u16 newlen, pbclen, lastdesc, dma_mapped;
	u32 vcto;
	union qib_seqnum seqnum;
	dma_addr_t pbcdaddr;
	dma_addr_t dma_addr =
		dma_map_page(&dd->pcidev->dev,
			     page, offset, len, DMA_TO_DEVICE);
	int ret = 0;

	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
		/*
		 * dma mapping error, pkt has not managed
		 * this page yet, return the page here so
		 * the caller can ignore this page.
		 */
		if (put) {
			put_page(page);
		} else {
			/* coalesce case */
			kunmap(page);
			__free_page(page);
		}
		ret = -ENOMEM;
		goto done;
	}
	offset = 0;
	dma_mapped = 1;


next_fragment:

	/*
	 * In tid-sdma, the transfer length is restricted by
	 * receiver side current tid page length.
	 */
	if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
		newlen = pkt->tidsm[pkt->tidsmidx].length;
	else
		newlen = len;

	/*
	 * Then the transfer length is restricted by MTU.
	 * the last descriptor flag is determined by:
	 * 1. the current packet is at frag size length.
	 * 2. the current tid page is done if tid-sdma.
	 * 3. there is no more byte togo if sdma.
	 */
	lastdesc = 0;
	if ((pkt->payload_size + newlen) >= pkt->frag_size) {
		newlen = pkt->frag_size - pkt->payload_size;
		lastdesc = 1;
	} else if (pkt->tiddma) {
		if (newlen == pkt->tidsm[pkt->tidsmidx].length)
			lastdesc = 1;
	} else {
		if (newlen == pkt->bytes_togo)
			lastdesc = 1;
	}

	/* fill the next fragment in this page */
	qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
		offset, newlen,		/* offset, len */
		0, lastdesc,		/* first last desc */
		put, dma_mapped,	/* put page, dma mapped */
		page, kvaddr,		/* struct page, virt addr */
		dma_addr, len);		/* dma addr, dma length */
	pkt->bytes_togo -= newlen;
	pkt->payload_size += newlen;
	pkt->naddr++;
	if (pkt->naddr == pkt->addrlimit) {
		ret = -EFAULT;
		goto done;
	}

	/* If there is no more byte togo. (lastdesc==1) */
	if (pkt->bytes_togo == 0) {
		/* The packet is done, header is not dma mapped yet.
		 * it should be from kmalloc */
		if (!pkt->addr[pkt->index].addr) {
			pkt->addr[pkt->index].addr =
				dma_map_single(&dd->pcidev->dev,
					pkt->addr[pkt->index].kvaddr,
					pkt->addr[pkt->index].dma_length,
					DMA_TO_DEVICE);
			if (dma_mapping_error(&dd->pcidev->dev,
					pkt->addr[pkt->index].addr)) {
				ret = -ENOMEM;
				goto done;
			}
			pkt->addr[pkt->index].dma_mapped = 1;
		}

		goto done;
	}

	/* If tid-sdma, advance tid info. */
	if (pkt->tiddma) {
		pkt->tidsm[pkt->tidsmidx].length -= newlen;
		if (pkt->tidsm[pkt->tidsmidx].length) {
			pkt->tidsm[pkt->tidsmidx].offset += newlen;
		} else {
			pkt->tidsmidx++;
			if (pkt->tidsmidx == pkt->tidsmcount) {
				ret = -EFAULT;
				goto done;
			}
		}
	}

	/*
	 * If this is NOT the last descriptor. (newlen==len)
	 * the current packet is not done yet, but the current
	 * send side page is done.
	 */
	if (lastdesc == 0)
		goto done;

	/*
	 * If running this driver under PSM with message size
	 * fitting into one transfer unit, it is not possible
	 * to pass this line. otherwise, it is a bug.
	 */

	/*
	 * Since the current packet is done, and there are more
	 * bytes togo, we need to create a new sdma header, copying
	 * from previous sdma header and modify both.
	 */
	pbclen = pkt->addr[pkt->index].length;
	pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
	if (!pbcvaddr) {
		ret = -ENOMEM;
		goto done;
	}
	/* Copy the previous sdma header to new sdma header */
	pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
	memcpy(pbcvaddr, pbc16, pbclen);

	/* Modify the previous sdma header */
	hdr = (struct qib_message_header *)&pbc16[4];

	/* New pbc length */
	pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));

	/* New packet length */
	hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));

	if (pkt->tiddma) {
		/* turn on the header suppression */
		hdr->iph.pkt_flags =
			cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
		/* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
		hdr->flags &= ~(0x04|0x20);
	} else {
		/* turn off extra bytes: 20-21 bits */
		hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
		/* turn off ACK_REQ: 0x04 */
		hdr->flags &= ~(0x04);
	}

	/* New kdeth checksum */
	vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
	hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
			be16_to_cpu(hdr->lrh[2]) -
			((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
			le16_to_cpu(hdr->iph.pkt_flags));

	/* The packet is done, header is not dma mapped yet.
	 * it should be from kmalloc */
	if (!pkt->addr[pkt->index].addr) {
		pkt->addr[pkt->index].addr =
			dma_map_single(&dd->pcidev->dev,
				pkt->addr[pkt->index].kvaddr,
				pkt->addr[pkt->index].dma_length,
				DMA_TO_DEVICE);
		if (dma_mapping_error(&dd->pcidev->dev,
				pkt->addr[pkt->index].addr)) {
			ret = -ENOMEM;
			goto done;
		}
		pkt->addr[pkt->index].dma_mapped = 1;
	}

	/* Modify the new sdma header */
	pbc16 = (__le16 *)pbcvaddr;
	hdr = (struct qib_message_header *)&pbc16[4];

	/* New pbc length */
	pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));

	/* New packet length */
	hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));

	if (pkt->tiddma) {
		/* Set new tid and offset for new sdma header */
		hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
			(le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
			(pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
			(pkt->tidsm[pkt->tidsmidx].offset>>2));
	} else {
		/* Middle protocol new packet offset */
		hdr->uwords[2] += pkt->payload_size;
	}

	/* New kdeth checksum */
	vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
	hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
			be16_to_cpu(hdr->lrh[2]) -
			((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
			le16_to_cpu(hdr->iph.pkt_flags));

	/* Next sequence number in new sdma header */
	seqnum.val = be32_to_cpu(hdr->bth[2]);
	if (pkt->tiddma)
		seqnum.seq++;
	else
		seqnum.pkt++;
	hdr->bth[2] = cpu_to_be32(seqnum.val);

	/* Init new sdma header. */
	qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
		0, pbclen,		/* offset, len */
		1, 0,			/* first last desc */
		0, 0,			/* put page, dma mapped */
		NULL, pbcvaddr,		/* struct page, virt addr */
		pbcdaddr, pbclen);	/* dma addr, dma length */
	pkt->index = pkt->naddr;
	pkt->payload_size = 0;
	pkt->naddr++;
	if (pkt->naddr == pkt->addrlimit) {
		ret = -EFAULT;
		goto done;
	}

	/* Prepare for next fragment in this page */
	if (newlen != len) {
		if (dma_mapped) {
			put = 0;
			dma_mapped = 0;
			page = NULL;
			kvaddr = NULL;
		}
		len -= newlen;
		offset += newlen;

		goto next_fragment;
	}

done:
	return ret;
}

/* we've too many pages in the iovec, coalesce to a single page */
static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
				  struct qib_user_sdma_queue *pq,
				  struct qib_user_sdma_pkt *pkt,
				  const struct iovec *iov,
				  unsigned long niov)
{
	int ret = 0;
	struct page *page = alloc_page(GFP_KERNEL);
	void *mpage_save;
	char *mpage;
	int i;
	int len = 0;

	if (!page) {
		ret = -ENOMEM;
		goto done;
	}

	mpage = kmap(page);
	mpage_save = mpage;
	for (i = 0; i < niov; i++) {
		int cfur;

		cfur = copy_from_user(mpage,
				      iov[i].iov_base, iov[i].iov_len);
		if (cfur) {
			ret = -EFAULT;
			goto free_unmap;
		}

		mpage += iov[i].iov_len;
		len += iov[i].iov_len;
	}

	ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
			page, 0, 0, len, mpage_save);
	goto done;

free_unmap:
	kunmap(page);
	__free_page(page);
done:
	return ret;
}

/*
 * How many pages in this iovec element?
 */
static int qib_user_sdma_num_pages(const struct iovec *iov)
{
	const unsigned long addr = (unsigned long) iov->iov_base;
	const unsigned long len = iov->iov_len;
	const unsigned long spage = addr & PAGE_MASK;
	const unsigned long epage = (addr + len - 1) & PAGE_MASK;

	return 1 + ((epage - spage) >> PAGE_SHIFT);
}

static void qib_user_sdma_free_pkt_frag(struct device *dev,
					struct qib_user_sdma_queue *pq,
					struct qib_user_sdma_pkt *pkt,
					int frag)
{
	const int i = frag;

	if (pkt->addr[i].page) {
		/* only user data has page */
		if (pkt->addr[i].dma_mapped)
			dma_unmap_page(dev,
				       pkt->addr[i].addr,
				       pkt->addr[i].dma_length,
				       DMA_TO_DEVICE);

		if (pkt->addr[i].kvaddr)
			kunmap(pkt->addr[i].page);

		if (pkt->addr[i].put_page)
			put_page(pkt->addr[i].page);
		else
			__free_page(pkt->addr[i].page);
	} else if (pkt->addr[i].kvaddr) {
		/* for headers */
		if (pkt->addr[i].dma_mapped) {
			/* from kmalloc & dma mapped */
			dma_unmap_single(dev,
					 pkt->addr[i].addr,
					 pkt->addr[i].dma_length,
					 DMA_TO_DEVICE);
			kfree(pkt->addr[i].kvaddr);
		} else if (pkt->addr[i].addr) {
			/* free coherent mem from cache... */
			dma_pool_free(pq->header_cache,
				      pkt->addr[i].kvaddr, pkt->addr[i].addr);
		} else {
			/* from kmalloc but not dma mapped */
			kfree(pkt->addr[i].kvaddr);
		}
	}
}

/* return number of pages pinned... */
static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
				   struct qib_user_sdma_queue *pq,
				   struct qib_user_sdma_pkt *pkt,
				   unsigned long addr, int tlen, int npages)
{
	struct page *pages[8];
	int i, j;
	int ret = 0;

	while (npages) {
		if (npages > 8)
			j = 8;
		else
			j = npages;

		ret = get_user_pages(current, current->mm, addr,
				     j, 0, 1, pages, NULL);
		if (ret != j) {
			i = 0;
			j = ret;
			ret = -ENOMEM;
			goto free_pages;
		}

		for (i = 0; i < j; i++) {
			/* map the pages... */
			unsigned long fofs = addr & ~PAGE_MASK;
			int flen = ((fofs + tlen) > PAGE_SIZE) ?
				(PAGE_SIZE - fofs) : tlen;

			ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
				pages[i], 1, fofs, flen, NULL);
			if (ret < 0) {
				/* current page has been taken
				 * care of inside above call.
				 */
				i++;
				goto free_pages;
			}

			addr += flen;
			tlen -= flen;
		}

		npages -= j;
	}

	goto done;

	/* if error, return all pages not managed by pkt */
free_pages:
	while (i < j)
		put_page(pages[i++]);

done:
	return ret;
}

static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
				 struct qib_user_sdma_queue *pq,
				 struct qib_user_sdma_pkt *pkt,
				 const struct iovec *iov,
				 unsigned long niov)
{
	int ret = 0;
	unsigned long idx;

	for (idx = 0; idx < niov; idx++) {
		const int npages = qib_user_sdma_num_pages(iov + idx);
		const unsigned long addr = (unsigned long) iov[idx].iov_base;

		ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
					      iov[idx].iov_len, npages);
		if (ret < 0)
			goto free_pkt;
	}

	goto done;

free_pkt:
	/* we need to ignore the first entry here */
	for (idx = 1; idx < pkt->naddr; idx++)
		qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);

	/* need to dma unmap the first entry, this is to restore to
	 * the original state so that caller can free the memory in
	 * error condition. Caller does not know if dma mapped or not. */
	if (pkt->addr[0].dma_mapped) {
		dma_unmap_single(&dd->pcidev->dev,
			pkt->addr[0].addr,
			pkt->addr[0].dma_length,
			DMA_TO_DEVICE);
		pkt->addr[0].addr = 0;
		pkt->addr[0].dma_mapped = 0;
	}

done:
	return ret;
}

static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
				      struct qib_user_sdma_queue *pq,
				      struct qib_user_sdma_pkt *pkt,
				      const struct iovec *iov,
				      unsigned long niov, int npages)
{
	int ret = 0;

	if (pkt->frag_size == pkt->bytes_togo &&
			npages >= ARRAY_SIZE(pkt->addr))
		ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
	else
		ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);

	return ret;
}

/* free a packet list */
static void qib_user_sdma_free_pkt_list(struct device *dev,
					struct qib_user_sdma_queue *pq,
					struct list_head *list)
{
	struct qib_user_sdma_pkt *pkt, *pkt_next;

	list_for_each_entry_safe(pkt, pkt_next, list, list) {
		int i;

		for (i = 0; i < pkt->naddr; i++)
			qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);

		if (pkt->largepkt)
			kfree(pkt);
		else
			kmem_cache_free(pq->pkt_slab, pkt);
	}
	INIT_LIST_HEAD(list);
}

/*
 * copy headers, coalesce etc -- pq->lock must be held
 *
 * we queue all the packets to list, returning the
 * number of bytes total.  list must be empty initially,
 * as, if there is an error we clean it...
 */
static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
				    struct qib_pportdata *ppd,
				    struct qib_user_sdma_queue *pq,
				    const struct iovec *iov,
				    unsigned long niov,
				    struct list_head *list,
				    int *maxpkts, int *ndesc)
{
	unsigned long idx = 0;
	int ret = 0;
	int npkts = 0;
	__le32 *pbc;
	dma_addr_t dma_addr;
	struct qib_user_sdma_pkt *pkt = NULL;
	size_t len;
	size_t nw;
	u32 counter = pq->counter;
	u16 frag_size;

	while (idx < niov && npkts < *maxpkts) {
		const unsigned long addr = (unsigned long) iov[idx].iov_base;
		const unsigned long idx_save = idx;
		unsigned pktnw;
		unsigned pktnwc;
		int nfrags = 0;
		int npages = 0;
		int bytes_togo = 0;
		int tiddma = 0;
		int cfur;

		len = iov[idx].iov_len;
		nw = len >> 2;

		if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
		    len > PAGE_SIZE || len & 3 || addr & 3) {
			ret = -EINVAL;
			goto free_list;
		}

		pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
		if (!pbc) {
			ret = -ENOMEM;
			goto free_list;
		}

		cfur = copy_from_user(pbc, iov[idx].iov_base, len);
		if (cfur) {
			ret = -EFAULT;
			goto free_pbc;
		}

		/*
		 * This assignment is a bit strange.  It's because
		 * the pbc counts the number of 32 bit words in the full
		 * packet _except_ the first word of the pbc itself...
		 */
		pktnwc = nw - 1;

		/*
		 * pktnw computation yields the number of 32 bit words
		 * that the caller has indicated in the PBC.  note that
		 * this is one less than the total number of words that
		 * goes to the send DMA engine as the first 32 bit word
		 * of the PBC itself is not counted.  Armed with this count,
		 * we can verify that the packet is consistent with the
		 * iovec lengths.
		 */
		pktnw = le32_to_cpu(*pbc) & 0xFFFF;
		if (pktnw < pktnwc) {
			ret = -EINVAL;
			goto free_pbc;
		}

		idx++;
		while (pktnwc < pktnw && idx < niov) {
			const size_t slen = iov[idx].iov_len;
			const unsigned long faddr =
				(unsigned long) iov[idx].iov_base;

			if (slen & 3 || faddr & 3 || !slen) {
				ret = -EINVAL;
				goto free_pbc;
			}

			npages += qib_user_sdma_num_pages(&iov[idx]);

			bytes_togo += slen;
			pktnwc += slen >> 2;
			idx++;
			nfrags++;
		}

		if (pktnwc != pktnw) {
			ret = -EINVAL;
			goto free_pbc;
		}

		frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
		if (((frag_size ? frag_size : bytes_togo) + len) >
				ppd->ibmaxlen) {
			ret = -EINVAL;
			goto free_pbc;
		}

		if (frag_size) {
			int pktsize, tidsmsize, n;

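			/*
			 * Rough descriptor budget: each page of payload can
			 * be cut into up to about PAGE_SIZE/frag_size + 1
			 * fragments, and every fragment that completes a
			 * frag_size chunk also queues a cloned header (see
			 * qib_user_sdma_page_to_frags()), hence roughly
			 * 2*PAGE_SIZE/frag_size + 1 addr[] entries per page;
			 * addrlimit below adds ARRAY_SIZE(pkt->addr) as
			 * extra slack.
			 */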
			n = npages*((2*PAGE_SIZE/frag_size)+1);
			pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n;

			/*
			 * Determine if this is tid-sdma or just sdma.
			 */
			tiddma = (((le32_to_cpu(pbc[7])>>
				QLOGIC_IB_I_TID_SHIFT)&
				QLOGIC_IB_I_TID_MASK) !=
				QLOGIC_IB_I_TID_MASK);

			if (tiddma)
				tidsmsize = iov[idx].iov_len;
			else
				tidsmsize = 0;

			pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
			if (!pkt) {
				ret = -ENOMEM;
				goto free_pbc;
			}
			pkt->largepkt = 1;
			pkt->frag_size = frag_size;
			pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);

			if (tiddma) {
				char *tidsm = (char *)pkt + pktsize;

				cfur = copy_from_user(tidsm,
					iov[idx].iov_base, tidsmsize);
				if (cfur) {
					ret = -EFAULT;
					goto free_pkt;
				}
				pkt->tidsm =
					(struct qib_tid_session_member *)tidsm;
				pkt->tidsmcount = tidsmsize/
					sizeof(struct qib_tid_session_member);
				pkt->tidsmidx = 0;
				idx++;
			}

			/*
			 * The pbc 'fill1' field is borrowed to pass the frag
			 * size; we need to clear it after picking the frag
			 * size, since the hardware requires this field to be
			 * zero.
			 */
			*pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
		} else {
			pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
			if (!pkt) {
				ret = -ENOMEM;
				goto free_pbc;
			}
			pkt->largepkt = 0;
			pkt->frag_size = bytes_togo;
			pkt->addrlimit = ARRAY_SIZE(pkt->addr);
		}
		pkt->bytes_togo = bytes_togo;
		pkt->payload_size = 0;
		pkt->counter = counter;
		pkt->tiddma = tiddma;

		/* setup the first header */
		qib_user_sdma_init_frag(pkt, 0, /* index */
			0, len,		/* offset, len */
			1, 0,		/* first last desc */
			0, 0,		/* put page, dma mapped */
			NULL, pbc,	/* struct page, virt addr */
			dma_addr, len);	/* dma addr, dma length */
		pkt->index = 0;
		pkt->naddr = 1;

		if (nfrags) {
			ret = qib_user_sdma_init_payload(dd, pq, pkt,
							 iov + idx_save + 1,
							 nfrags, npages);
			if (ret < 0)
				goto free_pkt;
		} else {
			/* since there is no payload, mark the
			 * header as the last desc. */
			pkt->addr[0].last_desc = 1;

			if (dma_addr == 0) {
				/*
				 * the header is not dma mapped yet.
				 * it should be from kmalloc.
				 */
				dma_addr = dma_map_single(&dd->pcidev->dev,
					pbc, len, DMA_TO_DEVICE);
				if (dma_mapping_error(&dd->pcidev->dev,
						dma_addr)) {
					ret = -ENOMEM;
					goto free_pkt;
				}
				pkt->addr[0].addr = dma_addr;
				pkt->addr[0].dma_mapped = 1;
			}
		}

		counter++;
		npkts++;
		pkt->pq = pq;
		pkt->index = 0; /* reset index for push on hw */
		*ndesc += pkt->naddr;

		list_add_tail(&pkt->list, list);
	}

	*maxpkts = npkts;
	ret = idx;
	goto done;

free_pkt:
	if (pkt->largepkt)
		kfree(pkt);
	else
		kmem_cache_free(pq->pkt_slab, pkt);
free_pbc:
	if (dma_addr)
		dma_pool_free(pq->header_cache, pbc, dma_addr);
	else
		kfree(pbc);
free_list:
	qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
	return ret;
}

static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
					       u32 c)
{
	pq->sent_counter = c;
}

/* try to clean out queue -- needs pq->lock */
static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
				     struct qib_user_sdma_queue *pq)
{
	struct qib_devdata *dd = ppd->dd;
	struct list_head free_list;
	struct qib_user_sdma_pkt *pkt;
	struct qib_user_sdma_pkt *pkt_prev;
	unsigned long flags;
	int ret = 0;

	if (!pq->num_sending)
		return 0;

	INIT_LIST_HEAD(&free_list);

	/*
	 * We need this spin lock here because interrupt handler
	 * might modify this list in qib_user_sdma_send_desc(), also
	 * we can not get interrupted, otherwise it is a deadlock.
	 */
	spin_lock_irqsave(&pq->sent_lock, flags);
	list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
		s64 descd = ppd->sdma_descq_removed - pkt->added;

		if (descd < 0)
			break;

		list_move_tail(&pkt->list, &free_list);

		/* one more packet cleaned */
		ret++;
		pq->num_sending--;
	}
	spin_unlock_irqrestore(&pq->sent_lock, flags);

	if (!list_empty(&free_list)) {
		u32 counter;

		pkt = list_entry(free_list.prev,
				 struct qib_user_sdma_pkt, list);
		counter = pkt->counter;

		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
		qib_user_sdma_set_complete_counter(pq, counter);
	}

	return ret;
}

void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
{
	if (!pq)
		return;

	kmem_cache_destroy(pq->pkt_slab);
	dma_pool_destroy(pq->header_cache);
	kfree(pq);
}

/* clean descriptor queue, returns > 0 if some elements cleaned */
static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&ppd->sdma_lock, flags);
	ret = qib_sdma_make_progress(ppd);
	spin_unlock_irqrestore(&ppd->sdma_lock, flags);

	return ret;
}

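/*
 * Note: QIB_USER_SDMA_DRAIN_TIMEOUT iterations of msleep(10) below give the
 * "attempt to drain the queue for 5secs" mentioned with the #define at the
 * top of this file (500 * 10ms).
 */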
/* we're in close, drain packets so that we can cleanup successfully... */
void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
			       struct qib_user_sdma_queue *pq)
{
	struct qib_devdata *dd = ppd->dd;
	unsigned long flags;
	int i;

	if (!pq)
		return;

	for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
		mutex_lock(&pq->lock);
		if (!pq->num_pending && !pq->num_sending) {
			mutex_unlock(&pq->lock);
			break;
		}
		qib_user_sdma_hwqueue_clean(ppd);
		qib_user_sdma_queue_clean(ppd, pq);
		mutex_unlock(&pq->lock);
		msleep(10);
	}

	if (pq->num_pending || pq->num_sending) {
		struct qib_user_sdma_pkt *pkt;
		struct qib_user_sdma_pkt *pkt_prev;
		struct list_head free_list;

		mutex_lock(&pq->lock);
		spin_lock_irqsave(&ppd->sdma_lock, flags);
		/*
		 * Since we hold sdma_lock, it is safe without sent_lock.
		 */
		if (pq->num_pending) {
			list_for_each_entry_safe(pkt, pkt_prev,
					&ppd->sdma_userpending, list) {
				if (pkt->pq == pq) {
					list_move_tail(&pkt->list, &pq->sent);
					pq->num_pending--;
					pq->num_sending++;
				}
			}
		}
		spin_unlock_irqrestore(&ppd->sdma_lock, flags);

		qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
		INIT_LIST_HEAD(&free_list);
		list_splice_init(&pq->sent, &free_list);
		pq->num_sending = 0;
		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
		mutex_unlock(&pq->lock);
	}
}

static inline __le64 qib_sdma_make_desc0(u8 gen,
					 u64 addr, u64 dwlen, u64 dwoffset)
{
	return cpu_to_le64(/* SDmaPhyAddr[31:0] */
			   ((addr & 0xfffffffcULL) << 32) |
			   /* SDmaGeneration[1:0] */
			   ((gen & 3ULL) << 30) |
			   /* SDmaDwordCount[10:0] */
			   ((dwlen & 0x7ffULL) << 16) |
			   /* SDmaBufOffset[12:2] */
			   (dwoffset & 0x7ffULL));
}

static inline __le64 qib_sdma_make_first_desc0(__le64 descq)
{
	return descq | cpu_to_le64(1ULL << 12);
}

static inline __le64 qib_sdma_make_last_desc0(__le64 descq)
{
	/* last */ /* dma head */
	return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
}

static inline __le64 qib_sdma_make_desc1(u64 addr)
{
	/* SDmaPhyAddr[47:32] */
	return cpu_to_le64(addr >> 32);
}

static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
				    struct qib_user_sdma_pkt *pkt, int idx,
				    unsigned ofs, u16 tail, u8 gen)
{
	const u64 addr = (u64) pkt->addr[idx].addr +
		(u64) pkt->addr[idx].offset;
	const u64 dwlen = (u64) pkt->addr[idx].length / 4;
	__le64 *descqp;
	__le64 descq0;

	descqp = &ppd->sdma_descq[tail].qw[0];

	descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
	if (pkt->addr[idx].first_desc)
		descq0 = qib_sdma_make_first_desc0(descq0);
	if (pkt->addr[idx].last_desc) {
		descq0 = qib_sdma_make_last_desc0(descq0);
		if (ppd->sdma_intrequest) {
			descq0 |= cpu_to_le64(1ULL << 15);
			ppd->sdma_intrequest = 0;
		}
	}

	descqp[0] = descq0;
	descqp[1] = qib_sdma_make_desc1(addr);
}

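/*
 * Push queued packets onto the hardware descriptor ring: descriptors are
 * written at the current tail, the generation flips when the ring wraps,
 * and the chip tail register is only advanced once complete packets have
 * been written.  Packets whose descriptors are all on the ring move from
 * num_pending to num_sending and onto pq->sent under sent_lock.
 */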
void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
			     struct list_head *pktlist)
{
	struct qib_devdata *dd = ppd->dd;
	u16 nfree, nsent;
	u16 tail, tail_c;
	u8 gen, gen_c;

	nfree = qib_sdma_descq_freecnt(ppd);
	if (!nfree)
		return;

retry:
	nsent = 0;
	tail_c = tail = ppd->sdma_descq_tail;
	gen_c = gen = ppd->sdma_generation;
	while (!list_empty(pktlist)) {
		struct qib_user_sdma_pkt *pkt =
			list_entry(pktlist->next, struct qib_user_sdma_pkt,
				   list);
		int i, j, c = 0;
		unsigned ofs = 0;
		u16 dtail = tail;

		for (i = pkt->index; i < pkt->naddr && nfree; i++) {
			qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
			ofs += pkt->addr[i].length >> 2;

			if (++tail == ppd->sdma_descq_cnt) {
				tail = 0;
				++gen;
				ppd->sdma_intrequest = 1;
			} else if (tail == (ppd->sdma_descq_cnt>>1)) {
				ppd->sdma_intrequest = 1;
			}
			nfree--;
			if (pkt->addr[i].last_desc == 0)
				continue;

			/*
			 * If the packet is >= 2KB mtu equivalent, we
			 * have to use the large buffers, and have to
			 * mark each descriptor as part of a large
			 * buffer packet.
			 */
			if (ofs > dd->piosize2kmax_dwords) {
				for (j = pkt->index; j <= i; j++) {
					ppd->sdma_descq[dtail].qw[0] |=
						cpu_to_le64(1ULL << 14);
					if (++dtail == ppd->sdma_descq_cnt)
						dtail = 0;
				}
			}
			c += i + 1 - pkt->index;
			pkt->index = i + 1; /* index for next first */
			tail_c = dtail = tail;
			gen_c = gen;
			ofs = 0;  /* reset for next packet */
		}

		ppd->sdma_descq_added += c;
		nsent += c;
		if (pkt->index == pkt->naddr) {
			pkt->added = ppd->sdma_descq_added;
			pkt->pq->added = pkt->added;
			pkt->pq->num_pending--;
			spin_lock(&pkt->pq->sent_lock);
			pkt->pq->num_sending++;
			list_move_tail(&pkt->list, &pkt->pq->sent);
			spin_unlock(&pkt->pq->sent_lock);
		}
		if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
			break;
	}

	/* advance the tail on the chip if necessary */
	if (ppd->sdma_descq_tail != tail_c) {
		ppd->sdma_generation = gen_c;
		dd->f_sdma_update_tail(ppd, tail_c);
	}

	if (nfree && !list_empty(pktlist))
		goto retry;

	return;
}

/* pq->lock must be held, get packets on the wire... */
static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
				   struct qib_user_sdma_queue *pq,
				   struct list_head *pktlist, int count)
{
	int ret = 0;
	unsigned long flags;

	if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
		return -ECOMM;

	spin_lock_irqsave(&ppd->sdma_lock, flags);

	if (unlikely(!__qib_sdma_running(ppd))) {
		ret = -ECOMM;
		goto unlock;
	}

	pq->num_pending += count;
	list_splice_tail_init(pktlist, &ppd->sdma_userpending);
	qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);

unlock:
	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
	return ret;
}

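/*
 * Entry point for user sdma writes (invoked from the driver's file write
 * path): parse the iovec into packets under pq->lock, lazily reclaim
 * completed descriptors, and push the new packets to the hardware queue.
 * Returns the number of packets queued, or a negative errno.
 */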
int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
			 struct qib_user_sdma_queue *pq,
			 const struct iovec *iov,
			 unsigned long dim)
{
	struct qib_devdata *dd = rcd->dd;
	struct qib_pportdata *ppd = rcd->ppd;
	int ret = 0;
	struct list_head list;
	int npkts = 0;

	INIT_LIST_HEAD(&list);

	mutex_lock(&pq->lock);

	/* why not -ECOMM like qib_user_sdma_push_pkts() below? */
	if (!qib_sdma_running(ppd))
		goto done_unlock;

	/* if I have packets not complete yet */
	if (pq->added > ppd->sdma_descq_removed)
		qib_user_sdma_hwqueue_clean(ppd);
	/* if I have complete packets to be freed */
	if (pq->num_sending)
		qib_user_sdma_queue_clean(ppd, pq);

	while (dim) {
		int mxp = 8;
		int ndesc = 0;

		down_write(&current->mm->mmap_sem);
		ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
				iov, dim, &list, &mxp, &ndesc);
		up_write(&current->mm->mmap_sem);

		if (ret < 0)
			goto done_unlock;
		else {
			dim -= ret;
			iov += ret;
		}

		/* force packets onto the sdma hw queue... */
		if (!list_empty(&list)) {
			/*
			 * Lazily clean hw queue.
			 */
			if (qib_sdma_descq_freecnt(ppd) < ndesc) {
				qib_user_sdma_hwqueue_clean(ppd);
				if (pq->num_sending)
					qib_user_sdma_queue_clean(ppd, pq);
			}

			ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
			if (ret < 0)
				goto done_unlock;
			else {
				npkts += mxp;
				pq->counter += mxp;
			}
		}
	}

done_unlock:
	if (!list_empty(&list))
		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
	mutex_unlock(&pq->lock);

	return (ret < 0) ? ret : npkts;
}

int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
				struct qib_user_sdma_queue *pq)
{
	int ret = 0;

	mutex_lock(&pq->lock);
	qib_user_sdma_hwqueue_clean(ppd);
	ret = qib_user_sdma_queue_clean(ppd, pq);
	mutex_unlock(&pq->lock);

	return ret;
}

u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq)
{
	return pq ? pq->sent_counter : 0;
}

u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq)
{
	return pq ? pq->counter : 0;
}