/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mmu_context.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "hfi.h"
#include "sdma.h"
#include "mmu_rb.h"
#include "user_sdma.h"
#include "verbs.h"  /* for the headers */
#include "common.h" /* for struct hfi1_tid_info */
#include "trace.h"

static uint hfi1_sdma_comp_ring_size = 128;
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
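/*
 * Note: sdma_comp_size is declared S_IRUGO only, so it is visible under
 * /sys/module/hfi1/parameters/ but can only be set at module load time
 * (e.g. something like "modprobe hfi1 sdma_comp_size=256", assuming the
 * usual module name).  The only validation performed below is the
 * non-zero check in hfi1_user_sdma_alloc_queues().
 */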
static unsigned initial_pkt_count = 8;

static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
static int pin_vector_pages(struct user_sdma_request *req,
                            struct user_sdma_iovec *iovec);
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
                               unsigned start, unsigned npages);
static int check_header_template(struct user_sdma_request *req,
                                 struct hfi1_pkt_header *hdr, u32 lrhlen,
                                 u32 datalen);
static int set_txreq_header(struct user_sdma_request *req,
                            struct user_sdma_txreq *tx, u32 datalen);
static int set_txreq_header_ahg(struct user_sdma_request *req,
                                struct user_sdma_txreq *tx, u32 len);
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
                                  struct hfi1_user_sdma_comp_q *cq,
                                  u16 idx, enum hfi1_sdma_comp_state state,
                                  int ret);
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);

static int defer_packet_queue(
        struct sdma_engine *sde,
        struct iowait_work *wait,
        struct sdma_txreq *txreq,
        uint seq,
        bool pkts_sent);
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
                           unsigned long len);
static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
                         void *arg2, bool *stop);
static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);

static struct mmu_rb_ops sdma_rb_ops = {
        .filter = sdma_rb_filter,
        .insert = sdma_rb_insert,
        .evict = sdma_rb_evict,
        .remove = sdma_rb_remove,
        .invalidate = sdma_rb_invalidate
};

static int defer_packet_queue(
        struct sdma_engine *sde,
        struct iowait_work *wait,
        struct sdma_txreq *txreq,
        uint seq,
        bool pkts_sent)
{
        struct hfi1_user_sdma_pkt_q *pq =
                container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);

        write_seqlock(&sde->waitlock);
        if (sdma_progress(sde, seq, txreq))
                goto eagain;
        /*
         * We are assuming that if the list is enqueued somewhere, it
         * is to the dmawait list since that is the only place where
         * it is supposed to be enqueued.
         */
        xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
        if (list_empty(&pq->busy.list)) {
                pq->busy.lock = &sde->waitlock;
                iowait_get_priority(&pq->busy);
                iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
        }
        write_sequnlock(&sde->waitlock);
        return -EBUSY;
eagain:
        write_sequnlock(&sde->waitlock);
        return -EAGAIN;
}
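/*
 * Packet queue state handshake (descriptive note): defer_packet_queue() is
 * the iowait sleep callback installed by iowait_init() below.  Returning
 * -EAGAIN (when sdma_progress() sees the ring has moved) lets the submitter
 * simply retry; returning -EBUSY means the queue was marked
 * SDMA_PKT_Q_DEFERRED and parked on the engine's dmawait list.
 * activate_packet_queue() is the matching wakeup callback that flips the
 * state back to SDMA_PKT_Q_ACTIVE and wakes any submitter sleeping in
 * hfi1_user_sdma_process_request().
 */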
static void activate_packet_queue(struct iowait *wait, int reason)
{
        struct hfi1_user_sdma_pkt_q *pq =
                container_of(wait, struct hfi1_user_sdma_pkt_q, busy);

        pq->busy.lock = NULL;
        xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
        wake_up(&wait->wait_dma);
}

int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
                                struct hfi1_filedata *fd)
{
        int ret = -ENOMEM;
        char buf[64];
        struct hfi1_devdata *dd;
        struct hfi1_user_sdma_comp_q *cq;
        struct hfi1_user_sdma_pkt_q *pq;

        if (!uctxt || !fd)
                return -EBADF;

        if (!hfi1_sdma_comp_ring_size)
                return -EINVAL;

        dd = uctxt->dd;

        pq = kzalloc(sizeof(*pq), GFP_KERNEL);
        if (!pq)
                return -ENOMEM;
        pq->dd = dd;
        pq->ctxt = uctxt->ctxt;
        pq->subctxt = fd->subctxt;
        pq->n_max_reqs = hfi1_sdma_comp_ring_size;
        atomic_set(&pq->n_reqs, 0);
        init_waitqueue_head(&pq->wait);
        atomic_set(&pq->n_locked, 0);
        pq->mm = fd->mm;

        iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
                    activate_packet_queue, NULL, NULL);
        pq->reqidx = 0;

        pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
                           sizeof(*pq->reqs),
                           GFP_KERNEL);
        if (!pq->reqs)
                goto pq_reqs_nomem;

        pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
                                 sizeof(*pq->req_in_use),
                                 GFP_KERNEL);
        if (!pq->req_in_use)
                goto pq_reqs_no_in_use;

        snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
                 fd->subctxt);
        pq->txreq_cache = kmem_cache_create(buf,
                                            sizeof(struct user_sdma_txreq),
                                            L1_CACHE_BYTES,
                                            SLAB_HWCACHE_ALIGN,
                                            NULL);
        if (!pq->txreq_cache) {
                dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
                           uctxt->ctxt);
                goto pq_txreq_nomem;
        }

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                goto cq_nomem;

        cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
                                 * hfi1_sdma_comp_ring_size));
        if (!cq->comps)
                goto cq_comps_nomem;

        cq->nentries = hfi1_sdma_comp_ring_size;

        ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
                                   &pq->handler);
        if (ret) {
                dd_dev_err(dd, "Failed to register with MMU %d", ret);
                goto pq_mmu_fail;
        }

        rcu_assign_pointer(fd->pq, pq);
        fd->cq = cq;

        return 0;

pq_mmu_fail:
        vfree(cq->comps);
cq_comps_nomem:
        kfree(cq);
cq_nomem:
        kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
        kfree(pq->req_in_use);
pq_reqs_no_in_use:
        kfree(pq->reqs);
pq_reqs_nomem:
        kfree(pq);

        return ret;
}

static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
{
        unsigned long flags;
        seqlock_t *lock = pq->busy.lock;

        if (!lock)
                return;
        write_seqlock_irqsave(lock, flags);
        if (!list_empty(&pq->busy.list)) {
                list_del_init(&pq->busy.list);
                pq->busy.lock = NULL;
        }
        write_sequnlock_irqrestore(lock, flags);
}
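/*
 * Teardown note: hfi1_user_sdma_free_queues() below relies on a specific
 * ordering -- the fd->pq pointer is cleared under pq_rcu_lock and then
 * synchronize_srcu() is used so that any hfi1_user_sdma_process_request()
 * caller still holding an SRCU reference drains out before the MMU
 * notifier handler is unregistered and the queue memory is freed.
 */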
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
                               struct hfi1_ctxtdata *uctxt)
{
        struct hfi1_user_sdma_pkt_q *pq;

        trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);

        spin_lock(&fd->pq_rcu_lock);
        pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
                                    lockdep_is_held(&fd->pq_rcu_lock));
        if (pq) {
                rcu_assign_pointer(fd->pq, NULL);
                spin_unlock(&fd->pq_rcu_lock);
                synchronize_srcu(&fd->pq_srcu);
                /* at this point there can be no more new requests */
                if (pq->handler)
                        hfi1_mmu_rb_unregister(pq->handler);
                iowait_sdma_drain(&pq->busy);
                /* Wait until all requests have been freed. */
                wait_event_interruptible(
                        pq->wait,
                        !atomic_read(&pq->n_reqs));
                kfree(pq->reqs);
                kfree(pq->req_in_use);
                kmem_cache_destroy(pq->txreq_cache);
                flush_pq_iowait(pq);
                kfree(pq);
        } else {
                spin_unlock(&fd->pq_rcu_lock);
        }
        if (fd->cq) {
                vfree(fd->cq->comps);
                kfree(fd->cq);
                fd->cq = NULL;
        }
        return 0;
}

static u8 dlid_to_selector(u16 dlid)
{
        static u8 mapping[256];
        static int initialized;
        static u8 next;
        int hash;

        if (!initialized) {
                memset(mapping, 0xFF, 256);
                initialized = 1;
        }

        hash = ((dlid >> 8) ^ dlid) & 0xFF;
        if (mapping[hash] == 0xFF) {
                mapping[hash] = next;
                next = (next + 1) & 0x7F;
        }

        return mapping[hash];
}
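/*
 * Example (descriptive only): for dlid 0x1234 the hash is
 * (0x12 ^ 0x34) & 0xFF = 0x26.  The first time a given hash value is seen
 * it is assigned the next selector in a wrapping 0..127 sequence, so
 * distinct DLIDs tend to spread across SDMA engines while repeated sends
 * to the same DLID keep using the same selector.
 */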
/**
 * hfi1_user_sdma_process_request() - Process and start a user sdma request
 * @fd: valid file descriptor
 * @iovec: array of io vectors to process
 * @dim: overall iovec array size
 * @count: number of io vector array entries processed
 */
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
                                   struct iovec *iovec, unsigned long dim,
                                   unsigned long *count)
{
        int ret = 0, i;
        struct hfi1_ctxtdata *uctxt = fd->uctxt;
        struct hfi1_user_sdma_pkt_q *pq =
                srcu_dereference(fd->pq, &fd->pq_srcu);
        struct hfi1_user_sdma_comp_q *cq = fd->cq;
        struct hfi1_devdata *dd = pq->dd;
        unsigned long idx = 0;
        u8 pcount = initial_pkt_count;
        struct sdma_req_info info;
        struct user_sdma_request *req;
        u8 opcode, sc, vl;
        u16 pkey;
        u32 slid;
        u16 dlid;
        u32 selector;

        if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
                hfi1_cdbg(
                   SDMA,
                   "[%u:%u:%u] First vector not big enough for header %lu/%lu",
                   dd->unit, uctxt->ctxt, fd->subctxt,
                   iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr));
                return -EINVAL;
        }
        ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
        if (ret) {
                hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)",
                          dd->unit, uctxt->ctxt, fd->subctxt, ret);
                return -EFAULT;
        }

        trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
                                     (u16 *)&info);
        if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
                hfi1_cdbg(SDMA,
                          "[%u:%u:%u:%u] Invalid comp index",
                          dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
                return -EINVAL;
        }

        /*
         * Sanity check the header io vector count.  Need at least 1 vector
         * (header) and cannot be larger than the actual io vector count.
         */
        if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
                hfi1_cdbg(SDMA,
                          "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
                          dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
                          req_iovcnt(info.ctrl), dim);
                return -EINVAL;
        }

        if (!info.fragsize) {
                hfi1_cdbg(SDMA,
                          "[%u:%u:%u:%u] Request does not specify fragsize",
                          dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
                return -EINVAL;
        }

        /* Try to claim the request. */
        if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
                hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
                          dd->unit, uctxt->ctxt, fd->subctxt,
                          info.comp_idx);
                return -EBADSLT;
        }
        /*
         * All safety checks have been done and this request has been claimed.
         */
        trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
                                             info.comp_idx);
        req = pq->reqs + info.comp_idx;
        req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
        req->data_len = 0;
        req->pq = pq;
        req->cq = cq;
        req->ahg_idx = -1;
        req->iov_idx = 0;
        req->sent = 0;
        req->seqnum = 0;
        req->seqcomp = 0;
        req->seqsubmitted = 0;
        req->tids = NULL;
        req->has_error = 0;
        INIT_LIST_HEAD(&req->txps);

        memcpy(&req->info, &info, sizeof(info));

        /* The request is initialized, count it */
        atomic_inc(&pq->n_reqs);

        if (req_opcode(info.ctrl) == EXPECTED) {
                /* expected must have a TID info and at least one data vector */
                if (req->data_iovs < 2) {
                        SDMA_DBG(req,
                                 "Not enough vectors for expected request");
                        ret = -EINVAL;
                        goto free_req;
                }
                req->data_iovs--;
        }

        if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
                SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
                         MAX_VECTORS_PER_REQ);
                ret = -EINVAL;
                goto free_req;
        }
        /* Copy the header from the user buffer */
        ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
                             sizeof(req->hdr));
        if (ret) {
                SDMA_DBG(req, "Failed to copy header template (%d)", ret);
                ret = -EFAULT;
                goto free_req;
        }

        /* If Static rate control is not enabled, sanitize the header. */
        if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL))
                req->hdr.pbc[2] = 0;

        /* Validate the opcode. Do not trust packets from user space blindly. */
        opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff;
        if ((opcode & USER_OPCODE_CHECK_MASK) !=
            USER_OPCODE_CHECK_VAL) {
                SDMA_DBG(req, "Invalid opcode (%d)", opcode);
                ret = -EINVAL;
                goto free_req;
        }
        /*
         * Validate the vl. Do not trust packets from user space blindly.
         * VL comes from PBC, SC comes from LRH, and the VL needs to
         * match the SC look up.
         */
        vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF;
        sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) |
              (((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4));
        if (vl >= dd->pport->vls_operational ||
            vl != sc_to_vlt(dd, sc)) {
                SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
                ret = -EINVAL;
                goto free_req;
        }

        /* Checking P_KEY for requests from user-space */
        pkey = (u16)be32_to_cpu(req->hdr.bth[0]);
        slid = be16_to_cpu(req->hdr.lrh[3]);
        if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
                ret = -EINVAL;
                goto free_req;
        }

        /*
         * Also should check the BTH.lnh. If it says the next header is GRH
         * then the RXE parsing will be off and will land in the middle of
         * the KDETH or miss it entirely.
         */
        if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) {
                SDMA_DBG(req, "User tried to pass in a GRH");
                ret = -EINVAL;
                goto free_req;
        }

        req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
        /*
         * Calculate the initial TID offset based on the values of
         * KDETH.OFFSET and KDETH.OM that are passed in.
         */
        req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
                         (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
                          KDETH_OM_LARGE : KDETH_OM_SMALL);
        trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
                                               info.comp_idx, req->tidoffset);
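        /*
         * Worked example (assuming the usual KDETH_OM_SMALL/KDETH_OM_LARGE
         * values of 4 and 64 bytes): a header template with KDETH.OFFSET = 3
         * and KDETH.OM = 1 yields an initial tidoffset of 3 * 64 = 192 bytes,
         * whereas the same OFFSET with OM = 0 would mean 3 * 4 = 12 bytes.
         */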
        idx++;

        /* Save all the IO vector structures */
        for (i = 0; i < req->data_iovs; i++) {
                req->iovs[i].offset = 0;
                INIT_LIST_HEAD(&req->iovs[i].list);
                memcpy(&req->iovs[i].iov,
                       iovec + idx++,
                       sizeof(req->iovs[i].iov));
                ret = pin_vector_pages(req, &req->iovs[i]);
                if (ret) {
                        req->data_iovs = i;
                        goto free_req;
                }
                req->data_len += req->iovs[i].iov.iov_len;
        }
        trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
                                         info.comp_idx, req->data_len);
        if (pcount > req->info.npkts)
                pcount = req->info.npkts;
        /*
         * Copy any TID info
         * User space will provide the TID info only when the
         * request type is EXPECTED. This is true even if there is
         * only one packet in the request and the header is already
         * setup. The reason for the singular TID case is that the
         * driver needs to perform safety checks.
         */
        if (req_opcode(req->info.ctrl) == EXPECTED) {
                u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
                u32 *tmp;

                if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
                        ret = -EINVAL;
                        goto free_req;
                }

                /*
                 * We have to copy all of the tids because they may vary
                 * in size and, therefore, the TID count might not be
                 * equal to the pkt count. However, there is no way to
                 * tell at this point.
                 */
                tmp = memdup_user(iovec[idx].iov_base,
                                  ntids * sizeof(*req->tids));
                if (IS_ERR(tmp)) {
                        ret = PTR_ERR(tmp);
                        SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
                                 ntids, ret);
                        goto free_req;
                }
                req->tids = tmp;
                req->n_tids = ntids;
                req->tididx = 0;
                idx++;
        }

        dlid = be16_to_cpu(req->hdr.lrh[1]);
        selector = dlid_to_selector(dlid);
        selector += uctxt->ctxt + fd->subctxt;
        req->sde = sdma_select_user_engine(dd, selector, vl);

        if (!req->sde || !sdma_running(req->sde)) {
                ret = -ECOMM;
                goto free_req;
        }

        /* We don't need an AHG entry if the request contains only one packet */
        if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
                req->ahg_idx = sdma_ahg_alloc(req->sde);

        set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
        pq->state = SDMA_PKT_Q_ACTIVE;
        /* Send the first N packets in the request to buy us some time */
        ret = user_sdma_send_pkts(req, pcount);
        if (unlikely(ret < 0 && ret != -EBUSY))
                goto free_req;

        /*
         * This is a somewhat blocking send implementation.
         * The driver will block the caller until all packets of the
         * request have been submitted to the SDMA engine. However, it
         * will not wait for send completions.
         */
        while (req->seqsubmitted != req->info.npkts) {
                ret = user_sdma_send_pkts(req, pcount);
                if (ret < 0) {
                        if (ret != -EBUSY)
                                goto free_req;
                        if (wait_event_interruptible_timeout(
                                pq->busy.wait_dma,
                                pq->state == SDMA_PKT_Q_ACTIVE,
                                msecs_to_jiffies(
                                        SDMA_IOWAIT_TIMEOUT)) <= 0)
                                flush_pq_iowait(pq);
                }
        }
        *count += idx;
        return 0;
free_req:
        /*
         * If seqsubmitted == npkts, the completion routine controls the
         * final state.  If seqsubmitted < npkts, wait for any outstanding
         * packets to finish before cleaning up.
         */
        if (req->seqsubmitted < req->info.npkts) {
                if (req->seqsubmitted)
                        wait_event(pq->busy.wait_dma,
                                   (req->seqcomp == req->seqsubmitted - 1));
                user_sdma_free_request(req, true);
                pq_update(pq);
                set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
        }
        return ret;
}
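/*
 * Request layout note (reconstructed from the checks above, not from a
 * separate spec): user space submits iovec[0] = struct sdma_req_info
 * immediately followed by the struct hfi1_pkt_header template, then the
 * payload vectors, and, for EXPECTED (tid receive) requests only, one
 * final vector holding the TID pair array.  *count reports how many
 * vectors were consumed.
 */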
static inline u32 compute_data_length(struct user_sdma_request *req,
                                      struct user_sdma_txreq *tx)
{
        /*
         * Determine the proper size of the packet data.
         * The size of the data of the first packet is in the header
         * template. However, it includes the header and ICRC, which need
         * to be subtracted.
         * The minimum representable packet data length in a header is 4
         * bytes, therefore, when the requested data length is less than 4
         * bytes, there is only one packet, and its data length equals the
         * request data length.
         * The size of the remaining packets is the minimum of the frag
         * size (MTU) or remaining data in the request.
         */
        u32 len;

        if (!req->seqnum) {
                if (req->data_len < sizeof(u32))
                        len = req->data_len;
                else
                        len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
                               (sizeof(tx->hdr) - 4));
        } else if (req_opcode(req->info.ctrl) == EXPECTED) {
                u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
                             PAGE_SIZE;
                /*
                 * Get the data length based on the remaining space in the
                 * TID pair.
                 */
                len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
                /* If we've filled up the TID pair, move to the next one. */
                if (unlikely(!len) && ++req->tididx < req->n_tids &&
                    req->tids[req->tididx]) {
                        tidlen = EXP_TID_GET(req->tids[req->tididx],
                                             LEN) * PAGE_SIZE;
                        req->tidoffset = 0;
                        len = min_t(u32, tidlen, req->info.fragsize);
                }
                /*
                 * Since the TID pairs map entire pages, make sure that we
                 * are not going to try to send more data than we have
                 * remaining.
                 */
                len = min(len, req->data_len - req->sent);
        } else {
                len = min(req->data_len - req->sent, (u32)req->info.fragsize);
        }
        trace_hfi1_sdma_user_compute_length(req->pq->dd,
                                            req->pq->ctxt,
                                            req->pq->subctxt,
                                            req->info.comp_idx,
                                            len);
        return len;
}

static inline u32 pad_len(u32 len)
{
        if (len & (sizeof(u32) - 1))
                len += sizeof(u32) - (len & (sizeof(u32) - 1));
        return len;
}

static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
{
        /* (Size of complete header - size of PBC) + 4B ICRC + data length */
        return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
}
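/*
 * Worked example: pad_len(9) rounds the payload up to the next dword,
 * i.e. 12 bytes, and get_lrh_len(hdr, 12) then adds the portion of the
 * header covered by the LRH length (everything except the PBC) plus the
 * 4-byte ICRC; the callers below program this value, in dwords, into
 * LRH.PktLen and the PBC length field.
 */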
static int user_sdma_txadd_ahg(struct user_sdma_request *req,
                               struct user_sdma_txreq *tx,
                               u32 datalen)
{
        int ret;
        u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
        u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen));
        struct hfi1_user_sdma_pkt_q *pq = req->pq;

        /*
         * Copy the request header into the tx header because the HW needs
         * a cacheline-aligned address.
         * This copy could be avoided if the hdr member of
         * user_sdma_request were also cacheline aligned.
         */
        memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr));
        if (PBC2LRH(pbclen) != lrhlen) {
                pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
                tx->hdr.pbc[0] = cpu_to_le16(pbclen);
        }
        ret = check_header_template(req, &tx->hdr, lrhlen, datalen);
        if (ret)
                return ret;
        ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY,
                              sizeof(tx->hdr) + datalen, req->ahg_idx,
                              0, NULL, 0, user_sdma_txreq_cb);
        if (ret)
                return ret;
        ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr));
        if (ret)
                sdma_txclean(pq->dd, &tx->txreq);
        return ret;
}

static int user_sdma_txadd(struct user_sdma_request *req,
                           struct user_sdma_txreq *tx,
                           struct user_sdma_iovec *iovec, u32 datalen,
                           u32 *queued_ptr, u32 *data_sent_ptr,
                           u64 *iov_offset_ptr)
{
        int ret;
        unsigned int pageidx, len;
        unsigned long base, offset;
        u64 iov_offset = *iov_offset_ptr;
        u32 queued = *queued_ptr, data_sent = *data_sent_ptr;
        struct hfi1_user_sdma_pkt_q *pq = req->pq;

        base = (unsigned long)iovec->iov.iov_base;
        offset = offset_in_page(base + iovec->offset + iov_offset);
        pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >>
                   PAGE_SHIFT);
        len = offset + req->info.fragsize > PAGE_SIZE ?
              PAGE_SIZE - offset : req->info.fragsize;
        len = min((datalen - queued), len);
        ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx],
                              offset, len);
        if (ret) {
                SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret);
                return ret;
        }
        iov_offset += len;
        queued += len;
        data_sent += len;
        if (unlikely(queued < datalen && pageidx == iovec->npages &&
                     req->iov_idx < req->data_iovs - 1)) {
                iovec->offset += iov_offset;
                iovec = &req->iovs[++req->iov_idx];
                iov_offset = 0;
        }

        *queued_ptr = queued;
        *data_sent_ptr = data_sent;
        *iov_offset_ptr = iov_offset;
        return ret;
}
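/*
 * Overview of the send path below: user_sdma_send_pkts() builds up to
 * maxpkts tx requests per call.  For each packet it either attaches a full
 * header (no AHG, or the first packet of an AHG run via
 * user_sdma_txadd_ahg()) or programs only the per-packet deltas via
 * set_txreq_header_ahg(), adds the payload pages with user_sdma_txadd(),
 * and chains the txreq onto req->txps.  The whole list is then handed to
 * sdma_send_txlist() in one go.
 */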
static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
        int ret = 0;
        u16 count;
        unsigned npkts = 0;
        struct user_sdma_txreq *tx = NULL;
        struct hfi1_user_sdma_pkt_q *pq = NULL;
        struct user_sdma_iovec *iovec = NULL;

        if (!req->pq)
                return -EINVAL;

        pq = req->pq;

        /* If tx completion has reported an error, we are done. */
        if (READ_ONCE(req->has_error))
                return -EFAULT;

        /*
         * Check if we might have sent the entire request already
         */
        if (unlikely(req->seqnum == req->info.npkts)) {
                if (!list_empty(&req->txps))
                        goto dosend;
                return ret;
        }

        if (!maxpkts || maxpkts > req->info.npkts - req->seqnum)
                maxpkts = req->info.npkts - req->seqnum;

        while (npkts < maxpkts) {
                u32 datalen = 0, queued = 0, data_sent = 0;
                u64 iov_offset = 0;

                /*
                 * Check whether any of the completions have come back
                 * with errors. If so, we are not going to process any
                 * more packets from this request.
                 */
                if (READ_ONCE(req->has_error))
                        return -EFAULT;

                tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
                if (!tx)
                        return -ENOMEM;

                tx->flags = 0;
                tx->req = req;
                INIT_LIST_HEAD(&tx->list);

                /*
                 * For the last packet set the ACK request
                 * and disable header suppression.
                 */
                if (req->seqnum == req->info.npkts - 1)
                        tx->flags |= (TXREQ_FLAGS_REQ_ACK |
                                      TXREQ_FLAGS_REQ_DISABLE_SH);

                /*
                 * Calculate the payload size - this is min of the fragment
                 * (MTU) size or the remaining bytes in the request but only
                 * if we have payload data.
                 */
                if (req->data_len) {
                        iovec = &req->iovs[req->iov_idx];
                        if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) {
                                if (++req->iov_idx == req->data_iovs) {
                                        ret = -EFAULT;
                                        goto free_tx;
                                }
                                iovec = &req->iovs[req->iov_idx];
                                WARN_ON(iovec->offset);
                        }

                        datalen = compute_data_length(req, tx);

                        /*
                         * Disable header suppression for payloads <= 8 DWs.
                         * If there is an uncorrectable error in the receive
                         * data FIFO when the received payload size is less
                         * than or equal to 8 DWs, RxDmaDataFifoRdUncErr is
                         * not reported; RHF.EccErr is set instead when the
                         * header is not suppressed.
                         */
                        if (!datalen) {
                                SDMA_DBG(req,
                                         "Request has data but pkt len is 0");
                                ret = -EFAULT;
                                goto free_tx;
                        } else if (datalen <= 32) {
                                tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
                        }
                }

                if (req->ahg_idx >= 0) {
                        if (!req->seqnum) {
                                ret = user_sdma_txadd_ahg(req, tx, datalen);
                                if (ret)
                                        goto free_tx;
                        } else {
                                int changes;

                                changes = set_txreq_header_ahg(req, tx,
                                                               datalen);
                                if (changes < 0) {
                                        ret = changes;
                                        goto free_tx;
                                }
                        }
                } else {
                        ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
                                          datalen, user_sdma_txreq_cb);
                        if (ret)
                                goto free_tx;
                        /*
                         * Modify the header for this packet. This only needs
                         * to be done if we are not going to use AHG. Otherwise,
                         * the HW will do it based on the changes we gave it
                         * during sdma_txinit_ahg().
                         */
                        ret = set_txreq_header(req, tx, datalen);
                        if (ret)
                                goto free_txreq;
                }

                /*
                 * If the request contains any data vectors, add up to
                 * fragsize bytes to the descriptor.
                 */
                while (queued < datalen &&
                       (req->sent + data_sent) < req->data_len) {
                        ret = user_sdma_txadd(req, tx, iovec, datalen,
                                              &queued, &data_sent, &iov_offset);
                        if (ret)
                                goto free_txreq;
                }
                /*
                 * The txreq was submitted successfully so we can update
                 * the counters.
                 */
                req->koffset += datalen;
                if (req_opcode(req->info.ctrl) == EXPECTED)
                        req->tidoffset += datalen;
                req->sent += data_sent;
                if (req->data_len)
                        iovec->offset += iov_offset;
                list_add_tail(&tx->txreq.list, &req->txps);
                /*
                 * It is important to increment this here as it is used to
                 * generate the BTH.PSN and, therefore, can't be bulk-updated
                 * outside of the loop.
                 */
                tx->seqnum = req->seqnum++;
                npkts++;
        }
dosend:
        ret = sdma_send_txlist(req->sde,
                               iowait_get_ib_work(&pq->busy),
                               &req->txps, &count);
        req->seqsubmitted += count;
        if (req->seqsubmitted == req->info.npkts) {
                /*
                 * The txreq has already been submitted to the HW queue
                 * so we can free the AHG entry now. Corruption will not
                 * happen due to the sequential manner in which
                 * descriptors are processed.
                 */
                if (req->ahg_idx >= 0)
                        sdma_ahg_free(req->sde, req->ahg_idx);
        }
        return ret;

free_txreq:
        sdma_txclean(pq->dd, &tx->txreq);
free_tx:
        kmem_cache_free(pq->txreq_cache, tx);
        return ret;
}
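/*
 * The helpers below manage the per-queue cache of pinned user pages.  Each
 * data vector is pinned by pin_vector_pages() and tracked as an
 * sdma_mmu_node in the MMU rb tree registered in
 * hfi1_user_sdma_alloc_queues(); n_locked counts pinned pages, and
 * sdma_cache_evict() asks the rb code to shed idle nodes when the pin
 * limit would otherwise be exceeded.
 */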
static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{
        struct evict_data evict_data;

        evict_data.cleared = 0;
        evict_data.target = npages;
        hfi1_mmu_rb_evict(pq->handler, &evict_data);
        return evict_data.cleared;
}

static int pin_sdma_pages(struct user_sdma_request *req,
                          struct user_sdma_iovec *iovec,
                          struct sdma_mmu_node *node,
                          int npages)
{
        int pinned, cleared;
        struct page **pages;
        struct hfi1_user_sdma_pkt_q *pq = req->pq;

        pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
        if (!pages)
                return -ENOMEM;
        memcpy(pages, node->pages, node->npages * sizeof(*pages));

        npages -= node->npages;
retry:
        if (!hfi1_can_pin_pages(pq->dd, pq->mm,
                                atomic_read(&pq->n_locked), npages)) {
                cleared = sdma_cache_evict(pq, npages);
                if (cleared >= npages)
                        goto retry;
        }
        pinned = hfi1_acquire_user_pages(pq->mm,
                                         ((unsigned long)iovec->iov.iov_base +
                                          (node->npages * PAGE_SIZE)), npages, 0,
                                         pages + node->npages);
        if (pinned < 0) {
                kfree(pages);
                return pinned;
        }
        if (pinned != npages) {
                unpin_vector_pages(pq->mm, pages, node->npages, pinned);
                return -EFAULT;
        }
        kfree(node->pages);
        node->rb.len = iovec->iov.iov_len;
        node->pages = pages;
        atomic_add(pinned, &pq->n_locked);
        return pinned;
}

static void unpin_sdma_pages(struct sdma_mmu_node *node)
{
        if (node->npages) {
                unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
                atomic_sub(node->npages, &node->pq->n_locked);
        }
}

static int pin_vector_pages(struct user_sdma_request *req,
                            struct user_sdma_iovec *iovec)
{
        int ret = 0, pinned, npages;
        struct hfi1_user_sdma_pkt_q *pq = req->pq;
        struct sdma_mmu_node *node = NULL;
        struct mmu_rb_node *rb_node;
        struct iovec *iov;
        bool extracted;

        extracted =
                hfi1_mmu_rb_remove_unless_exact(pq->handler,
                                                (unsigned long)
                                                iovec->iov.iov_base,
                                                iovec->iov.iov_len, &rb_node);
        if (rb_node) {
                node = container_of(rb_node, struct sdma_mmu_node, rb);
                if (!extracted) {
                        atomic_inc(&node->refcount);
                        iovec->pages = node->pages;
                        iovec->npages = node->npages;
                        iovec->node = node;
                        return 0;
                }
        }

        if (!node) {
                node = kzalloc(sizeof(*node), GFP_KERNEL);
                if (!node)
                        return -ENOMEM;

                node->rb.addr = (unsigned long)iovec->iov.iov_base;
                node->pq = pq;
                atomic_set(&node->refcount, 0);
        }

        iov = &iovec->iov;
        npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len);
        if (node->npages < npages) {
                pinned = pin_sdma_pages(req, iovec, node, npages);
                if (pinned < 0) {
                        ret = pinned;
                        goto bail;
                }
                node->npages += pinned;
                npages = node->npages;
        }
        iovec->pages = node->pages;
        iovec->npages = npages;
        iovec->node = node;

        ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
        if (ret) {
                iovec->node = NULL;
                goto bail;
        }
        return 0;
bail:
        unpin_sdma_pages(node);
        kfree(node);
        return ret;
}
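/*
 * Cache behavior note (as used above): when the cached node matches the
 * buffer exactly, hfi1_mmu_rb_remove_unless_exact() leaves it in the tree
 * and the "!extracted" fast path only bumps the refcount.  When the node
 * is extracted instead, pin_sdma_pages() pins whatever additional pages
 * the buffer now needs and the enlarged node is re-inserted by
 * hfi1_mmu_rb_insert().
 */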
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
                               unsigned start, unsigned npages)
{
        hfi1_release_user_pages(mm, pages + start, npages, false);
        kfree(pages);
}

static int check_header_template(struct user_sdma_request *req,
                                 struct hfi1_pkt_header *hdr, u32 lrhlen,
                                 u32 datalen)
{
        /*
         * Perform safety checks for any type of packet:
         * - transfer size is multiple of 64 bytes
         * - packet length is multiple of 4 bytes
         * - packet length is not larger than MTU size
         *
         * These checks are only done for the first packet of the
         * transfer since the header is "given" to us by user space.
         * For the remainder of the packets we compute the values.
         */
        if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
            lrhlen > get_lrh_len(*hdr, req->info.fragsize))
                return -EINVAL;

        if (req_opcode(req->info.ctrl) == EXPECTED) {
                /*
                 * The header is checked only on the first packet. Furthermore,
                 * we ensure that at least one TID entry is copied when the
                 * request is submitted. Therefore, we don't have to verify that
                 * tididx points to something sane.
                 */
                u32 tidval = req->tids[req->tididx],
                    tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE,
                    tididx = EXP_TID_GET(tidval, IDX),
                    tidctrl = EXP_TID_GET(tidval, CTRL),
                    tidoff;
                __le32 kval = hdr->kdeth.ver_tid_offset;

                tidoff = KDETH_GET(kval, OFFSET) *
                         (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
                          KDETH_OM_LARGE : KDETH_OM_SMALL);
                /*
                 * Expected receive packets have the following
                 * additional checks:
                 * - offset is not larger than the TID size
                 * - TIDCtrl values match between header and TID array
                 * - TID indexes match between header and TID array
                 */
                if ((tidoff + datalen > tidlen) ||
                    KDETH_GET(kval, TIDCTRL) != tidctrl ||
                    KDETH_GET(kval, TID) != tididx)
                        return -EINVAL;
        }
        return 0;
}

/*
 * Correctly set the BTH.PSN field based on type of
 * transfer - eager packets can just increment the PSN but
 * expected packets encode generation and sequence in the
 * BTH.PSN field so just incrementing will result in errors.
 */
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
{
        u32 val = be32_to_cpu(bthpsn),
            mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull :
                    0xffffffull),
            psn = val & mask;

        if (expct)
                psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
                      ((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
        else
                psn = psn + frags;
        return psn & mask;
}
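/*
 * Example: with the 24-bit PSN mask and (hypothetically) an 11-bit
 * HFI1_KDETH_BTH_SEQ_MASK of 0x7ff, an eager packet simply advances
 * psn -> psn + frags, while an expected packet only advances the low
 * sequence bits: 0x1237fe + 3 wraps the sequence to give 0x123001,
 * leaving the generation bits above the sequence field untouched.
 */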
static int set_txreq_header(struct user_sdma_request *req,
                            struct user_sdma_txreq *tx, u32 datalen)
{
        struct hfi1_user_sdma_pkt_q *pq = req->pq;
        struct hfi1_pkt_header *hdr = &tx->hdr;
        u8 omfactor; /* KDETH.OM */
        u16 pbclen;
        int ret;
        u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));

        /* Copy the header template to the request before modification */
        memcpy(hdr, &req->hdr, sizeof(*hdr));

        /*
         * Check if the PBC and LRH length are mismatched. If so, adjust
         * both in the header.
         */
        pbclen = le16_to_cpu(hdr->pbc[0]);
        if (PBC2LRH(pbclen) != lrhlen) {
                pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
                hdr->pbc[0] = cpu_to_le16(pbclen);
                hdr->lrh[2] = cpu_to_be16(lrhlen >> 2);
                /*
                 * Third packet
                 * This is the first packet in the sequence that has
                 * a "static" size that can be used for the rest of
                 * the packets (besides the last one).
                 */
                if (unlikely(req->seqnum == 2)) {
                        /*
                         * From this point on the lengths in both the
                         * PBC and LRH are the same until the last
                         * packet.
                         * Adjust the template so we don't have to update
                         * every packet
                         */
                        req->hdr.pbc[0] = hdr->pbc[0];
                        req->hdr.lrh[2] = hdr->lrh[2];
                }
        }
        /*
         * We only have to modify the header if this is not the
         * first packet in the request. Otherwise, we use the
         * header given to us.
         */
        if (unlikely(!req->seqnum)) {
                ret = check_header_template(req, hdr, lrhlen, datalen);
                if (ret)
                        return ret;
                goto done;
        }

        hdr->bth[2] = cpu_to_be32(
                set_pkt_bth_psn(hdr->bth[2],
                                (req_opcode(req->info.ctrl) == EXPECTED),
                                req->seqnum));

        /* Set ACK request on last packet */
        if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
                hdr->bth[2] |= cpu_to_be32(1UL << 31);

        /* Set the new offset */
        hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
        /* Expected packets have to fill in the new TID information */
        if (req_opcode(req->info.ctrl) == EXPECTED) {
                tidval = req->tids[req->tididx];
                /*
                 * If the offset puts us at the end of the current TID,
                 * advance everything.
                 */
                if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
                                         PAGE_SIZE)) {
                        req->tidoffset = 0;
                        /*
                         * Since we don't copy all the TIDs all at once,
                         * we have to check again.
                         */
                        if (++req->tididx > req->n_tids - 1 ||
                            !req->tids[req->tididx]) {
                                return -EINVAL;
                        }
                        tidval = req->tids[req->tididx];
                }
                omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
                           KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
                           KDETH_OM_SMALL_SHIFT;
                /* Set KDETH.TIDCtrl based on value for this TID. */
                KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
                          EXP_TID_GET(tidval, CTRL));
                /* Set KDETH.TID based on value for this TID */
                KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
                          EXP_TID_GET(tidval, IDX));
                /* Clear KDETH.SH when DISABLE_SH flag is set */
                if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
                        KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
                /*
                 * Set the KDETH.OFFSET and KDETH.OM based on size of
                 * transfer.
                 */
                trace_hfi1_sdma_user_tid_info(
                        pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
                        req->tidoffset, req->tidoffset >> omfactor,
                        omfactor != KDETH_OM_SMALL_SHIFT);
                KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
                          req->tidoffset >> omfactor);
                KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
                          omfactor != KDETH_OM_SMALL_SHIFT);
        }
done:
        trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
                                    req->info.comp_idx, hdr, tidval);
        return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
}
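/*
 * set_txreq_header() above rewrites and DMA-maps a full header for every
 * packet.  set_txreq_header_ahg() below is the AHG variant: it only encodes
 * the handful of fields that change from packet to packet (PBC/LRH lengths,
 * BTH.PSN and the ACK bit, KDETH offset and, for expected packets, the TID
 * fields) as per-dword (offset, width, value) updates that the hardware
 * applies to the header copied with the first packet.
 */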
static int set_txreq_header_ahg(struct user_sdma_request *req,
                                struct user_sdma_txreq *tx, u32 datalen)
{
        u32 ahg[AHG_KDETH_ARRAY_SIZE];
        int idx = 0;
        u8 omfactor; /* KDETH.OM */
        struct hfi1_user_sdma_pkt_q *pq = req->pq;
        struct hfi1_pkt_header *hdr = &req->hdr;
        u16 pbclen = le16_to_cpu(hdr->pbc[0]);
        u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
        size_t array_size = ARRAY_SIZE(ahg);

        if (PBC2LRH(pbclen) != lrhlen) {
                /* PBC.PbcLengthDWs */
                idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
                                     (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
                if (idx < 0)
                        return idx;
                /* LRH.PktLen (we need the full 16 bits due to byte swap) */
                idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
                                     (__force u16)cpu_to_be16(lrhlen >> 2));
                if (idx < 0)
                        return idx;
        }

        /*
         * Do the common updates
         */
        /* BTH.PSN and BTH.A */
        val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
                (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
        if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
                val32 |= 1UL << 31;
        idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
                             (__force u16)cpu_to_be16(val32 >> 16));
        if (idx < 0)
                return idx;
        idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
                             (__force u16)cpu_to_be16(val32 & 0xffff));
        if (idx < 0)
                return idx;
        /* KDETH.Offset */
        idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
                             (__force u16)cpu_to_le16(req->koffset & 0xffff));
        if (idx < 0)
                return idx;
        idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
                             (__force u16)cpu_to_le16(req->koffset >> 16));
        if (idx < 0)
                return idx;
        if (req_opcode(req->info.ctrl) == EXPECTED) {
                __le16 val;

                tidval = req->tids[req->tididx];

                /*
                 * If the offset puts us at the end of the current TID,
                 * advance everything.
                 */
                if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
                                         PAGE_SIZE)) {
                        req->tidoffset = 0;
                        /*
                         * Since we don't copy all the TIDs all at once,
                         * we have to check again.
                         */
                        if (++req->tididx > req->n_tids - 1 ||
                            !req->tids[req->tididx])
                                return -EINVAL;
                        tidval = req->tids[req->tididx];
                }
                omfactor = ((EXP_TID_GET(tidval, LEN) *
                             PAGE_SIZE) >=
                            KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
                            KDETH_OM_SMALL_SHIFT;
                /* KDETH.OM and KDETH.OFFSET (TID) */
                idx = ahg_header_set(
                        ahg, idx, array_size, 7, 0, 16,
                        ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
                         ((req->tidoffset >> omfactor)
                          & 0x7fff)));
                if (idx < 0)
                        return idx;
                /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
                val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
                                  (EXP_TID_GET(tidval, IDX) & 0x3ff));

                if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
                        val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
                                                      INTR) <<
                                            AHG_KDETH_INTR_SHIFT));
                } else {
                        val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
                               cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
                               cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
                                                      INTR) <<
                                            AHG_KDETH_INTR_SHIFT));
                }

                idx = ahg_header_set(ahg, idx, array_size,
                                     7, 16, 14, (__force u16)val);
                if (idx < 0)
                        return idx;
        }

        trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
                                        req->info.comp_idx, req->sde->this_idx,
                                        req->ahg_idx, ahg, idx, tidval);
        sdma_txinit_ahg(&tx->txreq,
                        SDMA_TXREQ_F_USE_AHG,
                        datalen, req->ahg_idx, idx,
                        ahg, sizeof(req->hdr),
                        user_sdma_txreq_cb);

        return idx;
}
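/*
 * Note on the return value above: on success set_txreq_header_ahg() returns
 * the number of AHG update descriptors it filled in (the same count passed
 * to sdma_txinit_ahg()); a negative value from ahg_header_set() -- for
 * instance when the ahg[] array would overflow -- is propagated to the
 * caller, which treats it as an error.
 */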
/**
 * user_sdma_txreq_cb() - SDMA tx request completion callback.
 * @txreq: valid sdma tx request
 * @status: success/failure of request
 *
 * Called when the SDMA progress state machine gets notification that
 * the SDMA descriptors for this tx request have been processed by the
 * DMA engine. Called in interrupt context.
 * Only do work on completed sequences.
 */
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
        struct user_sdma_txreq *tx =
                container_of(txreq, struct user_sdma_txreq, txreq);
        struct user_sdma_request *req;
        struct hfi1_user_sdma_pkt_q *pq;
        struct hfi1_user_sdma_comp_q *cq;
        enum hfi1_sdma_comp_state state = COMPLETE;

        if (!tx->req)
                return;

        req = tx->req;
        pq = req->pq;
        cq = req->cq;

        if (status != SDMA_TXREQ_S_OK) {
                SDMA_DBG(req, "SDMA completion with error %d",
                         status);
                WRITE_ONCE(req->has_error, 1);
                state = ERROR;
        }

        req->seqcomp = tx->seqnum;
        kmem_cache_free(pq->txreq_cache, tx);

        /* sequence isn't complete?  We are done */
        if (req->seqcomp != req->info.npkts - 1)
                return;

        user_sdma_free_request(req, false);
        set_comp_state(pq, cq, req->info.comp_idx, state, status);
        pq_update(pq);
}

static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
        if (atomic_dec_and_test(&pq->n_reqs))
                wake_up(&pq->wait);
}

static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
{
        int i;

        if (!list_empty(&req->txps)) {
                struct sdma_txreq *t, *p;

                list_for_each_entry_safe(t, p, &req->txps, list) {
                        struct user_sdma_txreq *tx =
                                container_of(t, struct user_sdma_txreq, txreq);

                        list_del_init(&t->list);
                        sdma_txclean(req->pq->dd, t);
                        kmem_cache_free(req->pq->txreq_cache, tx);
                }
        }

        for (i = 0; i < req->data_iovs; i++) {
                struct sdma_mmu_node *node = req->iovs[i].node;

                if (!node)
                        continue;

                req->iovs[i].node = NULL;

                if (unpin)
                        hfi1_mmu_rb_remove(req->pq->handler,
                                           &node->rb);
                else
                        atomic_dec(&node->refcount);
        }

        kfree(req->tids);
        clear_bit(req->info.comp_idx, req->pq->req_in_use);
}

static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
                                  struct hfi1_user_sdma_comp_q *cq,
                                  u16 idx, enum hfi1_sdma_comp_state state,
                                  int ret)
{
        if (state == ERROR)
                cq->comps[idx].errcode = -ret;
        smp_wmb(); /* make sure errcode is visible first */
        cq->comps[idx].status = state;
        trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
                                        idx, state, ret);
}

static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
                           unsigned long len)
{
        return (bool)(node->addr == addr);
}

static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
{
        struct sdma_mmu_node *node =
                container_of(mnode, struct sdma_mmu_node, rb);

        atomic_inc(&node->refcount);
        return 0;
}

/*
 * Return 1 to remove the node from the rb tree and call the remove op.
 *
 * Called with the rb tree lock held.
 */
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
                         void *evict_arg, bool *stop)
{
        struct sdma_mmu_node *node =
                container_of(mnode, struct sdma_mmu_node, rb);
        struct evict_data *evict_data = evict_arg;

        /* is this node still being used? */
        if (atomic_read(&node->refcount))
                return 0; /* keep this node */

        /* this node will be evicted, add its pages to our count */
        evict_data->cleared += node->npages;

        /* have enough pages been cleared? */
        if (evict_data->cleared >= evict_data->target)
                *stop = true;

        return 1; /* remove this node */
}

static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
{
        struct sdma_mmu_node *node =
                container_of(mnode, struct sdma_mmu_node, rb);

        unpin_sdma_pages(node);
        kfree(node);
}

static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
{
        struct sdma_mmu_node *node =
                container_of(mnode, struct sdma_mmu_node, rb);

        if (!atomic_read(&node->refcount))
                return 1;
        return 0;
}
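/*
 * Completion protocol summary (descriptive, based on set_comp_state() and
 * user_sdma_txreq_cb() above): each request's slot in the completion ring
 * (allocated with vmalloc_user() so it can be mapped to user space) moves
 * QUEUED -> COMPLETE, or QUEUED -> ERROR with errcode holding the positive
 * error value.  The smp_wmb() in set_comp_state() orders the errcode write
 * before the status update so that a reader polling the status field never
 * observes ERROR with a stale errcode.
 */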