// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2020 - Cornelis Networks, Inc.
 * Copyright(c) 2015 - 2018 Intel Corporation.
 */

#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mmu_context.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "hfi.h"
#include "sdma.h"
#include "mmu_rb.h"
#include "user_sdma.h"
#include "verbs.h"  /* for the headers */
#include "common.h" /* for struct hfi1_tid_info */
#include "trace.h"

static uint hfi1_sdma_comp_ring_size = 128;
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");

static unsigned initial_pkt_count = 8;

static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
static int pin_vector_pages(struct user_sdma_request *req,
			    struct user_sdma_iovec *iovec);
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
			       unsigned start, unsigned npages);
static int check_header_template(struct user_sdma_request *req,
				 struct hfi1_pkt_header *hdr, u32 lrhlen,
				 u32 datalen);
static int set_txreq_header(struct user_sdma_request *req,
			    struct user_sdma_txreq *tx, u32 datalen);
static int set_txreq_header_ahg(struct user_sdma_request *req,
				struct user_sdma_txreq *tx, u32 len);
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
				  struct hfi1_user_sdma_comp_q *cq,
				  u16 idx, enum hfi1_sdma_comp_state state,
				  int ret);
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);

static int defer_packet_queue(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *txreq,
	uint seq,
	bool pkts_sent);
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
			   unsigned long len);
static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
			 void *arg2, bool *stop);
static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);

static struct mmu_rb_ops sdma_rb_ops = {
	.filter = sdma_rb_filter,
	.insert = sdma_rb_insert,
	.evict = sdma_rb_evict,
	.remove = sdma_rb_remove,
	.invalidate = sdma_rb_invalidate
};

static int defer_packet_queue(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *txreq,
	uint seq,
	bool pkts_sent)
{
	struct hfi1_user_sdma_pkt_q *pq =
		container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);

	write_seqlock(&sde->waitlock);
	trace_hfi1_usdma_defer(pq, sde, &pq->busy);
	if (sdma_progress(sde, seq, txreq))
		goto eagain;
	/*
	 * We are assuming that if the list is enqueued somewhere, it
	 * is to the dmawait list since that is the only place where
	 * it is supposed to be enqueued.
	 */
	xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
	if (list_empty(&pq->busy.list)) {
		pq->busy.lock = &sde->waitlock;
		iowait_get_priority(&pq->busy);
		iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
	}
	write_sequnlock(&sde->waitlock);
	return -EBUSY;
eagain:
	write_sequnlock(&sde->waitlock);
	return -EAGAIN;
}

static void activate_packet_queue(struct iowait *wait, int reason)
{
	struct hfi1_user_sdma_pkt_q *pq =
		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);

	trace_hfi1_usdma_activate(pq, wait, reason);
	xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
	wake_up(&wait->wait_dma);
};

int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
				struct hfi1_filedata *fd)
{
	int ret = -ENOMEM;
	char buf[64];
	struct hfi1_devdata *dd;
	struct hfi1_user_sdma_comp_q *cq;
	struct hfi1_user_sdma_pkt_q *pq;

	if (!uctxt || !fd)
		return -EBADF;

	if (!hfi1_sdma_comp_ring_size)
		return -EINVAL;

	dd = uctxt->dd;

	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
	if (!pq)
		return -ENOMEM;
	pq->dd = dd;
	pq->ctxt = uctxt->ctxt;
	pq->subctxt = fd->subctxt;
	pq->n_max_reqs = hfi1_sdma_comp_ring_size;
	atomic_set(&pq->n_reqs, 0);
	init_waitqueue_head(&pq->wait);
	atomic_set(&pq->n_locked, 0);

	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
		    activate_packet_queue, NULL, NULL);
	pq->reqidx = 0;

	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
			   sizeof(*pq->reqs),
			   GFP_KERNEL);
	if (!pq->reqs)
		goto pq_reqs_nomem;

	pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
				 sizeof(*pq->req_in_use),
				 GFP_KERNEL);
	if (!pq->req_in_use)
		goto pq_reqs_no_in_use;

	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
		 fd->subctxt);
	pq->txreq_cache = kmem_cache_create(buf,
					    sizeof(struct user_sdma_txreq),
					    L1_CACHE_BYTES,
					    SLAB_HWCACHE_ALIGN,
					    NULL);
	if (!pq->txreq_cache) {
		dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
			   uctxt->ctxt);
		goto pq_txreq_nomem;
	}

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		goto cq_nomem;

	cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
				 * hfi1_sdma_comp_ring_size));
	if (!cq->comps)
		goto cq_comps_nomem;

	cq->nentries = hfi1_sdma_comp_ring_size;

	ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
				   &pq->handler);
	if (ret) {
		dd_dev_err(dd, "Failed to register with MMU %d", ret);
		goto pq_mmu_fail;
	}

	rcu_assign_pointer(fd->pq, pq);
	fd->cq = cq;

	return 0;

pq_mmu_fail:
	vfree(cq->comps);
cq_comps_nomem:
	kfree(cq);
cq_nomem:
	kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
	kfree(pq->req_in_use);
pq_reqs_no_in_use:
	kfree(pq->reqs);
pq_reqs_nomem:
	kfree(pq);

	return ret;
}

static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
{
	unsigned long flags;
	seqlock_t *lock = pq->busy.lock;

	if (!lock)
		return;
	write_seqlock_irqsave(lock, flags);
	if (!list_empty(&pq->busy.list)) {
		list_del_init(&pq->busy.list);
		pq->busy.lock = NULL;
	}
	write_sequnlock_irqrestore(lock, flags);
}

int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
			       struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_user_sdma_pkt_q *pq;

	trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);

	spin_lock(&fd->pq_rcu_lock);
	pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
				    lockdep_is_held(&fd->pq_rcu_lock));
	if (pq) {
		rcu_assign_pointer(fd->pq, NULL);
		spin_unlock(&fd->pq_rcu_lock);
		synchronize_srcu(&fd->pq_srcu);
		/* at this point there can be no more new requests */
		if (pq->handler)
			hfi1_mmu_rb_unregister(pq->handler);
		iowait_sdma_drain(&pq->busy);
		/* Wait until all requests have been freed. */
		wait_event_interruptible(
			pq->wait,
			!atomic_read(&pq->n_reqs));
		kfree(pq->reqs);
		kfree(pq->req_in_use);
		kmem_cache_destroy(pq->txreq_cache);
		flush_pq_iowait(pq);
		kfree(pq);
	} else {
		spin_unlock(&fd->pq_rcu_lock);
	}
	if (fd->cq) {
		vfree(fd->cq->comps);
		kfree(fd->cq);
		fd->cq = NULL;
	}
	return 0;
}

static u8 dlid_to_selector(u16 dlid)
{
	static u8 mapping[256];
	static int initialized;
	static u8 next;
	int hash;

	if (!initialized) {
		memset(mapping, 0xFF, 256);
		initialized = 1;
	}

	hash = ((dlid >> 8) ^ dlid) & 0xFF;
	if (mapping[hash] == 0xFF) {
		mapping[hash] = next;
		next = (next + 1) & 0x7F;
	}

	return mapping[hash];
}

/**
 * hfi1_user_sdma_process_request() - Process and start a user sdma request
 * @fd: valid file descriptor
 * @iovec: array of io vectors to process
 * @dim: overall iovec array size
 * @count: number of io vector array entries processed
 */
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
				   struct iovec *iovec, unsigned long dim,
				   unsigned long *count)
{
	int ret = 0, i;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_sdma_pkt_q *pq =
		srcu_dereference(fd->pq, &fd->pq_srcu);
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	struct hfi1_devdata *dd = pq->dd;
	unsigned long idx = 0;
	u8 pcount = initial_pkt_count;
	struct sdma_req_info info;
	struct user_sdma_request *req;
	u8 opcode, sc, vl;
	u16 pkey;
	u32 slid;
	u16 dlid;
	u32 selector;

	if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
		hfi1_cdbg(
			SDMA,
			"[%u:%u:%u] First vector not big enough for header %lu/%lu",
			dd->unit, uctxt->ctxt, fd->subctxt,
			iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr));
		return -EINVAL;
	}
	ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
	if (ret) {
		hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)",
			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
		return -EFAULT;
	}

	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
				     (u16 *)&info);
	if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Invalid comp index",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
		return -EINVAL;
	}

	/*
	 * Sanity check the header io vector count. Need at least 1 vector
	 * (header) and cannot be larger than the actual io vector count.
	 */
	if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
			  req_iovcnt(info.ctrl), dim);
		return -EINVAL;
	}

	if (!info.fragsize) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Request does not specify fragsize",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
		return -EINVAL;
	}

	/* Try to claim the request. */
	if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
			  dd->unit, uctxt->ctxt, fd->subctxt,
			  info.comp_idx);
		return -EBADSLT;
	}
	/*
	 * All safety checks have been done and this request has been claimed.
	 */
	trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
					     info.comp_idx);
	req = pq->reqs + info.comp_idx;
	req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
	req->data_len = 0;
	req->pq = pq;
	req->cq = cq;
	req->ahg_idx = -1;
	req->iov_idx = 0;
	req->sent = 0;
	req->seqnum = 0;
	req->seqcomp = 0;
	req->seqsubmitted = 0;
	req->tids = NULL;
	req->has_error = 0;
	INIT_LIST_HEAD(&req->txps);

	memcpy(&req->info, &info, sizeof(info));

	/* The request is initialized, count it */
	atomic_inc(&pq->n_reqs);

	if (req_opcode(info.ctrl) == EXPECTED) {
		/* expected must have a TID info and at least one data vector */
		if (req->data_iovs < 2) {
			SDMA_DBG(req,
				 "Not enough vectors for expected request");
			ret = -EINVAL;
			goto free_req;
		}
		req->data_iovs--;
	}

	if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
		SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
			 MAX_VECTORS_PER_REQ);
		ret = -EINVAL;
		goto free_req;
	}
	/* Copy the header from the user buffer */
	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
			     sizeof(req->hdr));
	if (ret) {
		SDMA_DBG(req, "Failed to copy header template (%d)", ret);
		ret = -EFAULT;
		goto free_req;
	}

	/* If Static rate control is not enabled, sanitize the header. */
	if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL))
		req->hdr.pbc[2] = 0;

	/* Validate the opcode. Do not trust packets from user space blindly. */
	opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff;
	if ((opcode & USER_OPCODE_CHECK_MASK) !=
	     USER_OPCODE_CHECK_VAL) {
		SDMA_DBG(req, "Invalid opcode (%d)", opcode);
		ret = -EINVAL;
		goto free_req;
	}
	/*
	 * Validate the vl. Do not trust packets from user space blindly.
	 * VL comes from PBC, SC comes from LRH, and the VL needs to
	 * match the SC look up.
	 */
	vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF;
	sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) |
	      (((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4));
	if (vl >= dd->pport->vls_operational ||
	    vl != sc_to_vlt(dd, sc)) {
		SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
		ret = -EINVAL;
		goto free_req;
	}

	/* Checking P_KEY for requests from user-space */
	pkey = (u16)be32_to_cpu(req->hdr.bth[0]);
	slid = be16_to_cpu(req->hdr.lrh[3]);
	if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
		ret = -EINVAL;
		goto free_req;
	}

	/*
	 * Also should check the BTH.lnh. If it says the next header is GRH then
	 * the RXE parsing will be off and will land in the middle of the KDETH
	 * or miss it entirely.
	 */
	if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) {
		SDMA_DBG(req, "User tried to pass in a GRH");
		ret = -EINVAL;
		goto free_req;
	}

	req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
	/*
	 * Calculate the initial TID offset based on the values of
	 * KDETH.OFFSET and KDETH.OM that are passed in.
	 */
	req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
			 (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
			  KDETH_OM_LARGE : KDETH_OM_SMALL);
	trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
					       info.comp_idx, req->tidoffset);
	idx++;

	/* Save all the IO vector structures */
	for (i = 0; i < req->data_iovs; i++) {
		req->iovs[i].offset = 0;
		INIT_LIST_HEAD(&req->iovs[i].list);
		memcpy(&req->iovs[i].iov,
		       iovec + idx++,
		       sizeof(req->iovs[i].iov));
		ret = pin_vector_pages(req, &req->iovs[i]);
		if (ret) {
			req->data_iovs = i;
			goto free_req;
		}
		req->data_len += req->iovs[i].iov.iov_len;
	}
	trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
					 info.comp_idx, req->data_len);
	if (pcount > req->info.npkts)
		pcount = req->info.npkts;
	/*
	 * Copy any TID info
	 * User space will provide the TID info only when the
	 * request type is EXPECTED. This is true even if there is
	 * only one packet in the request and the header is already
	 * setup. The reason for the singular TID case is that the
	 * driver needs to perform safety checks.
	 */
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
		u32 *tmp;

		if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
			ret = -EINVAL;
			goto free_req;
		}

		/*
		 * We have to copy all of the tids because they may vary
		 * in size and, therefore, the TID count might not be
		 * equal to the pkt count. However, there is no way to
		 * tell at this point.
		 */
		tmp = memdup_user(iovec[idx].iov_base,
				  ntids * sizeof(*req->tids));
		if (IS_ERR(tmp)) {
			ret = PTR_ERR(tmp);
			SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
				 ntids, ret);
			goto free_req;
		}
		req->tids = tmp;
		req->n_tids = ntids;
		req->tididx = 0;
		idx++;
	}

	dlid = be16_to_cpu(req->hdr.lrh[1]);
	selector = dlid_to_selector(dlid);
	selector += uctxt->ctxt + fd->subctxt;
	req->sde = sdma_select_user_engine(dd, selector, vl);

	if (!req->sde || !sdma_running(req->sde)) {
		ret = -ECOMM;
		goto free_req;
	}

	/* We don't need an AHG entry if the request contains only one packet */
	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
		req->ahg_idx = sdma_ahg_alloc(req->sde);

	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
	pq->state = SDMA_PKT_Q_ACTIVE;

	/*
	 * This is a somewhat blocking send implementation.
	 * The driver will block the caller until all packets of the
	 * request have been submitted to the SDMA engine. However, it
	 * will not wait for send completions.
	 */
	while (req->seqsubmitted != req->info.npkts) {
		ret = user_sdma_send_pkts(req, pcount);
		if (ret < 0) {
			int we_ret;

			if (ret != -EBUSY)
				goto free_req;
			we_ret = wait_event_interruptible_timeout(
				pq->busy.wait_dma,
				pq->state == SDMA_PKT_Q_ACTIVE,
				msecs_to_jiffies(
					SDMA_IOWAIT_TIMEOUT));
			trace_hfi1_usdma_we(pq, we_ret);
			if (we_ret <= 0)
				flush_pq_iowait(pq);
		}
	}
	*count += idx;
	return 0;
free_req:
	/*
	 * If seqsubmitted == npkts, the completion routine
	 * controls the final state. If seqsubmitted < npkts, wait for any
	 * outstanding packets to finish before cleaning up.
	 */
	if (req->seqsubmitted < req->info.npkts) {
		if (req->seqsubmitted)
			wait_event(pq->busy.wait_dma,
				   (req->seqcomp == req->seqsubmitted - 1));
		user_sdma_free_request(req, true);
		pq_update(pq);
		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
	}
	return ret;
}

static inline u32 compute_data_length(struct user_sdma_request *req,
				      struct user_sdma_txreq *tx)
{
	/*
	 * Determine the proper size of the packet data.
	 * The size of the data of the first packet is in the header
	 * template. However, it includes the header and ICRC, which need
	 * to be subtracted.
	 * The minimum representable packet data length in a header is 4 bytes,
	 * therefore, when the requested data length is less than 4 bytes, there's
	 * only one packet, and the packet data length is equal to that of the
	 * request data length.
	 * The size of the remaining packets is the minimum of the frag
	 * size (MTU) or remaining data in the request.
	 */
	u32 len;

	if (!req->seqnum) {
		if (req->data_len < sizeof(u32))
			len = req->data_len;
		else
			len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
			       (sizeof(tx->hdr) - 4));
	} else if (req_opcode(req->info.ctrl) == EXPECTED) {
		u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
			PAGE_SIZE;
		/*
		 * Get the data length based on the remaining space in the
		 * TID pair.
		 */
		len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
		/* If we've filled up the TID pair, move to the next one. */
		if (unlikely(!len) && ++req->tididx < req->n_tids &&
		    req->tids[req->tididx]) {
			tidlen = EXP_TID_GET(req->tids[req->tididx],
					     LEN) * PAGE_SIZE;
			req->tidoffset = 0;
			len = min_t(u32, tidlen, req->info.fragsize);
		}
		/*
		 * Since the TID pairs map entire pages, make sure that we
		 * are not going to try to send more data than we have
		 * remaining.
		 */
		len = min(len, req->data_len - req->sent);
	} else {
		len = min(req->data_len - req->sent, (u32)req->info.fragsize);
	}
	trace_hfi1_sdma_user_compute_length(req->pq->dd,
					    req->pq->ctxt,
					    req->pq->subctxt,
					    req->info.comp_idx,
					    len);
	return len;
}

static inline u32 pad_len(u32 len)
{
	if (len & (sizeof(u32) - 1))
		len += sizeof(u32) - (len & (sizeof(u32) - 1));
	return len;
}

static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
{
	/* (Size of complete header - size of PBC) + 4B ICRC + data length */
	return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
}

static int user_sdma_txadd_ahg(struct user_sdma_request *req,
			       struct user_sdma_txreq *tx,
			       u32 datalen)
{
	int ret;
	u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
	u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen));
	struct hfi1_user_sdma_pkt_q *pq = req->pq;

	/*
	 * Copy the request header into the tx header
	 * because the HW needs a cacheline-aligned
	 * address.
	 * This copy can be optimized out if the hdr
	 * member of user_sdma_request were also
	 * cacheline aligned.
	 */
	memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr));
	if (PBC2LRH(pbclen) != lrhlen) {
		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
		tx->hdr.pbc[0] = cpu_to_le16(pbclen);
	}
	ret = check_header_template(req, &tx->hdr, lrhlen, datalen);
	if (ret)
		return ret;
	ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY,
			      sizeof(tx->hdr) + datalen, req->ahg_idx,
			      0, NULL, 0, user_sdma_txreq_cb);
	if (ret)
		return ret;
	ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr));
	if (ret)
		sdma_txclean(pq->dd, &tx->txreq);
	return ret;
}

static int user_sdma_txadd(struct user_sdma_request *req,
			   struct user_sdma_txreq *tx,
			   struct user_sdma_iovec *iovec, u32 datalen,
			   u32 *queued_ptr, u32 *data_sent_ptr,
			   u64 *iov_offset_ptr)
{
	int ret;
	unsigned int pageidx, len;
	unsigned long base, offset;
	u64 iov_offset = *iov_offset_ptr;
	u32 queued = *queued_ptr, data_sent = *data_sent_ptr;
	struct hfi1_user_sdma_pkt_q *pq = req->pq;

	base = (unsigned long)iovec->iov.iov_base;
	offset = offset_in_page(base + iovec->offset + iov_offset);
	pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >>
		   PAGE_SHIFT);
	len = offset + req->info.fragsize > PAGE_SIZE ?
		PAGE_SIZE - offset : req->info.fragsize;
	len = min((datalen - queued), len);
	ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx],
			      offset, len);
	if (ret) {
		SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret);
		return ret;
	}
	iov_offset += len;
	queued += len;
	data_sent += len;
	if (unlikely(queued < datalen && pageidx == iovec->npages &&
		     req->iov_idx < req->data_iovs - 1)) {
		iovec->offset += iov_offset;
		iovec = &req->iovs[++req->iov_idx];
		iov_offset = 0;
	}

	*queued_ptr = queued;
	*data_sent_ptr = data_sent;
	*iov_offset_ptr = iov_offset;
	return ret;
}

static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
	int ret = 0;
	u16 count;
	unsigned npkts = 0;
	struct user_sdma_txreq *tx = NULL;
	struct hfi1_user_sdma_pkt_q *pq = NULL;
	struct user_sdma_iovec *iovec = NULL;

	if (!req->pq)
		return -EINVAL;

	pq = req->pq;

	/* If tx completion has reported an error, we are done. */
	if (READ_ONCE(req->has_error))
		return -EFAULT;

	/*
	 * Check if we might have sent the entire request already
	 */
	if (unlikely(req->seqnum == req->info.npkts)) {
		if (!list_empty(&req->txps))
			goto dosend;
		return ret;
	}

	if (!maxpkts || maxpkts > req->info.npkts - req->seqnum)
		maxpkts = req->info.npkts - req->seqnum;

	while (npkts < maxpkts) {
		u32 datalen = 0, queued = 0, data_sent = 0;
		u64 iov_offset = 0;

		/*
		 * Check whether any of the completions have come back
		 * with errors. If so, we are not going to process any
		 * more packets from this request.
		 */
		if (READ_ONCE(req->has_error))
			return -EFAULT;

		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
		if (!tx)
			return -ENOMEM;

		tx->flags = 0;
		tx->req = req;
		INIT_LIST_HEAD(&tx->list);

		/*
		 * For the last packet set the ACK request
		 * and disable header suppression.
		 */
		if (req->seqnum == req->info.npkts - 1)
			tx->flags |= (TXREQ_FLAGS_REQ_ACK |
				      TXREQ_FLAGS_REQ_DISABLE_SH);

		/*
		 * Calculate the payload size - this is min of the fragment
		 * (MTU) size or the remaining bytes in the request but only
		 * if we have payload data.
		 */
		if (req->data_len) {
			iovec = &req->iovs[req->iov_idx];
			if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) {
				if (++req->iov_idx == req->data_iovs) {
					ret = -EFAULT;
					goto free_tx;
				}
				iovec = &req->iovs[req->iov_idx];
				WARN_ON(iovec->offset);
			}

			datalen = compute_data_length(req, tx);

			/*
			 * Disable header suppression for payloads <= 8DWS.
			 * If there is an uncorrectable error in the receive
			 * data FIFO when the received payload size is less
			 * than or equal to 8DWS, then RxDmaDataFifoRdUncErr
			 * is not reported. RHF.EccErr is set instead if the
			 * header is not suppressed.
			 */
			if (!datalen) {
				SDMA_DBG(req,
					 "Request has data but pkt len is 0");
				ret = -EFAULT;
				goto free_tx;
			} else if (datalen <= 32) {
				tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
			}
		}

		if (req->ahg_idx >= 0) {
			if (!req->seqnum) {
				ret = user_sdma_txadd_ahg(req, tx, datalen);
				if (ret)
					goto free_tx;
			} else {
				int changes;

				changes = set_txreq_header_ahg(req, tx,
							       datalen);
				if (changes < 0) {
					ret = changes;
					goto free_tx;
				}
			}
		} else {
			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
					  datalen, user_sdma_txreq_cb);
			if (ret)
				goto free_tx;
			/*
			 * Modify the header for this packet. This only needs
			 * to be done if we are not going to use AHG. Otherwise,
			 * the HW will do it based on the changes we gave it
			 * during sdma_txinit_ahg().
			 */
			ret = set_txreq_header(req, tx, datalen);
			if (ret)
				goto free_txreq;
		}

		/*
		 * If the request contains any data vectors, add up to
		 * fragsize bytes to the descriptor.
		 */
		while (queued < datalen &&
		       (req->sent + data_sent) < req->data_len) {
			ret = user_sdma_txadd(req, tx, iovec, datalen,
					      &queued, &data_sent, &iov_offset);
			if (ret)
				goto free_txreq;
		}
		/*
		 * The txreq was submitted successfully so we can update
		 * the counters.
		 */
		req->koffset += datalen;
		if (req_opcode(req->info.ctrl) == EXPECTED)
			req->tidoffset += datalen;
		req->sent += data_sent;
		if (req->data_len)
			iovec->offset += iov_offset;
		list_add_tail(&tx->txreq.list, &req->txps);
		/*
		 * It is important to increment this here as it is used to
		 * generate the BTH.PSN and, therefore, can't be bulk-updated
		 * outside of the loop.
		 */
		tx->seqnum = req->seqnum++;
		npkts++;
	}
dosend:
	ret = sdma_send_txlist(req->sde,
			       iowait_get_ib_work(&pq->busy),
			       &req->txps, &count);
	req->seqsubmitted += count;
	if (req->seqsubmitted == req->info.npkts) {
		/*
		 * The txreq has already been submitted to the HW queue
		 * so we can free the AHG entry now. Corruption will not
		 * happen due to the sequential manner in which
		 * descriptors are processed.
		 */
		if (req->ahg_idx >= 0)
			sdma_ahg_free(req->sde, req->ahg_idx);
	}
	return ret;

free_txreq:
	sdma_txclean(pq->dd, &tx->txreq);
free_tx:
	kmem_cache_free(pq->txreq_cache, tx);
	return ret;
}

static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{
	struct evict_data evict_data;

	evict_data.cleared = 0;
	evict_data.target = npages;
	hfi1_mmu_rb_evict(pq->handler, &evict_data);
	return evict_data.cleared;
}

static int pin_sdma_pages(struct user_sdma_request *req,
			  struct user_sdma_iovec *iovec,
			  struct sdma_mmu_node *node,
			  int npages)
{
	int pinned, cleared;
	struct page **pages;
	struct hfi1_user_sdma_pkt_q *pq = req->pq;

	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;
	memcpy(pages, node->pages, node->npages * sizeof(*pages));

	npages -= node->npages;
retry:
	if (!hfi1_can_pin_pages(pq->dd, current->mm,
				atomic_read(&pq->n_locked), npages)) {
		cleared = sdma_cache_evict(pq, npages);
		if (cleared >= npages)
			goto retry;
	}
	pinned = hfi1_acquire_user_pages(current->mm,
					 ((unsigned long)iovec->iov.iov_base +
					  (node->npages * PAGE_SIZE)), npages, 0,
					 pages + node->npages);
	if (pinned < 0) {
		kfree(pages);
		return pinned;
	}
	if (pinned != npages) {
		unpin_vector_pages(current->mm, pages, node->npages, pinned);
		return -EFAULT;
	}
	kfree(node->pages);
	node->rb.len = iovec->iov.iov_len;
	node->pages = pages;
	atomic_add(pinned, &pq->n_locked);
	return pinned;
}

static void unpin_sdma_pages(struct sdma_mmu_node *node)
{
	if (node->npages) {
		unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
				   node->npages);
		atomic_sub(node->npages, &node->pq->n_locked);
	}
}

static int pin_vector_pages(struct user_sdma_request *req,
			    struct user_sdma_iovec *iovec)
{
	int ret = 0, pinned, npages;
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct sdma_mmu_node *node = NULL;
	struct mmu_rb_node *rb_node;
	struct iovec *iov;
	bool extracted;

	extracted =
		hfi1_mmu_rb_remove_unless_exact(pq->handler,
						(unsigned long)
						iovec->iov.iov_base,
						iovec->iov.iov_len, &rb_node);
	if (rb_node) {
		node = container_of(rb_node, struct sdma_mmu_node, rb);
		if (!extracted) {
			atomic_inc(&node->refcount);
			iovec->pages = node->pages;
			iovec->npages = node->npages;
			iovec->node = node;
			return 0;
		}
	}

	if (!node) {
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (!node)
			return -ENOMEM;

		node->rb.addr = (unsigned long)iovec->iov.iov_base;
		node->pq = pq;
		atomic_set(&node->refcount, 0);
	}

	iov = &iovec->iov;
	npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len);
	if (node->npages < npages) {
		pinned = pin_sdma_pages(req, iovec, node, npages);
		if (pinned < 0) {
			ret = pinned;
			goto bail;
		}
		node->npages += pinned;
		npages = node->npages;
	}
	iovec->pages = node->pages;
	iovec->npages = npages;
	iovec->node = node;

	ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
	if (ret) {
		iovec->node = NULL;
		goto bail;
	}
	return 0;
bail:
	unpin_sdma_pages(node);
	kfree(node);
	return ret;
}

static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
			       unsigned start, unsigned npages)
{
	hfi1_release_user_pages(mm, pages + start, npages, false);
	kfree(pages);
}

static int check_header_template(struct user_sdma_request *req,
				 struct hfi1_pkt_header *hdr, u32 lrhlen,
				 u32 datalen)
{
	/*
	 * Perform safety checks for any type of packet:
	 * - transfer size is multiple of 64bytes
	 * - packet length is multiple of 4 bytes
	 * - packet length is not larger than MTU size
	 *
	 * These checks are only done for the first packet of the
	 * transfer since the header is "given" to us by user space.
	 * For the remainder of the packets we compute the values.
	 */
	if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
	    lrhlen > get_lrh_len(*hdr, req->info.fragsize))
		return -EINVAL;

	if (req_opcode(req->info.ctrl) == EXPECTED) {
		/*
		 * The header is checked only on the first packet. Furthermore,
		 * we ensure that at least one TID entry is copied when the
		 * request is submitted. Therefore, we don't have to verify that
		 * tididx points to something sane.
		 */
		u32 tidval = req->tids[req->tididx],
			tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE,
			tididx = EXP_TID_GET(tidval, IDX),
			tidctrl = EXP_TID_GET(tidval, CTRL),
			tidoff;
		__le32 kval = hdr->kdeth.ver_tid_offset;

		tidoff = KDETH_GET(kval, OFFSET) *
			 (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
			  KDETH_OM_LARGE : KDETH_OM_SMALL);
		/*
		 * Expected receive packets have the following
		 * additional checks:
		 * - offset is not larger than the TID size
		 * - TIDCtrl values match between header and TID array
		 * - TID indexes match between header and TID array
		 */
		if ((tidoff + datalen > tidlen) ||
		    KDETH_GET(kval, TIDCTRL) != tidctrl ||
		    KDETH_GET(kval, TID) != tididx)
			return -EINVAL;
	}
	return 0;
}

/*
 * Correctly set the BTH.PSN field based on type of
 * transfer - eager packets can just increment the PSN but
 * expected packets encode generation and sequence in the
 * BTH.PSN field so just incrementing will result in errors.
 */
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
{
	u32 val = be32_to_cpu(bthpsn),
		mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull :
			0xffffffull),
		psn = val & mask;
	if (expct)
		psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
			((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
	else
		psn = psn + frags;
	return psn & mask;
}

static int set_txreq_header(struct user_sdma_request *req,
			    struct user_sdma_txreq *tx, u32 datalen)
{
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct hfi1_pkt_header *hdr = &tx->hdr;
	u8 omfactor; /* KDETH.OM */
	u16 pbclen;
	int ret;
	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));

	/* Copy the header template to the request before modification */
	memcpy(hdr, &req->hdr, sizeof(*hdr));

	/*
	 * Check if the PBC and LRH length are mismatched. If so
	 * adjust both in the header.
	 */
	pbclen = le16_to_cpu(hdr->pbc[0]);
	if (PBC2LRH(pbclen) != lrhlen) {
		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
		hdr->pbc[0] = cpu_to_le16(pbclen);
		hdr->lrh[2] = cpu_to_be16(lrhlen >> 2);
		/*
		 * Third packet
		 * This is the first packet in the sequence that has
		 * a "static" size that can be used for the rest of
		 * the packets (besides the last one).
		 */
		if (unlikely(req->seqnum == 2)) {
			/*
			 * From this point on the lengths in both the
			 * PBC and LRH are the same until the last
			 * packet.
			 * Adjust the template so we don't have to update
			 * every packet
			 */
			req->hdr.pbc[0] = hdr->pbc[0];
			req->hdr.lrh[2] = hdr->lrh[2];
		}
	}
	/*
	 * We only have to modify the header if this is not the
	 * first packet in the request. Otherwise, we use the
	 * header given to us.
	 */
	if (unlikely(!req->seqnum)) {
		ret = check_header_template(req, hdr, lrhlen, datalen);
		if (ret)
			return ret;
		goto done;
	}

	hdr->bth[2] = cpu_to_be32(
		set_pkt_bth_psn(hdr->bth[2],
				(req_opcode(req->info.ctrl) == EXPECTED),
				req->seqnum));

	/* Set ACK request on last packet */
	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
		hdr->bth[2] |= cpu_to_be32(1UL << 31);

	/* Set the new offset */
	hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
	/* Expected packets have to fill in the new TID information */
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		tidval = req->tids[req->tididx];
		/*
		 * If the offset puts us at the end of the current TID,
		 * advance everything.
		 */
		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
					 PAGE_SIZE)) {
			req->tidoffset = 0;
			/*
			 * Since we don't copy all the TIDs, all at once,
			 * we have to check again.
			 */
			if (++req->tididx > req->n_tids - 1 ||
			    !req->tids[req->tididx]) {
				return -EINVAL;
			}
			tidval = req->tids[req->tididx];
		}
		omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
			KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
			KDETH_OM_SMALL_SHIFT;
		/* Set KDETH.TIDCtrl based on value for this TID. */
		KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
			  EXP_TID_GET(tidval, CTRL));
		/* Set KDETH.TID based on value for this TID */
		KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
			  EXP_TID_GET(tidval, IDX));
		/* Clear KDETH.SH when DISABLE_SH flag is set */
		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
			KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
		/*
		 * Set the KDETH.OFFSET and KDETH.OM based on size of
		 * transfer.
		 */
		trace_hfi1_sdma_user_tid_info(
			pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
			req->tidoffset, req->tidoffset >> omfactor,
			omfactor != KDETH_OM_SMALL_SHIFT);
		KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
			  req->tidoffset >> omfactor);
		KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
			  omfactor != KDETH_OM_SMALL_SHIFT);
	}
done:
	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
				    req->info.comp_idx, hdr, tidval);
	return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
}

static int set_txreq_header_ahg(struct user_sdma_request *req,
				struct user_sdma_txreq *tx, u32 datalen)
{
	u32 ahg[AHG_KDETH_ARRAY_SIZE];
	int idx = 0;
	u8 omfactor; /* KDETH.OM */
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct hfi1_pkt_header *hdr = &req->hdr;
	u16 pbclen = le16_to_cpu(hdr->pbc[0]);
	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
	size_t array_size = ARRAY_SIZE(ahg);

	if (PBC2LRH(pbclen) != lrhlen) {
		/* PBC.PbcLengthDWs */
		idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
				     (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
		if (idx < 0)
			return idx;
		/* LRH.PktLen (we need the full 16 bits due to byte swap) */
		idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
				     (__force u16)cpu_to_be16(lrhlen >> 2));
		if (idx < 0)
			return idx;
	}

	/*
	 * Do the common updates
	 */
	/* BTH.PSN and BTH.A */
	val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
		(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
		val32 |= 1UL << 31;
	idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
			     (__force u16)cpu_to_be16(val32 >> 16));
	if (idx < 0)
		return idx;
	idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
			     (__force u16)cpu_to_be16(val32 & 0xffff));
	if (idx < 0)
		return idx;
	/* KDETH.Offset */
	idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
			     (__force u16)cpu_to_le16(req->koffset & 0xffff));
	if (idx < 0)
		return idx;
	idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
			     (__force u16)cpu_to_le16(req->koffset >> 16));
	if (idx < 0)
		return idx;
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		__le16 val;

		tidval = req->tids[req->tididx];

		/*
		 * If the offset puts us at the end of the current TID,
		 * advance everything.
		 */
		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
					 PAGE_SIZE)) {
			req->tidoffset = 0;
			/*
			 * Since we don't copy all the TIDs, all at once,
			 * we have to check again.
			 */
			if (++req->tididx > req->n_tids - 1 ||
			    !req->tids[req->tididx])
				return -EINVAL;
			tidval = req->tids[req->tididx];
		}
		omfactor = ((EXP_TID_GET(tidval, LEN) *
			     PAGE_SIZE) >=
			    KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
			    KDETH_OM_SMALL_SHIFT;
		/* KDETH.OM and KDETH.OFFSET (TID) */
		idx = ahg_header_set(
			ahg, idx, array_size, 7, 0, 16,
			((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
			 ((req->tidoffset >> omfactor)
			  & 0x7fff)));
		if (idx < 0)
			return idx;
		/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
		val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
				  (EXP_TID_GET(tidval, IDX) & 0x3ff));

		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
			val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
						      INTR) <<
					    AHG_KDETH_INTR_SHIFT));
		} else {
			val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
			       cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
			       cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
						      INTR) <<
					    AHG_KDETH_INTR_SHIFT));
		}

		idx = ahg_header_set(ahg, idx, array_size,
				     7, 16, 14, (__force u16)val);
		if (idx < 0)
			return idx;
	}

	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
					req->info.comp_idx, req->sde->this_idx,
					req->ahg_idx, ahg, idx, tidval);
	sdma_txinit_ahg(&tx->txreq,
			SDMA_TXREQ_F_USE_AHG,
			datalen, req->ahg_idx, idx,
			ahg, sizeof(req->hdr),
			user_sdma_txreq_cb);

	return idx;
}

/**
 * user_sdma_txreq_cb() - SDMA tx request completion callback.
 * @txreq: valid sdma tx request
 * @status: success/failure of request
 *
 * Called when the SDMA progress state machine gets notification that
 * the SDMA descriptors for this tx request have been processed by the
 * DMA engine. Called in interrupt context.
 * Only do work on completed sequences.
 */
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
	struct user_sdma_txreq *tx =
		container_of(txreq, struct user_sdma_txreq, txreq);
	struct user_sdma_request *req;
	struct hfi1_user_sdma_pkt_q *pq;
	struct hfi1_user_sdma_comp_q *cq;
	enum hfi1_sdma_comp_state state = COMPLETE;

	if (!tx->req)
		return;

	req = tx->req;
	pq = req->pq;
	cq = req->cq;

	if (status != SDMA_TXREQ_S_OK) {
		SDMA_DBG(req, "SDMA completion with error %d",
			 status);
		WRITE_ONCE(req->has_error, 1);
		state = ERROR;
	}

	req->seqcomp = tx->seqnum;
	kmem_cache_free(pq->txreq_cache, tx);

	/* sequence isn't complete? We are done */
	if (req->seqcomp != req->info.npkts - 1)
		return;

	user_sdma_free_request(req, false);
	set_comp_state(pq, cq, req->info.comp_idx, state, status);
	pq_update(pq);
}

static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
	if (atomic_dec_and_test(&pq->n_reqs))
		wake_up(&pq->wait);
}

static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
{
	int i;

	if (!list_empty(&req->txps)) {
		struct sdma_txreq *t, *p;

		list_for_each_entry_safe(t, p, &req->txps, list) {
			struct user_sdma_txreq *tx =
				container_of(t, struct user_sdma_txreq, txreq);
			list_del_init(&t->list);
			sdma_txclean(req->pq->dd, t);
			kmem_cache_free(req->pq->txreq_cache, tx);
		}
	}

	for (i = 0; i < req->data_iovs; i++) {
		struct sdma_mmu_node *node = req->iovs[i].node;

		if (!node)
			continue;

		req->iovs[i].node = NULL;

		if (unpin)
			hfi1_mmu_rb_remove(req->pq->handler,
					   &node->rb);
		else
			atomic_dec(&node->refcount);
	}

	kfree(req->tids);
	clear_bit(req->info.comp_idx, req->pq->req_in_use);
}

static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
				  struct hfi1_user_sdma_comp_q *cq,
				  u16 idx, enum hfi1_sdma_comp_state state,
				  int ret)
{
	if (state == ERROR)
		cq->comps[idx].errcode = -ret;
	smp_wmb(); /* make sure errcode is visible first */
	cq->comps[idx].status = state;
	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
					idx, state, ret);
}

static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
			   unsigned long len)
{
	return (bool)(node->addr == addr);
}

static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	atomic_inc(&node->refcount);
	return 0;
}

/*
 * Return 1 to remove the node from the rb tree and call the remove op.
 *
 * Called with the rb tree lock held.
 */
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
			 void *evict_arg, bool *stop)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);
	struct evict_data *evict_data = evict_arg;

	/* is this node still being used? */
	if (atomic_read(&node->refcount))
		return 0; /* keep this node */

	/* this node will be evicted, add its pages to our count */
	evict_data->cleared += node->npages;

	/* have enough pages been cleared? */
	if (evict_data->cleared >= evict_data->target)
		*stop = true;

	return 1; /* remove this node */
}

static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	unpin_sdma_pages(node);
	kfree(node);
}

static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	if (!atomic_read(&node->refcount))
		return 1;
	return 0;
}