/*
 * Copyright(c) 2020 - Cornelis Networks, Inc.
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mmu_context.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "hfi.h"
#include "sdma.h"
#include "mmu_rb.h"
#include "user_sdma.h"
#include "verbs.h"  /* for the headers */
#include "common.h" /* for struct hfi1_tid_info */
#include "trace.h"

static uint hfi1_sdma_comp_ring_size = 128;
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");

static unsigned initial_pkt_count = 8;

static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
static int pin_vector_pages(struct user_sdma_request *req,
			    struct user_sdma_iovec *iovec);
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
			       unsigned start, unsigned npages);
static int check_header_template(struct user_sdma_request *req,
				 struct hfi1_pkt_header *hdr, u32 lrhlen,
				 u32 datalen);
static int set_txreq_header(struct user_sdma_request *req,
			    struct user_sdma_txreq *tx, u32 datalen);
static int set_txreq_header_ahg(struct user_sdma_request *req,
				struct user_sdma_txreq *tx, u32 len);
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
				  struct hfi1_user_sdma_comp_q *cq,
				  u16 idx, enum hfi1_sdma_comp_state state,
				  int ret);
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);

static int defer_packet_queue(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *txreq,
	uint seq,
	bool pkts_sent);
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
			   unsigned long len);
static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
			 void *arg2, bool *stop);
static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);

static struct mmu_rb_ops sdma_rb_ops = {
	.filter = sdma_rb_filter,
	.insert = sdma_rb_insert,
	.evict = sdma_rb_evict,
	.remove = sdma_rb_remove,
	.invalidate = sdma_rb_invalidate
};
static int defer_packet_queue(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *txreq,
	uint seq,
	bool pkts_sent)
{
	struct hfi1_user_sdma_pkt_q *pq =
		container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);

	write_seqlock(&sde->waitlock);
	trace_hfi1_usdma_defer(pq, sde, &pq->busy);
	if (sdma_progress(sde, seq, txreq))
		goto eagain;
	/*
	 * We are assuming that if the list is enqueued somewhere, it
	 * is to the dmawait list since that is the only place where
	 * it is supposed to be enqueued.
	 */
	xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
	if (list_empty(&pq->busy.list)) {
		pq->busy.lock = &sde->waitlock;
		iowait_get_priority(&pq->busy);
		iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
	}
	write_sequnlock(&sde->waitlock);
	return -EBUSY;
eagain:
	write_sequnlock(&sde->waitlock);
	return -EAGAIN;
}

static void activate_packet_queue(struct iowait *wait, int reason)
{
	struct hfi1_user_sdma_pkt_q *pq =
		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);

	trace_hfi1_usdma_activate(pq, wait, reason);
	xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
	wake_up(&wait->wait_dma);
};

int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
				struct hfi1_filedata *fd)
{
	int ret = -ENOMEM;
	char buf[64];
	struct hfi1_devdata *dd;
	struct hfi1_user_sdma_comp_q *cq;
	struct hfi1_user_sdma_pkt_q *pq;

	if (!uctxt || !fd)
		return -EBADF;

	if (!hfi1_sdma_comp_ring_size)
		return -EINVAL;

	dd = uctxt->dd;

	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
	if (!pq)
		return -ENOMEM;
	pq->dd = dd;
	pq->ctxt = uctxt->ctxt;
	pq->subctxt = fd->subctxt;
	pq->n_max_reqs = hfi1_sdma_comp_ring_size;
	atomic_set(&pq->n_reqs, 0);
	init_waitqueue_head(&pq->wait);
	atomic_set(&pq->n_locked, 0);

	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
		    activate_packet_queue, NULL, NULL);
	pq->reqidx = 0;

	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
			   sizeof(*pq->reqs),
			   GFP_KERNEL);
	if (!pq->reqs)
		goto pq_reqs_nomem;

	pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
				 sizeof(*pq->req_in_use),
				 GFP_KERNEL);
	if (!pq->req_in_use)
		goto pq_reqs_no_in_use;

	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
		 fd->subctxt);
	pq->txreq_cache = kmem_cache_create(buf,
					    sizeof(struct user_sdma_txreq),
					    L1_CACHE_BYTES,
					    SLAB_HWCACHE_ALIGN,
					    NULL);
	if (!pq->txreq_cache) {
		dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
			   uctxt->ctxt);
		goto pq_txreq_nomem;
	}

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		goto cq_nomem;

	cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
				 * hfi1_sdma_comp_ring_size));
	if (!cq->comps)
		goto cq_comps_nomem;

	cq->nentries = hfi1_sdma_comp_ring_size;

	ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
				   &pq->handler);
	if (ret) {
		dd_dev_err(dd, "Failed to register with MMU %d", ret);
		goto pq_mmu_fail;
	}

	rcu_assign_pointer(fd->pq, pq);
	fd->cq = cq;

	return 0;

pq_mmu_fail:
	vfree(cq->comps);
cq_comps_nomem:
	kfree(cq);
cq_nomem:
	kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
	kfree(pq->req_in_use);
pq_reqs_no_in_use:
	kfree(pq->reqs);
pq_reqs_nomem:
	kfree(pq);

	return ret;
}

static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
{
	unsigned long flags;
	seqlock_t *lock = pq->busy.lock;

	if (!lock)
		return;
	write_seqlock_irqsave(lock, flags);
	if (!list_empty(&pq->busy.list)) {
		list_del_init(&pq->busy.list);
		pq->busy.lock = NULL;
	}
	write_sequnlock_irqrestore(lock, flags);
}
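/*
 * Teardown note: the pq pointer is unpublished under fd->pq_rcu_lock and
 * an SRCU grace period is observed before anything is freed, so no new
 * requests can race with the cleanup below; iowait_sdma_drain() and the
 * wait on pq->n_reqs then flush out the requests already in flight.
 */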
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
			       struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_user_sdma_pkt_q *pq;

	trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);

	spin_lock(&fd->pq_rcu_lock);
	pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
				    lockdep_is_held(&fd->pq_rcu_lock));
	if (pq) {
		rcu_assign_pointer(fd->pq, NULL);
		spin_unlock(&fd->pq_rcu_lock);
		synchronize_srcu(&fd->pq_srcu);
		/* at this point there can be no more new requests */
		if (pq->handler)
			hfi1_mmu_rb_unregister(pq->handler);
		iowait_sdma_drain(&pq->busy);
		/* Wait until all requests have been freed. */
		wait_event_interruptible(
			pq->wait,
			!atomic_read(&pq->n_reqs));
		kfree(pq->reqs);
		kfree(pq->req_in_use);
		kmem_cache_destroy(pq->txreq_cache);
		flush_pq_iowait(pq);
		kfree(pq);
	} else {
		spin_unlock(&fd->pq_rcu_lock);
	}
	if (fd->cq) {
		vfree(fd->cq->comps);
		kfree(fd->cq);
		fd->cq = NULL;
	}
	return 0;
}

static u8 dlid_to_selector(u16 dlid)
{
	static u8 mapping[256];
	static int initialized;
	static u8 next;
	int hash;

	if (!initialized) {
		memset(mapping, 0xFF, 256);
		initialized = 1;
	}

	hash = ((dlid >> 8) ^ dlid) & 0xFF;
	if (mapping[hash] == 0xFF) {
		mapping[hash] = next;
		next = (next + 1) & 0x7F;
	}

	return mapping[hash];
}

/**
 * hfi1_user_sdma_process_request() - Process and start a user sdma request
 * @fd: valid file descriptor
 * @iovec: array of io vectors to process
 * @dim: overall iovec array size
 * @count: number of io vector array entries processed
 */
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
				   struct iovec *iovec, unsigned long dim,
				   unsigned long *count)
{
	int ret = 0, i;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_sdma_pkt_q *pq =
		srcu_dereference(fd->pq, &fd->pq_srcu);
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	struct hfi1_devdata *dd = pq->dd;
	unsigned long idx = 0;
	u8 pcount = initial_pkt_count;
	struct sdma_req_info info;
	struct user_sdma_request *req;
	u8 opcode, sc, vl;
	u16 pkey;
	u32 slid;
	u16 dlid;
	u32 selector;

	if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
		hfi1_cdbg(
			SDMA,
			"[%u:%u:%u] First vector not big enough for header %lu/%lu",
			dd->unit, uctxt->ctxt, fd->subctxt,
			iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr));
		return -EINVAL;
	}
	ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
	if (ret) {
		hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)",
			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
		return -EFAULT;
	}

	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
				     (u16 *)&info);
	if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Invalid comp index",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
		return -EINVAL;
	}

	/*
	 * Sanity check the header io vector count. Need at least 1 vector
	 * (header) and cannot be larger than the actual io vector count.
	 */
	if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
			  req_iovcnt(info.ctrl), dim);
		return -EINVAL;
	}

	if (!info.fragsize) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Request does not specify fragsize",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
		return -EINVAL;
	}
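	/*
	 * Descriptive note: info.comp_idx doubles as the slot index into
	 * pq->reqs and the user-visible completion ring, so atomically
	 * claiming its bit in req_in_use is what serializes reuse of a slot
	 * until the previous request on it has completed and been cleared.
	 */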
	/* Try to claim the request. */
	if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
			  dd->unit, uctxt->ctxt, fd->subctxt,
			  info.comp_idx);
		return -EBADSLT;
	}
	/*
	 * All safety checks have been done and this request has been claimed.
	 */
	trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
					     info.comp_idx);
	req = pq->reqs + info.comp_idx;
	req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
	req->data_len = 0;
	req->pq = pq;
	req->cq = cq;
	req->ahg_idx = -1;
	req->iov_idx = 0;
	req->sent = 0;
	req->seqnum = 0;
	req->seqcomp = 0;
	req->seqsubmitted = 0;
	req->tids = NULL;
	req->has_error = 0;
	INIT_LIST_HEAD(&req->txps);

	memcpy(&req->info, &info, sizeof(info));

	/* The request is initialized, count it */
	atomic_inc(&pq->n_reqs);

	if (req_opcode(info.ctrl) == EXPECTED) {
		/* expected must have a TID info and at least one data vector */
		if (req->data_iovs < 2) {
			SDMA_DBG(req,
				 "Not enough vectors for expected request");
			ret = -EINVAL;
			goto free_req;
		}
		req->data_iovs--;
	}

	if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
		SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
			 MAX_VECTORS_PER_REQ);
		ret = -EINVAL;
		goto free_req;
	}
	/* Copy the header from the user buffer */
	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
			     sizeof(req->hdr));
	if (ret) {
		SDMA_DBG(req, "Failed to copy header template (%d)", ret);
		ret = -EFAULT;
		goto free_req;
	}

	/* If static rate control is not enabled, sanitize the header. */
	if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL))
		req->hdr.pbc[2] = 0;

	/* Validate the opcode. Do not trust packets from user space blindly. */
	opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff;
	if ((opcode & USER_OPCODE_CHECK_MASK) !=
	    USER_OPCODE_CHECK_VAL) {
		SDMA_DBG(req, "Invalid opcode (%d)", opcode);
		ret = -EINVAL;
		goto free_req;
	}
	/*
	 * Validate the vl. Do not trust packets from user space blindly.
	 * VL comes from PBC, SC comes from LRH, and the VL needs to
	 * match the SC look up.
	 */
	vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF;
	sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) |
	      (((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4));
	if (vl >= dd->pport->vls_operational ||
	    vl != sc_to_vlt(dd, sc)) {
		SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
		ret = -EINVAL;
		goto free_req;
	}

	/* Checking P_KEY for requests from user-space */
	pkey = (u16)be32_to_cpu(req->hdr.bth[0]);
	slid = be16_to_cpu(req->hdr.lrh[3]);
	if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
		ret = -EINVAL;
		goto free_req;
	}

	/*
	 * We should also check BTH.lnh. If it says the next header is a GRH,
	 * the RXE parsing will be off and will land in the middle of the
	 * KDETH or miss it entirely.
	 */
	if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) {
		SDMA_DBG(req, "User tried to pass in a GRH");
		ret = -EINVAL;
		goto free_req;
	}

	req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
	/*
	 * Calculate the initial TID offset based on the values of
	 * KDETH.OFFSET and KDETH.OM that are passed in.
	 */
	req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
			 (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
			  KDETH_OM_LARGE : KDETH_OM_SMALL);
	trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
					       info.comp_idx, req->tidoffset);
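	/*
	 * Illustrative note: KDETH.OFFSET is a scaled value, so the byte
	 * offset computed above is OFFSET * KDETH_OM_LARGE when the OM bit
	 * is set and OFFSET * KDETH_OM_SMALL otherwise; the same scaling is
	 * applied in reverse when the header is rebuilt for each packet.
	 */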
	idx++;

	/* Save all the IO vector structures */
	for (i = 0; i < req->data_iovs; i++) {
		req->iovs[i].offset = 0;
		INIT_LIST_HEAD(&req->iovs[i].list);
		memcpy(&req->iovs[i].iov,
		       iovec + idx++,
		       sizeof(req->iovs[i].iov));
		ret = pin_vector_pages(req, &req->iovs[i]);
		if (ret) {
			req->data_iovs = i;
			goto free_req;
		}
		req->data_len += req->iovs[i].iov.iov_len;
	}
	trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
					 info.comp_idx, req->data_len);
	if (pcount > req->info.npkts)
		pcount = req->info.npkts;
	/*
	 * Copy any TID info
	 * User space will provide the TID info only when the
	 * request type is EXPECTED. This is true even if there is
	 * only one packet in the request and the header is already
	 * setup. The reason for the singular TID case is that the
	 * driver needs to perform safety checks.
	 */
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
		u32 *tmp;

		if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
			ret = -EINVAL;
			goto free_req;
		}

		/*
		 * We have to copy all of the tids because they may vary
		 * in size and, therefore, the TID count might not be
		 * equal to the pkt count. However, there is no way to
		 * tell at this point.
		 */
		tmp = memdup_user(iovec[idx].iov_base,
				  ntids * sizeof(*req->tids));
		if (IS_ERR(tmp)) {
			ret = PTR_ERR(tmp);
			SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
				 ntids, ret);
			goto free_req;
		}
		req->tids = tmp;
		req->n_tids = ntids;
		req->tididx = 0;
		idx++;
	}

	dlid = be16_to_cpu(req->hdr.lrh[1]);
	selector = dlid_to_selector(dlid);
	selector += uctxt->ctxt + fd->subctxt;
	req->sde = sdma_select_user_engine(dd, selector, vl);

	if (!req->sde || !sdma_running(req->sde)) {
		ret = -ECOMM;
		goto free_req;
	}

	/* We don't need an AHG entry if the request contains only one packet */
	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
		req->ahg_idx = sdma_ahg_alloc(req->sde);

	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
	pq->state = SDMA_PKT_Q_ACTIVE;

	/*
	 * This is a somewhat blocking send implementation.
	 * The driver will block the caller until all packets of the
	 * request have been submitted to the SDMA engine. However, it
	 * will not wait for send completions.
	 */
	while (req->seqsubmitted != req->info.npkts) {
		ret = user_sdma_send_pkts(req, pcount);
		if (ret < 0) {
			int we_ret;

			if (ret != -EBUSY)
				goto free_req;
			we_ret = wait_event_interruptible_timeout(
				pq->busy.wait_dma,
				pq->state == SDMA_PKT_Q_ACTIVE,
				msecs_to_jiffies(
					SDMA_IOWAIT_TIMEOUT));
			trace_hfi1_usdma_we(pq, we_ret);
			if (we_ret <= 0)
				flush_pq_iowait(pq);
		}
	}
	*count += idx;
	return 0;
free_req:
	/*
	 * If seqsubmitted == npkts, the completion routine controls the
	 * final state. If seqsubmitted < npkts, wait for any outstanding
	 * packets to finish before cleaning up.
	 */
	if (req->seqsubmitted < req->info.npkts) {
		if (req->seqsubmitted)
			wait_event(pq->busy.wait_dma,
				   (req->seqcomp == req->seqsubmitted - 1));
		user_sdma_free_request(req, true);
		pq_update(pq);
		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
	}
	return ret;
}
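/*
 * Worked example for the first packet: LRH.PktLen in the header template is
 * in dwords and covers everything after the PBC plus the ICRC, so the
 * payload of packet 0 is (PktLen << 2) minus the non-PBC header bytes and
 * the 4-byte ICRC. The (sizeof(tx->hdr) - 4) term below works out because
 * the PBC (8 bytes here) is not counted in PktLen while the ICRC is.
 */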
static inline u32 compute_data_length(struct user_sdma_request *req,
				      struct user_sdma_txreq *tx)
{
	/*
	 * Determine the proper size of the packet data.
	 * The size of the data of the first packet is in the header
	 * template. However, it includes the header and ICRC, which need
	 * to be subtracted.
	 * The minimum representable packet data length in a header is 4
	 * bytes, therefore, when the requested data length is less than 4
	 * bytes, there's only one packet, and the packet data length is
	 * equal to the request data length.
	 * The size of the remaining packets is the minimum of the frag
	 * size (MTU) or remaining data in the request.
	 */
	u32 len;

	if (!req->seqnum) {
		if (req->data_len < sizeof(u32))
			len = req->data_len;
		else
			len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
			       (sizeof(tx->hdr) - 4));
	} else if (req_opcode(req->info.ctrl) == EXPECTED) {
		u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
			PAGE_SIZE;
		/*
		 * Get the data length based on the remaining space in the
		 * TID pair.
		 */
		len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
		/* If we've filled up the TID pair, move to the next one. */
		if (unlikely(!len) && ++req->tididx < req->n_tids &&
		    req->tids[req->tididx]) {
			tidlen = EXP_TID_GET(req->tids[req->tididx],
					     LEN) * PAGE_SIZE;
			req->tidoffset = 0;
			len = min_t(u32, tidlen, req->info.fragsize);
		}
		/*
		 * Since the TID pairs map entire pages, make sure that we
		 * are not going to try to send more data than we have
		 * remaining.
		 */
		len = min(len, req->data_len - req->sent);
	} else {
		len = min(req->data_len - req->sent, (u32)req->info.fragsize);
	}
	trace_hfi1_sdma_user_compute_length(req->pq->dd,
					    req->pq->ctxt,
					    req->pq->subctxt,
					    req->info.comp_idx,
					    len);
	return len;
}

static inline u32 pad_len(u32 len)
{
	if (len & (sizeof(u32) - 1))
		len += sizeof(u32) - (len & (sizeof(u32) - 1));
	return len;
}

static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
{
	/* (Size of complete header - size of PBC) + 4B ICRC + data length */
	return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
}
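/*
 * First-packet path when an AHG slot has been allocated: the complete,
 * cacheline-aligned header is handed to the engine with
 * SDMA_TXREQ_F_AHG_COPY so that later packets only need the small AHG
 * deltas built in set_txreq_header_ahg().
 */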
static int user_sdma_txadd_ahg(struct user_sdma_request *req,
			       struct user_sdma_txreq *tx,
			       u32 datalen)
{
	int ret;
	u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
	u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen));
	struct hfi1_user_sdma_pkt_q *pq = req->pq;

	/*
	 * Copy the request header into the tx header
	 * because the HW needs a cacheline-aligned
	 * address.
	 * This copy can be optimized out if the hdr
	 * member of user_sdma_request were also
	 * cacheline aligned.
	 */
	memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr));
	if (PBC2LRH(pbclen) != lrhlen) {
		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
		tx->hdr.pbc[0] = cpu_to_le16(pbclen);
	}
	ret = check_header_template(req, &tx->hdr, lrhlen, datalen);
	if (ret)
		return ret;
	ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY,
			      sizeof(tx->hdr) + datalen, req->ahg_idx,
			      0, NULL, 0, user_sdma_txreq_cb);
	if (ret)
		return ret;
	ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr));
	if (ret)
		sdma_txclean(pq->dd, &tx->txreq);
	return ret;
}

static int user_sdma_txadd(struct user_sdma_request *req,
			   struct user_sdma_txreq *tx,
			   struct user_sdma_iovec *iovec, u32 datalen,
			   u32 *queued_ptr, u32 *data_sent_ptr,
			   u64 *iov_offset_ptr)
{
	int ret;
	unsigned int pageidx, len;
	unsigned long base, offset;
	u64 iov_offset = *iov_offset_ptr;
	u32 queued = *queued_ptr, data_sent = *data_sent_ptr;
	struct hfi1_user_sdma_pkt_q *pq = req->pq;

	base = (unsigned long)iovec->iov.iov_base;
	offset = offset_in_page(base + iovec->offset + iov_offset);
	pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >>
		   PAGE_SHIFT);
	len = offset + req->info.fragsize > PAGE_SIZE ?
		PAGE_SIZE - offset : req->info.fragsize;
	len = min((datalen - queued), len);
	ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx],
			      offset, len);
	if (ret) {
		SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret);
		return ret;
	}
	iov_offset += len;
	queued += len;
	data_sent += len;
	if (unlikely(queued < datalen && pageidx == iovec->npages &&
		     req->iov_idx < req->data_iovs - 1)) {
		iovec->offset += iov_offset;
		iovec = &req->iovs[++req->iov_idx];
		iov_offset = 0;
	}

	*queued_ptr = queued;
	*data_sent_ptr = data_sent;
	*iov_offset_ptr = iov_offset;
	return ret;
}

static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
	int ret = 0;
	u16 count;
	unsigned npkts = 0;
	struct user_sdma_txreq *tx = NULL;
	struct hfi1_user_sdma_pkt_q *pq = NULL;
	struct user_sdma_iovec *iovec = NULL;

	if (!req->pq)
		return -EINVAL;

	pq = req->pq;

	/* If tx completion has reported an error, we are done. */
	if (READ_ONCE(req->has_error))
		return -EFAULT;

	/*
	 * Check if we might have sent the entire request already
	 */
	if (unlikely(req->seqnum == req->info.npkts)) {
		if (!list_empty(&req->txps))
			goto dosend;
		return ret;
	}

	if (!maxpkts || maxpkts > req->info.npkts - req->seqnum)
		maxpkts = req->info.npkts - req->seqnum;

	while (npkts < maxpkts) {
		u32 datalen = 0, queued = 0, data_sent = 0;
		u64 iov_offset = 0;

		/*
		 * Check whether any of the completions have come back
		 * with errors. If so, we are not going to process any
		 * more packets from this request.
		 */
		if (READ_ONCE(req->has_error))
			return -EFAULT;

		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
		if (!tx)
			return -ENOMEM;

		tx->flags = 0;
		tx->req = req;
		INIT_LIST_HEAD(&tx->list);

		/*
		 * For the last packet set the ACK request
		 * and disable header suppression.
		 */
		if (req->seqnum == req->info.npkts - 1)
			tx->flags |= (TXREQ_FLAGS_REQ_ACK |
				      TXREQ_FLAGS_REQ_DISABLE_SH);

		/*
		 * Calculate the payload size - this is min of the fragment
		 * (MTU) size or the remaining bytes in the request but only
		 * if we have payload data.
		 */
		if (req->data_len) {
			iovec = &req->iovs[req->iov_idx];
			if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) {
				if (++req->iov_idx == req->data_iovs) {
					ret = -EFAULT;
					goto free_tx;
				}
				iovec = &req->iovs[req->iov_idx];
				WARN_ON(iovec->offset);
			}

			datalen = compute_data_length(req, tx);

			/*
			 * Disable header suppression for payloads <= 8 DWs.
			 * If there is an uncorrectable error in the receive
			 * data FIFO when the received payload size is less
			 * than or equal to 8 DWs, RxDmaDataFifoRdUncErr is
			 * not reported; RHF.EccErr is set instead when the
			 * header is not suppressed.
			 */
			if (!datalen) {
				SDMA_DBG(req,
					 "Request has data but pkt len is 0");
				ret = -EFAULT;
				goto free_tx;
			} else if (datalen <= 32) {
				tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
			}
		}

		if (req->ahg_idx >= 0) {
			if (!req->seqnum) {
				ret = user_sdma_txadd_ahg(req, tx, datalen);
				if (ret)
					goto free_tx;
			} else {
				int changes;

				changes = set_txreq_header_ahg(req, tx,
							       datalen);
				if (changes < 0) {
					ret = changes;
					goto free_tx;
				}
			}
		} else {
			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
					  datalen, user_sdma_txreq_cb);
			if (ret)
				goto free_tx;
			/*
			 * Modify the header for this packet. This only needs
			 * to be done if we are not going to use AHG. Otherwise,
			 * the HW will do it based on the changes we gave it
			 * during sdma_txinit_ahg().
			 */
			ret = set_txreq_header(req, tx, datalen);
			if (ret)
				goto free_txreq;
		}

		/*
		 * If the request contains any data vectors, add up to
		 * fragsize bytes to the descriptor.
		 */
		while (queued < datalen &&
		       (req->sent + data_sent) < req->data_len) {
			ret = user_sdma_txadd(req, tx, iovec, datalen,
					      &queued, &data_sent, &iov_offset);
			if (ret)
				goto free_txreq;
		}
		/*
		 * The txreq was submitted successfully so we can update
		 * the counters.
		 */
		req->koffset += datalen;
		if (req_opcode(req->info.ctrl) == EXPECTED)
			req->tidoffset += datalen;
		req->sent += data_sent;
		if (req->data_len)
			iovec->offset += iov_offset;
		list_add_tail(&tx->txreq.list, &req->txps);
		/*
		 * It is important to increment this here as it is used to
		 * generate the BTH.PSN and, therefore, can't be bulk-updated
		 * outside of the loop.
		 */
		tx->seqnum = req->seqnum++;
		npkts++;
	}
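	/*
	 * Descriptive note: sdma_send_txlist() may queue only part of the
	 * list if the descriptor ring fills; count reflects what was
	 * actually accepted, and the caller loops (or waits on -EBUSY)
	 * until seqsubmitted reaches npkts.
	 */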
dosend:
	ret = sdma_send_txlist(req->sde,
			       iowait_get_ib_work(&pq->busy),
			       &req->txps, &count);
	req->seqsubmitted += count;
	if (req->seqsubmitted == req->info.npkts) {
		/*
		 * The txreq has already been submitted to the HW queue
		 * so we can free the AHG entry now. Corruption will not
		 * happen due to the sequential manner in which
		 * descriptors are processed.
		 */
		if (req->ahg_idx >= 0)
			sdma_ahg_free(req->sde, req->ahg_idx);
	}
	return ret;

free_txreq:
	sdma_txclean(pq->dd, &tx->txreq);
free_tx:
	kmem_cache_free(pq->txreq_cache, tx);
	return ret;
}

static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{
	struct evict_data evict_data;

	evict_data.cleared = 0;
	evict_data.target = npages;
	hfi1_mmu_rb_evict(pq->handler, &evict_data);
	return evict_data.cleared;
}

static int pin_sdma_pages(struct user_sdma_request *req,
			  struct user_sdma_iovec *iovec,
			  struct sdma_mmu_node *node,
			  int npages)
{
	int pinned, cleared;
	struct page **pages;
	struct hfi1_user_sdma_pkt_q *pq = req->pq;

	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;
	memcpy(pages, node->pages, node->npages * sizeof(*pages));

	npages -= node->npages;
retry:
	if (!hfi1_can_pin_pages(pq->dd, current->mm,
				atomic_read(&pq->n_locked), npages)) {
		cleared = sdma_cache_evict(pq, npages);
		if (cleared >= npages)
			goto retry;
	}
	pinned = hfi1_acquire_user_pages(current->mm,
					 ((unsigned long)iovec->iov.iov_base +
					  (node->npages * PAGE_SIZE)), npages, 0,
					 pages + node->npages);
	if (pinned < 0) {
		kfree(pages);
		return pinned;
	}
	if (pinned != npages) {
		unpin_vector_pages(current->mm, pages, node->npages, pinned);
		return -EFAULT;
	}
	kfree(node->pages);
	node->rb.len = iovec->iov.iov_len;
	node->pages = pages;
	atomic_add(pinned, &pq->n_locked);
	return pinned;
}

static void unpin_sdma_pages(struct sdma_mmu_node *node)
{
	if (node->npages) {
		unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
				   node->npages);
		atomic_sub(node->npages, &node->pq->n_locked);
	}
}

static int pin_vector_pages(struct user_sdma_request *req,
			    struct user_sdma_iovec *iovec)
{
	int ret = 0, pinned, npages;
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct sdma_mmu_node *node = NULL;
	struct mmu_rb_node *rb_node;
	struct iovec *iov;
	bool extracted;

	extracted =
		hfi1_mmu_rb_remove_unless_exact(pq->handler,
						(unsigned long)
						iovec->iov.iov_base,
						iovec->iov.iov_len, &rb_node);
	if (rb_node) {
		node = container_of(rb_node, struct sdma_mmu_node, rb);
		if (!extracted) {
			atomic_inc(&node->refcount);
			iovec->pages = node->pages;
			iovec->npages = node->npages;
			iovec->node = node;
			return 0;
		}
	}

	if (!node) {
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (!node)
			return -ENOMEM;

		node->rb.addr = (unsigned long)iovec->iov.iov_base;
		node->pq = pq;
		atomic_set(&node->refcount, 0);
	}

	iov = &iovec->iov;
	npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len);
	if (node->npages < npages) {
		pinned = pin_sdma_pages(req, iovec, node, npages);
		if (pinned < 0) {
			ret = pinned;
			goto bail;
		}
		node->npages += pinned;
		npages = node->npages;
	}
	iovec->pages = node->pages;
	iovec->npages = npages;
	iovec->node = node;

	ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
	if (ret) {
		iovec->node = NULL;
		goto bail;
	}
	return 0;
bail:
	unpin_sdma_pages(node);
	kfree(node);
	return ret;
}

static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
			       unsigned start, unsigned npages)
{
	hfi1_release_user_pages(mm, pages + start, npages, false);
	kfree(pages);
}

static int check_header_template(struct user_sdma_request *req,
				 struct hfi1_pkt_header *hdr, u32 lrhlen,
				 u32 datalen)
{
	/*
	 * Perform safety checks for any type of packet:
	 * - transfer size is a multiple of 64 bytes
	 * - packet length is a multiple of 4 bytes
	 * - packet length is not larger than MTU size
	 *
	 * These checks are only done for the first packet of the
	 * transfer since the header is "given" to us by user space.
	 * For the remainder of the packets we compute the values.
	 */
	if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
	    lrhlen > get_lrh_len(*hdr, req->info.fragsize))
		return -EINVAL;

	if (req_opcode(req->info.ctrl) == EXPECTED) {
		/*
		 * The header is checked only on the first packet. Furthermore,
		 * we ensure that at least one TID entry is copied when the
		 * request is submitted. Therefore, we don't have to verify that
		 * tididx points to something sane.
		 */
		u32 tidval = req->tids[req->tididx],
			tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE,
			tididx = EXP_TID_GET(tidval, IDX),
			tidctrl = EXP_TID_GET(tidval, CTRL),
			tidoff;
		__le32 kval = hdr->kdeth.ver_tid_offset;

		tidoff = KDETH_GET(kval, OFFSET) *
			 (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
			  KDETH_OM_LARGE : KDETH_OM_SMALL);
		/*
		 * Expected receive packets have the following
		 * additional checks:
		 * - offset is not larger than the TID size
		 * - TIDCtrl values match between header and TID array
		 * - TID indexes match between header and TID array
		 */
		if ((tidoff + datalen > tidlen) ||
		    KDETH_GET(kval, TIDCTRL) != tidctrl ||
		    KDETH_GET(kval, TID) != tididx)
			return -EINVAL;
	}
	return 0;
}

/*
 * Correctly set the BTH.PSN field based on type of
 * transfer - eager packets can just increment the PSN but
 * expected packets encode generation and sequence in the
 * BTH.PSN field so just incrementing will result in errors.
 */
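/*
 * Illustrative example: for an expected transfer the low
 * HFI1_KDETH_BTH_SEQ_MASK bits are a rolling sequence while the bits above
 * them carry the generation, so adding "frags" only to the masked low bits
 * lets the sequence wrap without disturbing the generation value.
 */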
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
{
	u32 val = be32_to_cpu(bthpsn),
		mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull :
			0xffffffull),
		psn = val & mask;
	if (expct)
		psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
			((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
	else
		psn = psn + frags;
	return psn & mask;
}

static int set_txreq_header(struct user_sdma_request *req,
			    struct user_sdma_txreq *tx, u32 datalen)
{
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct hfi1_pkt_header *hdr = &tx->hdr;
	u8 omfactor; /* KDETH.OM */
	u16 pbclen;
	int ret;
	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));

	/* Copy the header template to the request before modification */
	memcpy(hdr, &req->hdr, sizeof(*hdr));

	/*
	 * Check if the PBC and LRH length are mismatched. If so
	 * adjust both in the header.
	 */
	pbclen = le16_to_cpu(hdr->pbc[0]);
	if (PBC2LRH(pbclen) != lrhlen) {
		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
		hdr->pbc[0] = cpu_to_le16(pbclen);
		hdr->lrh[2] = cpu_to_be16(lrhlen >> 2);
		/*
		 * Third packet
		 * This is the first packet in the sequence that has
		 * a "static" size that can be used for the rest of
		 * the packets (besides the last one).
		 */
		if (unlikely(req->seqnum == 2)) {
			/*
			 * From this point on the lengths in both the
			 * PBC and LRH are the same until the last
			 * packet.
			 * Adjust the template so we don't have to update
			 * every packet
			 */
			req->hdr.pbc[0] = hdr->pbc[0];
			req->hdr.lrh[2] = hdr->lrh[2];
		}
	}
	/*
	 * We only have to modify the header if this is not the
	 * first packet in the request. Otherwise, we use the
	 * header given to us.
	 */
	if (unlikely(!req->seqnum)) {
		ret = check_header_template(req, hdr, lrhlen, datalen);
		if (ret)
			return ret;
		goto done;
	}

	hdr->bth[2] = cpu_to_be32(
		set_pkt_bth_psn(hdr->bth[2],
				(req_opcode(req->info.ctrl) == EXPECTED),
				req->seqnum));

	/* Set ACK request on last packet */
	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
		hdr->bth[2] |= cpu_to_be32(1UL << 31);

	/* Set the new offset */
	hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
	/* Expected packets have to fill in the new TID information */
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		tidval = req->tids[req->tididx];
		/*
		 * If the offset puts us at the end of the current TID,
		 * advance everything.
		 */
		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
					 PAGE_SIZE)) {
			req->tidoffset = 0;
			/*
			 * Since we don't copy all the TIDs all at once,
			 * we have to check again.
			 */
			if (++req->tididx > req->n_tids - 1 ||
			    !req->tids[req->tididx]) {
				return -EINVAL;
			}
			tidval = req->tids[req->tididx];
		}
		omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
			KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
			KDETH_OM_SMALL_SHIFT;
		/* Set KDETH.TIDCtrl based on value for this TID. */
		KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
			  EXP_TID_GET(tidval, CTRL));
		/* Set KDETH.TID based on value for this TID */
		KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
			  EXP_TID_GET(tidval, IDX));
		/* Clear KDETH.SH when DISABLE_SH flag is set */
		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
			KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
		/*
		 * Set the KDETH.OFFSET and KDETH.OM based on size of
		 * transfer.
		 */
		trace_hfi1_sdma_user_tid_info(
			pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
			req->tidoffset, req->tidoffset >> omfactor,
			omfactor != KDETH_OM_SMALL_SHIFT);
		KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
			  req->tidoffset >> omfactor);
		KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
			  omfactor != KDETH_OM_SMALL_SHIFT);
	}
done:
	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
				    req->info.comp_idx, hdr, tidval);
	return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
}
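/*
 * AHG path for packets after the first: instead of DMA-ing a fresh copy of
 * the header, build a list of (dword index, bit offset, width, value)
 * updates via ahg_header_set() covering only the fields that change per
 * packet - PBC/LRH lengths when needed, BTH.PSN/A, KDETH.Offset and, for
 * expected transfers, the TID fields.
 */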
static int set_txreq_header_ahg(struct user_sdma_request *req,
				struct user_sdma_txreq *tx, u32 datalen)
{
	u32 ahg[AHG_KDETH_ARRAY_SIZE];
	int idx = 0;
	u8 omfactor; /* KDETH.OM */
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct hfi1_pkt_header *hdr = &req->hdr;
	u16 pbclen = le16_to_cpu(hdr->pbc[0]);
	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
	size_t array_size = ARRAY_SIZE(ahg);

	if (PBC2LRH(pbclen) != lrhlen) {
		/* PBC.PbcLengthDWs */
		idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
				     (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
		if (idx < 0)
			return idx;
		/* LRH.PktLen (we need the full 16 bits due to byte swap) */
		idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
				     (__force u16)cpu_to_be16(lrhlen >> 2));
		if (idx < 0)
			return idx;
	}

	/*
	 * Do the common updates
	 */
	/* BTH.PSN and BTH.A */
	val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
		(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
		val32 |= 1UL << 31;
	idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
			     (__force u16)cpu_to_be16(val32 >> 16));
	if (idx < 0)
		return idx;
	idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
			     (__force u16)cpu_to_be16(val32 & 0xffff));
	if (idx < 0)
		return idx;
	/* KDETH.Offset */
	idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
			     (__force u16)cpu_to_le16(req->koffset & 0xffff));
	if (idx < 0)
		return idx;
	idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
			     (__force u16)cpu_to_le16(req->koffset >> 16));
	if (idx < 0)
		return idx;
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		__le16 val;

		tidval = req->tids[req->tididx];

		/*
		 * If the offset puts us at the end of the current TID,
		 * advance everything.
		 */
		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
					 PAGE_SIZE)) {
			req->tidoffset = 0;
			/*
			 * Since we don't copy all the TIDs all at once,
			 * we have to check again.
			 */
			if (++req->tididx > req->n_tids - 1 ||
			    !req->tids[req->tididx])
				return -EINVAL;
			tidval = req->tids[req->tididx];
		}
		omfactor = ((EXP_TID_GET(tidval, LEN) *
			     PAGE_SIZE) >=
			    KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
			    KDETH_OM_SMALL_SHIFT;
		/* KDETH.OM and KDETH.OFFSET (TID) */
		idx = ahg_header_set(
				ahg, idx, array_size, 7, 0, 16,
				((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
				 ((req->tidoffset >> omfactor)
				  & 0x7fff)));
		if (idx < 0)
			return idx;
		/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
		val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
				  (EXP_TID_GET(tidval, IDX) & 0x3ff));

		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
			val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
						      INTR) <<
					    AHG_KDETH_INTR_SHIFT));
		} else {
			val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
			       cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
			       cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
						      INTR) <<
					    AHG_KDETH_INTR_SHIFT));
		}

		idx = ahg_header_set(ahg, idx, array_size,
				     7, 16, 14, (__force u16)val);
		if (idx < 0)
			return idx;
	}

	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
					req->info.comp_idx, req->sde->this_idx,
					req->ahg_idx, ahg, idx, tidval);
	sdma_txinit_ahg(&tx->txreq,
			SDMA_TXREQ_F_USE_AHG,
			datalen, req->ahg_idx, idx,
			ahg, sizeof(req->hdr),
			user_sdma_txreq_cb);

	return idx;
}

/**
 * user_sdma_txreq_cb() - SDMA tx request completion callback.
 * @txreq: valid sdma tx request
 * @status: success/failure of request
 *
 * Called when the SDMA progress state machine gets notification that
 * the SDMA descriptors for this tx request have been processed by the
 * DMA engine. Called in interrupt context.
 * Only do work on completed sequences.
 */
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
	struct user_sdma_txreq *tx =
		container_of(txreq, struct user_sdma_txreq, txreq);
	struct user_sdma_request *req;
	struct hfi1_user_sdma_pkt_q *pq;
	struct hfi1_user_sdma_comp_q *cq;
	enum hfi1_sdma_comp_state state = COMPLETE;

	if (!tx->req)
		return;

	req = tx->req;
	pq = req->pq;
	cq = req->cq;

	if (status != SDMA_TXREQ_S_OK) {
		SDMA_DBG(req, "SDMA completion with error %d",
			 status);
		WRITE_ONCE(req->has_error, 1);
		state = ERROR;
	}

	req->seqcomp = tx->seqnum;
	kmem_cache_free(pq->txreq_cache, tx);
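	/*
	 * Descriptive note: completions for a request arrive in submission
	 * order on its engine, so seqcomp tracks the highest finished
	 * sequence number and only the callback for the final packet
	 * performs the cleanup below.
	 */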
	/* Not the last completion in the sequence? We are done. */
	if (req->seqcomp != req->info.npkts - 1)
		return;

	user_sdma_free_request(req, false);
	set_comp_state(pq, cq, req->info.comp_idx, state, status);
	pq_update(pq);
}

static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
	if (atomic_dec_and_test(&pq->n_reqs))
		wake_up(&pq->wait);
}

static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
{
	int i;

	if (!list_empty(&req->txps)) {
		struct sdma_txreq *t, *p;

		list_for_each_entry_safe(t, p, &req->txps, list) {
			struct user_sdma_txreq *tx =
				container_of(t, struct user_sdma_txreq, txreq);
			list_del_init(&t->list);
			sdma_txclean(req->pq->dd, t);
			kmem_cache_free(req->pq->txreq_cache, tx);
		}
	}

	for (i = 0; i < req->data_iovs; i++) {
		struct sdma_mmu_node *node = req->iovs[i].node;

		if (!node)
			continue;

		req->iovs[i].node = NULL;

		if (unpin)
			hfi1_mmu_rb_remove(req->pq->handler,
					   &node->rb);
		else
			atomic_dec(&node->refcount);
	}

	kfree(req->tids);
	clear_bit(req->info.comp_idx, req->pq->req_in_use);
}

static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
				  struct hfi1_user_sdma_comp_q *cq,
				  u16 idx, enum hfi1_sdma_comp_state state,
				  int ret)
{
	if (state == ERROR)
		cq->comps[idx].errcode = -ret;
	smp_wmb(); /* make sure errcode is visible first */
	cq->comps[idx].status = state;
	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
					idx, state, ret);
}

static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
			   unsigned long len)
{
	return (bool)(node->addr == addr);
}

static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	atomic_inc(&node->refcount);
	return 0;
}

/*
 * Return 1 to remove the node from the rb tree and call the remove op.
 *
 * Called with the rb tree lock held.
 */
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
			 void *evict_arg, bool *stop)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);
	struct evict_data *evict_data = evict_arg;

	/* is this node still being used? */
	if (atomic_read(&node->refcount))
		return 0; /* keep this node */

	/* this node will be evicted, add its pages to our count */
	evict_data->cleared += node->npages;

	/* have enough pages been cleared? */
	if (evict_data->cleared >= evict_data->target)
		*stop = true;

	return 1; /* remove this node */
}

static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	unpin_sdma_pages(node);
	kfree(node);
}

static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	if (!atomic_read(&node->refcount))
		return 1;
	return 0;
}