/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
#include "trace.h"

/**
 * rvt_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is solicited
 *
 * This may be called with qp->s_lock held.
 */
void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
	struct rvt_cq_wc *wc;
	unsigned long flags;
	u32 head;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);

	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	wc = cq->queue;
	head = wc->head;
	if (head >= (unsigned)cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	} else {
		next = head + 1;
	}

	if (unlikely(next == wc->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return;
	}
	trace_rvt_cq_enter(cq, entry, head);
	if (cq->ip) {
		wc->uqueue[head].wr_id = entry->wr_id;
		wc->uqueue[head].status = entry->status;
		wc->uqueue[head].opcode = entry->opcode;
		wc->uqueue[head].vendor_err = entry->vendor_err;
		wc->uqueue[head].byte_len = entry->byte_len;
		wc->uqueue[head].ex.imm_data = entry->ex.imm_data;
		wc->uqueue[head].qp_num = entry->qp->qp_num;
		wc->uqueue[head].src_qp = entry->src_qp;
		wc->uqueue[head].wc_flags = entry->wc_flags;
		wc->uqueue[head].pkey_index = entry->pkey_index;
		wc->uqueue[head].slid = ib_lid_cpu16(entry->slid);
		wc->uqueue[head].sl = entry->sl;
		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
		wc->uqueue[head].port_num = entry->port_num;
		/* Make sure entry is written before the head index. */
		smp_wmb();
	} else {
		wc->kqueue[head] = *entry;
	}
	wc->head = next;

	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED &&
	     (solicited || entry->status != IB_WC_SUCCESS))) {
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		spin_lock(&cq->rdi->n_cqs_lock);
		if (likely(cq->rdi->worker)) {
			cq->notify = RVT_CQ_NONE;
			cq->triggered++;
			kthread_queue_work(cq->rdi->worker, &cq->comptask);
		}
		spin_unlock(&cq->rdi->n_cqs_lock);
	}

	spin_unlock_irqrestore(&cq->lock, flags);
}
EXPORT_SYMBOL(rvt_cq_enter);
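
/*
 * Usage sketch for rvt_cq_enter() (illustrative only; "qp", "wqe", "len"
 * and "solicited" are assumed driver-local names, not defined in this
 * file): a driver's completion path might report a finished work
 * request roughly like this:
 *
 *	struct ib_wc wc = { };
 *
 *	wc.wr_id = wqe->wr_id;
 *	wc.status = IB_WC_SUCCESS;
 *	wc.opcode = IB_WC_RECV;
 *	wc.byte_len = len;
 *	wc.qp = &qp->ibqp;
 *	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited);
 *
 * The entry is copied into the queue under cq->lock before the call
 * returns, so the caller's WC only needs to stay valid for the call.
 */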

static void send_complete(struct kthread_work *work)
{
	struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);

	/*
	 * The completion handler will most likely rearm the notification
	 * and poll for all pending entries.  If a new completion entry
	 * is added while we are in this routine, queue_work()
	 * won't call us again until we return so we check triggered to
	 * see if we need to call the handler again.
	 */
	for (;;) {
		u8 triggered = cq->triggered;

		/*
		 * IPoIB connected mode assumes the callback is from a
		 * soft IRQ. We simulate this by blocking "bottom halves".
		 * See the implementation for ipoib_cm_handle_tx_wc(),
		 * netif_tx_lock_bh() and netif_tx_lock().
		 */
		local_bh_disable();
		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		local_bh_enable();

		if (cq->triggered == triggered)
			return;
	}
}

/**
 * rvt_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @attr: creation attributes
 * @context: unused by the QLogic_IB driver
 * @udata: user data for libibverbs.so
 *
 * Called by ib_create_cq() in the generic verbs code.
 *
 * Return: pointer to the completion queue or negative errno values
 * for failure.
 */
struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
			    const struct ib_cq_init_attr *attr,
			    struct ib_ucontext *context,
			    struct ib_udata *udata)
{
	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
	struct rvt_cq *cq;
	struct rvt_cq_wc *wc;
	struct ib_cq *ret;
	u32 sz;
	unsigned int entries = attr->cqe;

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
		return ERR_PTR(-EINVAL);

	/* Allocate the completion queue structure. */
	cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
	else
		sz += sizeof(struct ib_wc) * (entries + 1);
	wc = udata ?
		vmalloc_user(sz) :
		vzalloc_node(sz, rdi->dparms.node);
	if (!wc) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_cq;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See rvt_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;

		cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
		if (!cq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	}

	spin_lock_irq(&rdi->n_cqs_lock);
	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
		spin_unlock_irq(&rdi->n_cqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	rdi->n_cqs_allocated++;
	spin_unlock_irq(&rdi->n_cqs_lock);

	if (cq->ip) {
		spin_lock_irq(&rdi->pending_lock);
		list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
		spin_unlock_irq(&rdi->pending_lock);
	}

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries should be >= the number requested or return
	 * an error.
	 */
	cq->rdi = rdi;
	cq->ibcq.cqe = entries;
	cq->notify = RVT_CQ_NONE;
	spin_lock_init(&cq->lock);
	kthread_init_work(&cq->comptask, send_complete);
	cq->queue = wc;

	ret = &cq->ibcq;

	goto done;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(wc);
bail_cq:
	kfree(cq);
done:
	return ret;
}

/**
 * rvt_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 *
 * Return: always 0
 */
int rvt_destroy_cq(struct ib_cq *ibcq)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	struct rvt_dev_info *rdi = cq->rdi;

	kthread_flush_work(&cq->comptask);
	spin_lock_irq(&rdi->n_cqs_lock);
	rdi->n_cqs_allocated--;
	spin_unlock_irq(&rdi->n_cqs_lock);
	if (cq->ip)
		kref_put(&cq->ip->ref, rvt_release_mmap_info);
	else
		vfree(cq->queue);
	kfree(cq);

	return 0;
}
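
/*
 * Consumer sketch for rvt_create_cq() (illustrative; the handler and
 * context names are assumptions): a kernel ULP does not call this
 * function directly but goes through the verbs core, roughly:
 *
 *	struct ib_cq_init_attr attr = { .cqe = 256, .comp_vector = 0 };
 *	struct ib_cq *cq;
 *
 *	cq = ib_create_cq(ibdev, my_comp_handler, my_event_handler,
 *			  my_context, &attr);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 *
 * Note that attr.flags must be zero and the requested cqe must not
 * exceed rdi->dparms.props.max_cqe, or -EINVAL is returned.
 */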

/**
 * rvt_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 *
 * Return: 0 for success.
 */
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, flags);
	/*
	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
	 */
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
	    cq->queue->head != cq->queue->tail)
		ret = 1;

	spin_unlock_irqrestore(&cq->lock, flags);

	return ret;
}
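
/*
 * Rearm idiom sketch (consumer side; "handle_wc" and the "repoll" label
 * are assumed names): because this implementation returns 1 when
 * IB_CQ_REPORT_MISSED_EVENTS is set and entries are still queued, the
 * usual race-free pattern is to drain, rearm, and drain again:
 *
 *	repoll:
 *		while (ib_poll_cq(cq, 1, &wc) > 0)
 *			handle_wc(&wc);
 *		if (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
 *				     IB_CQ_REPORT_MISSED_EVENTS) > 0)
 *			goto repoll;
 */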

/**
 * rvt_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of entries to support
 * @udata: user data for libibverbs.so
 *
 * Return: 0 for success.
 */
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	struct rvt_cq_wc *old_wc;
	struct rvt_cq_wc *wc;
	u32 head, tail, n;
	int ret;
	u32 sz;
	struct rvt_dev_info *rdi = cq->rdi;

	if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
		return -EINVAL;

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
	else
		sz += sizeof(struct ib_wc) * (cqe + 1);
	wc = udata ?
		vmalloc_user(sz) :
		vzalloc_node(sz, rdi->dparms.node);
	if (!wc)
		return -ENOMEM;

	/* Check that we can write the offset to mmap. */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = 0;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail_free;
	}

	spin_lock_irq(&cq->lock);
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	old_wc = cq->queue;
	head = old_wc->head;
	if (head > (u32)cq->ibcq.cqe)
		head = (u32)cq->ibcq.cqe;
	tail = old_wc->tail;
	if (tail > (u32)cq->ibcq.cqe)
		tail = (u32)cq->ibcq.cqe;
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = head - tail;
	if (unlikely((u32)cqe < n)) {
		ret = -EINVAL;
		goto bail_unlock;
	}
	for (n = 0; tail != head; n++) {
		if (cq->ip)
			wc->uqueue[n] = old_wc->uqueue[tail];
		else
			wc->kqueue[n] = old_wc->kqueue[tail];
		if (tail == (u32)cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	cq->ibcq.cqe = cqe;
	wc->head = n;
	wc->tail = 0;
	cq->queue = wc;
	spin_unlock_irq(&cq->lock);

	vfree(old_wc);

	if (cq->ip) {
		struct rvt_mmap_info *ip = cq->ip;

		rvt_update_mmap_info(rdi, ip, sz, wc);

		/*
		 * Return the offset to mmap.
		 * See rvt_mmap() for details.
		 */
		if (udata && udata->outlen >= sizeof(__u64)) {
			ret = ib_copy_to_udata(udata, &ip->offset,
					       sizeof(ip->offset));
			if (ret)
				return ret;
		}

		spin_lock_irq(&rdi->pending_lock);
		if (list_empty(&ip->pending_mmaps))
			list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
		spin_unlock_irq(&rdi->pending_lock);
	}

	return 0;

bail_unlock:
	spin_unlock_irq(&cq->lock);
bail_free:
	vfree(wc);
	return ret;
}
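
/*
 * Worked example for the occupancy check in rvt_resize_cq() above
 * (values are hypothetical): with cq->ibcq.cqe == 8 the ring has nine
 * slots; if head == 2 and tail == 6 the occupied region wraps, so
 * n = 8 + 1 + 2 - 6 = 5 entries are outstanding, and any resize request
 * with cqe < 5 is rejected with -EINVAL before the copy loop runs.
 */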

/**
 * rvt_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 *
 * Return: the number of completion entries polled.
 */
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	struct rvt_cq_wc *wc;
	unsigned long flags;
	int npolled;
	u32 tail;

	/* The kernel can only poll a kernel completion queue */
	if (cq->ip)
		return -EINVAL;

	spin_lock_irqsave(&cq->lock, flags);

	wc = cq->queue;
	tail = wc->tail;
	if (tail > (u32)cq->ibcq.cqe)
		tail = (u32)cq->ibcq.cqe;
	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		if (tail == wc->head)
			break;
		/* The kernel doesn't need a RMB since it has the lock. */
		trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
		*entry = wc->kqueue[tail];
		if (tail >= cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	wc->tail = tail;

	spin_unlock_irqrestore(&cq->lock, flags);

	return npolled;
}

/**
 * rvt_driver_cq_init - Init cq resources on behalf of driver
 * @rdi: rvt dev structure
 *
 * Return: 0 on success
 */
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
	int cpu;
	struct kthread_worker *worker;

	if (rdi->worker)
		return 0;

	spin_lock_init(&rdi->n_cqs_lock);

	cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
	worker = kthread_create_worker_on_cpu(cpu, 0,
					      "%s", rdi->dparms.cq_name);
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	set_user_nice(worker->task, MIN_NICE);
	rdi->worker = worker;
	return 0;
}

/**
 * rvt_cq_exit - tear down cq resources
 * @rdi: rvt dev structure
 */
void rvt_cq_exit(struct rvt_dev_info *rdi)
{
	struct kthread_worker *worker;

	/* block future queuing from rvt_cq_enter() */
	spin_lock_irq(&rdi->n_cqs_lock);
	worker = rdi->worker;
	if (!worker) {
		spin_unlock_irq(&rdi->n_cqs_lock);
		return;
	}
	rdi->worker = NULL;
	spin_unlock_irq(&rdi->n_cqs_lock);

	kthread_destroy_worker(worker);
}
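
/*
 * Lifecycle sketch (illustrative; the "bail" label is an assumed name):
 * the rdmavt core pairs these calls around device registration and
 * unregistration, roughly:
 *
 *	ret = rvt_driver_cq_init(rdi);
 *	if (ret)
 *		goto bail;
 *	...
 *	rvt_cq_exit(rdi);
 *
 * rvt_driver_cq_init() is idempotent (it returns 0 early when
 * rdi->worker is already set), and rvt_cq_exit() clears rdi->worker
 * under n_cqs_lock so that rvt_cq_enter() stops queuing work before the
 * worker is flushed and destroyed.
 */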