/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"

/**
 * rvt_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is solicited
 *
 * This may be called with qp->s_lock held.
 */
void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
        struct rvt_cq_wc *wc;
        unsigned long flags;
        u32 head;
        u32 next;

        spin_lock_irqsave(&cq->lock, flags);

        /*
         * Note that the head pointer might be writable by user processes.
         * Take care to verify it is a sane value.
         */
        wc = cq->queue;
        head = wc->head;
        if (head >= (unsigned)cq->ibcq.cqe) {
                head = cq->ibcq.cqe;
                next = 0;
        } else {
                next = head + 1;
        }

        if (unlikely(next == wc->tail)) {
                spin_unlock_irqrestore(&cq->lock, flags);
                if (cq->ibcq.event_handler) {
                        struct ib_event ev;

                        ev.device = cq->ibcq.device;
                        ev.element.cq = &cq->ibcq;
                        ev.event = IB_EVENT_CQ_ERR;
                        cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
                }
                return;
        }
        if (cq->ip) {
                wc->uqueue[head].wr_id = entry->wr_id;
                wc->uqueue[head].status = entry->status;
                wc->uqueue[head].opcode = entry->opcode;
                wc->uqueue[head].vendor_err = entry->vendor_err;
                wc->uqueue[head].byte_len = entry->byte_len;
                wc->uqueue[head].ex.imm_data =
                        (__u32 __force)entry->ex.imm_data;
                wc->uqueue[head].qp_num = entry->qp->qp_num;
                wc->uqueue[head].src_qp = entry->src_qp;
                wc->uqueue[head].wc_flags = entry->wc_flags;
                wc->uqueue[head].pkey_index = entry->pkey_index;
                wc->uqueue[head].slid = entry->slid;
                wc->uqueue[head].sl = entry->sl;
                wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
                wc->uqueue[head].port_num = entry->port_num;
                /* Make sure entry is written before the head index. */
                smp_wmb();
        } else {
                wc->kqueue[head] = *entry;
        }
        wc->head = next;

        if (cq->notify == IB_CQ_NEXT_COMP ||
            (cq->notify == IB_CQ_SOLICITED &&
             (solicited || entry->status != IB_WC_SUCCESS))) {
                /*
                 * This will cause send_complete() to be called in
                 * another thread.
                 */
                spin_lock(&cq->rdi->n_cqs_lock);
                if (likely(cq->rdi->worker)) {
                        cq->notify = RVT_CQ_NONE;
                        cq->triggered++;
                        kthread_queue_work(cq->rdi->worker, &cq->comptask);
                }
                spin_unlock(&cq->rdi->n_cqs_lock);
        }

        spin_unlock_irqrestore(&cq->lock, flags);
}
EXPORT_SYMBOL(rvt_cq_enter);
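
/*
 * Illustrative sketch (not part of this file): a typical driver-side caller
 * fills a struct ib_wc on the stack and hands it to rvt_cq_enter(), flagging
 * whether the completion was solicited. The helper name and the way wr_id
 * and byte_len are obtained are assumptions; a real driver takes them from
 * the completed WQE.
 *
 *      static void example_send_comp(struct rvt_qp *qp, u64 wr_id,
 *                                    u32 byte_len, bool solicited)
 *      {
 *              struct ib_wc wc;
 *
 *              memset(&wc, 0, sizeof(wc));
 *              wc.wr_id = wr_id;
 *              wc.status = IB_WC_SUCCESS;
 *              wc.opcode = IB_WC_SEND;
 *              wc.byte_len = byte_len;
 *              wc.qp = &qp->ibqp;
 *              rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, solicited);
 *      }
 */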

static void send_complete(struct kthread_work *work)
{
        struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);

        /*
         * The completion handler will most likely rearm the notification
         * and poll for all pending entries. If a new completion entry
         * is added while we are in this routine, kthread_queue_work()
         * won't call us again until we return so we check triggered to
         * see if we need to call the handler again.
         */
        for (;;) {
                u8 triggered = cq->triggered;

                /*
                 * IPoIB connected mode assumes the callback is from a
                 * soft IRQ. We simulate this by blocking "bottom halves".
                 * See the implementation for ipoib_cm_handle_tx_wc(),
                 * netif_tx_lock_bh() and netif_tx_lock().
                 */
                local_bh_disable();
                cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
                local_bh_enable();

                if (cq->triggered == triggered)
                        return;
        }
}

/**
 * rvt_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @attr: creation attributes
 * @context: unused by the driver
 * @udata: user data for libibverbs.so
 *
 * Called by ib_create_cq() in the generic verbs code.
 *
 * Return: pointer to the completion queue or negative errno values
 * for failure.
 */
struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
                            const struct ib_cq_init_attr *attr,
                            struct ib_ucontext *context,
                            struct ib_udata *udata)
{
        struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
        struct rvt_cq *cq;
        struct rvt_cq_wc *wc;
        struct ib_cq *ret;
        u32 sz;
        unsigned int entries = attr->cqe;

        if (attr->flags)
                return ERR_PTR(-EINVAL);

        if (entries < 1 || entries > rdi->dparms.props.max_cqe)
                return ERR_PTR(-EINVAL);

        /* Allocate the completion queue structure. */
        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        /*
         * Allocate the completion queue entries and head/tail pointers.
         * This is allocated separately so that it can be resized and
         * also mapped into user space.
         * We need to use vmalloc() in order to support mmap and large
         * numbers of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
        else
                sz += sizeof(struct ib_wc) * (entries + 1);
        wc = vmalloc_user(sz);
        if (!wc) {
                ret = ERR_PTR(-ENOMEM);
                goto bail_cq;
        }

        /*
         * Return the address of the WC as the offset to mmap.
         * See rvt_mmap() for details.
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                int err;

                cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
                if (!cq->ip) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_wc;
                }

                err = ib_copy_to_udata(udata, &cq->ip->offset,
                                       sizeof(cq->ip->offset));
                if (err) {
                        ret = ERR_PTR(err);
                        goto bail_ip;
                }
        }

        spin_lock_irq(&rdi->n_cqs_lock);
        if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
                spin_unlock_irq(&rdi->n_cqs_lock);
                ret = ERR_PTR(-ENOMEM);
                goto bail_ip;
        }

        rdi->n_cqs_allocated++;
        spin_unlock_irq(&rdi->n_cqs_lock);

        if (cq->ip) {
                spin_lock_irq(&rdi->pending_lock);
                list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
                spin_unlock_irq(&rdi->pending_lock);
        }

        /*
         * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
         * The number of entries should be >= the number requested or return
         * an error.
         */
        cq->rdi = rdi;
        cq->ibcq.cqe = entries;
        cq->notify = RVT_CQ_NONE;
        spin_lock_init(&cq->lock);
        kthread_init_work(&cq->comptask, send_complete);
        cq->queue = wc;

        ret = &cq->ibcq;

        goto done;

bail_ip:
        kfree(cq->ip);
bail_wc:
        vfree(wc);
bail_cq:
        kfree(cq);
done:
        return ret;
}
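
/*
 * Worked sizing example (illustrative, mirroring the allocation above): for
 * a user-mappable CQ with N requested entries the buffer is
 *
 *      sizeof(struct rvt_cq_wc) + (N + 1) * sizeof(struct ib_uverbs_wc)
 *
 * and for a kernel CQ
 *
 *      sizeof(struct rvt_cq_wc) + (N + 1) * sizeof(struct ib_wc)
 *
 * The extra slot keeps one entry unused in a full ring, so head == tail can
 * unambiguously mean "empty" while "full" is detected as next == tail in
 * rvt_cq_enter().
 */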

/**
 * rvt_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 *
 * Return: always 0
 */
int rvt_destroy_cq(struct ib_cq *ibcq)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_dev_info *rdi = cq->rdi;

        kthread_flush_work(&cq->comptask);
        spin_lock_irq(&rdi->n_cqs_lock);
        rdi->n_cqs_allocated--;
        spin_unlock_irq(&rdi->n_cqs_lock);
        if (cq->ip)
                kref_put(&cq->ip->ref, rvt_release_mmap_info);
        else
                vfree(cq->queue);
        kfree(cq);

        return 0;
}

/**
 * rvt_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * This may be called from interrupt context. Also called by
 * ib_req_notify_cq() in the generic verbs code.
 *
 * Return: 0 for success, or 1 if IB_CQ_REPORT_MISSED_EVENTS was requested
 * and completions are already pending.
 */
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cq->lock, flags);
        /*
         * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
         * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
         */
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

        if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
            cq->queue->head != cq->queue->tail)
                ret = 1;

        spin_unlock_irqrestore(&cq->lock, flags);

        return ret;
}
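
/*
 * Illustrative re-arm pattern (sketch, not part of this file): a kernel
 * consumer typically drains, re-arms, and drains again to close the race
 * where a completion slips in between the last poll and the re-arm. With
 * IB_CQ_REPORT_MISSED_EVENTS a return of 1 means entries are already
 * queued, so the caller polls again instead of waiting for the next
 * interrupt. handle_wc() is a hypothetical consumer routine.
 *
 *      do {
 *              while (ib_poll_cq(cq, 1, &wc) > 0)
 *                      handle_wc(&wc);
 *      } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
 *                                IB_CQ_REPORT_MISSED_EVENTS) > 0);
 */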

/**
 * rvt_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of entries to allocate
 * @udata: user data for libibverbs.so
 *
 * Return: 0 for success.
 */
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_cq_wc *old_wc;
        struct rvt_cq_wc *wc;
        u32 head, tail, n;
        int ret;
        u32 sz;
        struct rvt_dev_info *rdi = cq->rdi;

        if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
                return -EINVAL;

        /*
         * Need to use vmalloc() if we want to support large numbers of
         * entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
        else
                sz += sizeof(struct ib_wc) * (cqe + 1);
        wc = vmalloc_user(sz);
        if (!wc)
                return -ENOMEM;

        /* Check that we can write the offset to mmap. */
        if (udata && udata->outlen >= sizeof(__u64)) {
                __u64 offset = 0;

                ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
                if (ret)
                        goto bail_free;
        }

        spin_lock_irq(&cq->lock);
        /*
         * Make sure head and tail are sane since they
         * might be user writable.
         */
        old_wc = cq->queue;
        head = old_wc->head;
        if (head > (u32)cq->ibcq.cqe)
                head = (u32)cq->ibcq.cqe;
        tail = old_wc->tail;
        if (tail > (u32)cq->ibcq.cqe)
                tail = (u32)cq->ibcq.cqe;
        if (head < tail)
                n = cq->ibcq.cqe + 1 + head - tail;
        else
                n = head - tail;
        if (unlikely((u32)cqe < n)) {
                ret = -EINVAL;
                goto bail_unlock;
        }
        for (n = 0; tail != head; n++) {
                if (cq->ip)
                        wc->uqueue[n] = old_wc->uqueue[tail];
                else
                        wc->kqueue[n] = old_wc->kqueue[tail];
                if (tail == (u32)cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        cq->ibcq.cqe = cqe;
        wc->head = n;
        wc->tail = 0;
        cq->queue = wc;
        spin_unlock_irq(&cq->lock);

        vfree(old_wc);

        if (cq->ip) {
                struct rvt_mmap_info *ip = cq->ip;

                rvt_update_mmap_info(rdi, ip, sz, wc);

                /*
                 * Return the offset to mmap.
                 * See rvt_mmap() for details.
                 */
                if (udata && udata->outlen >= sizeof(__u64)) {
                        ret = ib_copy_to_udata(udata, &ip->offset,
                                               sizeof(ip->offset));
                        if (ret)
                                return ret;
                }

                spin_lock_irq(&rdi->pending_lock);
                if (list_empty(&ip->pending_mmaps))
                        list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
                spin_unlock_irq(&rdi->pending_lock);
        }

        return 0;

bail_unlock:
        spin_unlock_irq(&cq->lock);
bail_free:
        vfree(wc);
        return ret;
}
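
/*
 * Worked example for the occupancy check in rvt_resize_cq() (illustrative
 * numbers): with the old cq->ibcq.cqe == 7 (eight slots), head == 2 and
 * tail == 6, the ring has wrapped, so n = 7 + 1 + 2 - 6 = 4 completions are
 * queued (slots 6, 7, 0 and 1). Any new cqe >= 4 succeeds; asking for fewer
 * entries than are currently queued fails with -EINVAL before anything is
 * copied.
 */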

/**
 * rvt_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * This may be called from interrupt context. Also called by ib_poll_cq()
 * in the generic verbs code.
 *
 * Return: the number of completion entries polled.
 */
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_cq_wc *wc;
        unsigned long flags;
        int npolled;
        u32 tail;

        /* The kernel can only poll a kernel completion queue */
        if (cq->ip)
                return -EINVAL;

        spin_lock_irqsave(&cq->lock, flags);

        wc = cq->queue;
        tail = wc->tail;
        if (tail > (u32)cq->ibcq.cqe)
                tail = (u32)cq->ibcq.cqe;
        for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
                if (tail == wc->head)
                        break;
                /* The kernel doesn't need a RMB since it has the lock. */
                *entry = wc->kqueue[tail];
                if (tail >= cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        wc->tail = tail;

        spin_unlock_irqrestore(&cq->lock, flags);

        return npolled;
}

/**
 * rvt_driver_cq_init - Init cq resources on behalf of driver
 * @rdi: rvt dev structure
 *
 * Return: 0 on success
 */
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
        int cpu;
        struct kthread_worker *worker;

        if (rdi->worker)
                return 0;

        spin_lock_init(&rdi->n_cqs_lock);

        cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
        worker = kthread_create_worker_on_cpu(cpu, 0,
                                              "%s", rdi->dparms.cq_name);
        if (IS_ERR(worker))
                return PTR_ERR(worker);

        set_user_nice(worker->task, MIN_NICE);
        rdi->worker = worker;
        return 0;
}

/**
 * rvt_cq_exit - tear down cq resources
 * @rdi: rvt dev structure
 */
void rvt_cq_exit(struct rvt_dev_info *rdi)
{
        struct kthread_worker *worker;

        /* block future queuing of the send_complete() work */
        spin_lock_irq(&rdi->n_cqs_lock);
        worker = rdi->worker;
        if (!worker) {
                spin_unlock_irq(&rdi->n_cqs_lock);
                return;
        }
        rdi->worker = NULL;
        spin_unlock_irq(&rdi->n_cqs_lock);

        kthread_destroy_worker(worker);
}
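
/*
 * Illustrative lifecycle sketch (assumption about the calling driver, not
 * part of this file): the per-device worker must exist before any
 * completion can be queued and is torn down only after all CQs have been
 * destroyed, e.g.
 *
 *      ret = rvt_driver_cq_init(rdi);
 *      if (ret)
 *              return ret;
 *      ...
 *      rvt_cq_exit(rdi);
 */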