/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"

/**
 * rvt_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is solicited
 *
 * This may be called with qp->s_lock held.
 */
void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
        struct rvt_cq_wc *wc;
        unsigned long flags;
        u32 head;
        u32 next;

        spin_lock_irqsave(&cq->lock, flags);

        /*
         * Note that the head pointer might be writable by user processes.
         * Take care to verify it is a sane value.
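         * The ring holds ibcq.cqe + 1 entries, so head == tail means the
         * queue is empty and advancing head onto tail means it is full.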
         */
        wc = cq->queue;
        head = wc->head;
        if (head >= (unsigned)cq->ibcq.cqe) {
                head = cq->ibcq.cqe;
                next = 0;
        } else {
                next = head + 1;
        }

        if (unlikely(next == wc->tail)) {
                spin_unlock_irqrestore(&cq->lock, flags);
                if (cq->ibcq.event_handler) {
                        struct ib_event ev;

                        ev.device = cq->ibcq.device;
                        ev.element.cq = &cq->ibcq;
                        ev.event = IB_EVENT_CQ_ERR;
                        cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
                }
                return;
        }
        if (cq->ip) {
                wc->uqueue[head].wr_id = entry->wr_id;
                wc->uqueue[head].status = entry->status;
                wc->uqueue[head].opcode = entry->opcode;
                wc->uqueue[head].vendor_err = entry->vendor_err;
                wc->uqueue[head].byte_len = entry->byte_len;
                wc->uqueue[head].ex.imm_data =
                        (__u32 __force)entry->ex.imm_data;
                wc->uqueue[head].qp_num = entry->qp->qp_num;
                wc->uqueue[head].src_qp = entry->src_qp;
                wc->uqueue[head].wc_flags = entry->wc_flags;
                wc->uqueue[head].pkey_index = entry->pkey_index;
                wc->uqueue[head].slid = entry->slid;
                wc->uqueue[head].sl = entry->sl;
                wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
                wc->uqueue[head].port_num = entry->port_num;
                /* Make sure entry is written before the head index. */
                smp_wmb();
        } else {
                wc->kqueue[head] = *entry;
        }
        wc->head = next;

        if (cq->notify == IB_CQ_NEXT_COMP ||
            (cq->notify == IB_CQ_SOLICITED &&
             (solicited || entry->status != IB_WC_SUCCESS))) {
                struct kthread_worker *worker;
                /*
                 * This will cause send_complete() to be called in
                 * another thread.
                 */
                smp_read_barrier_depends(); /* see rvt_cq_exit */
                worker = cq->rdi->worker;
                if (likely(worker)) {
                        cq->notify = RVT_CQ_NONE;
                        cq->triggered++;
                        queue_kthread_work(worker, &cq->comptask);
                }
        }

        spin_unlock_irqrestore(&cq->lock, flags);
}
EXPORT_SYMBOL(rvt_cq_enter);

static void send_complete(struct kthread_work *work)
{
        struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);

        /*
         * The completion handler will most likely rearm the notification
         * and poll for all pending entries.  If a new completion entry
         * is added while we are in this routine, queue_kthread_work()
         * won't call us again until we return so we check triggered to
         * see if we need to call the handler again.
         */
        for (;;) {
                u8 triggered = cq->triggered;

                /*
                 * IPoIB connected mode assumes the callback is from a
                 * soft IRQ.  We simulate this by blocking "bottom halves".
                 * See the implementation for ipoib_cm_handle_tx_wc(),
                 * netif_tx_lock_bh() and netif_tx_lock().
                 */
                local_bh_disable();
                cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
                local_bh_enable();

                if (cq->triggered == triggered)
                        return;
        }
}

/**
 * rvt_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @attr: creation attributes
 * @context: the owning user context; used to set up the mmap info for
 *           user-mapped CQs
 * @udata: user data for libibverbs.so
 *
 * Called by ib_create_cq() in the generic verbs code.
 *
 * Return: pointer to the completion queue or negative errno values
 * for failure.
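 *
 * The completion ring is allocated with one extra slot.  When @udata has
 * room for the mmap offset, the ring holds ib_uverbs_wc entries and is
 * exported to user space through rvt_mmap(); otherwise kernel ib_wc
 * entries are used.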
 */
struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
                            const struct ib_cq_init_attr *attr,
                            struct ib_ucontext *context,
                            struct ib_udata *udata)
{
        struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
        struct rvt_cq *cq;
        struct rvt_cq_wc *wc;
        struct ib_cq *ret;
        u32 sz;
        unsigned int entries = attr->cqe;

        if (attr->flags)
                return ERR_PTR(-EINVAL);

        if (entries < 1 || entries > rdi->dparms.props.max_cqe)
                return ERR_PTR(-EINVAL);

        /* Allocate the completion queue structure. */
        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        /*
         * Allocate the completion queue entries and head/tail pointers.
         * This is allocated separately so that it can be resized and
         * also mapped into user space.
         * We need to use vmalloc() in order to support mmap and large
         * numbers of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
        else
                sz += sizeof(struct ib_wc) * (entries + 1);
        wc = vmalloc_user(sz);
        if (!wc) {
                ret = ERR_PTR(-ENOMEM);
                goto bail_cq;
        }

        /*
         * Return the address of the WC as the offset to mmap.
         * See rvt_mmap() for details.
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                int err;

                cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
                if (!cq->ip) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_wc;
                }

                err = ib_copy_to_udata(udata, &cq->ip->offset,
                                       sizeof(cq->ip->offset));
                if (err) {
                        ret = ERR_PTR(err);
                        goto bail_ip;
                }
        }

        spin_lock(&rdi->n_cqs_lock);
        if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
                spin_unlock(&rdi->n_cqs_lock);
                ret = ERR_PTR(-ENOMEM);
                goto bail_ip;
        }

        rdi->n_cqs_allocated++;
        spin_unlock(&rdi->n_cqs_lock);

        if (cq->ip) {
                spin_lock_irq(&rdi->pending_lock);
                list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
                spin_unlock_irq(&rdi->pending_lock);
        }

        /*
         * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
         * The number of entries should be >= the number requested or return
         * an error.
         */
        cq->rdi = rdi;
        cq->ibcq.cqe = entries;
        cq->notify = RVT_CQ_NONE;
        spin_lock_init(&cq->lock);
        init_kthread_work(&cq->comptask, send_complete);
        cq->queue = wc;

        ret = &cq->ibcq;

        goto done;

bail_ip:
        kfree(cq->ip);
bail_wc:
        vfree(wc);
bail_cq:
        kfree(cq);
done:
        return ret;
}

/**
 * rvt_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 *
 * Return: always 0
 */
int rvt_destroy_cq(struct ib_cq *ibcq)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_dev_info *rdi = cq->rdi;

        flush_kthread_work(&cq->comptask);
        spin_lock(&rdi->n_cqs_lock);
        rdi->n_cqs_allocated--;
        spin_unlock(&rdi->n_cqs_lock);
        if (cq->ip)
                kref_put(&cq->ip->ref, rvt_release_mmap_info);
        else
                vfree(cq->queue);
        kfree(cq);

        return 0;
}

/**
 * rvt_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 *
 * Return: 0 for success.
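 * If IB_CQ_REPORT_MISSED_EVENTS is requested and completion entries are
 * already pending, 1 is returned instead.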
 */
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cq->lock, flags);
        /*
         * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
         * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
         */
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

        if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
            cq->queue->head != cq->queue->tail)
                ret = 1;

        spin_unlock_irqrestore(&cq->lock, flags);

        return ret;
}

/**
 * rvt_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of completion queue entries
 * @udata: user data for libibverbs.so
 *
 * Return: 0 for success.
 */
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_cq_wc *old_wc;
        struct rvt_cq_wc *wc;
        u32 head, tail, n;
        int ret;
        u32 sz;
        struct rvt_dev_info *rdi = cq->rdi;

        if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
                return -EINVAL;

        /*
         * Need to use vmalloc() if we want to support large #s of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
        else
                sz += sizeof(struct ib_wc) * (cqe + 1);
        wc = vmalloc_user(sz);
        if (!wc)
                return -ENOMEM;

        /* Check that we can write the offset to mmap. */
        if (udata && udata->outlen >= sizeof(__u64)) {
                __u64 offset = 0;

                ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
                if (ret)
                        goto bail_free;
        }

        spin_lock_irq(&cq->lock);
        /*
         * Make sure head and tail are sane since they
         * might be user writable.
         */
        old_wc = cq->queue;
        head = old_wc->head;
        if (head > (u32)cq->ibcq.cqe)
                head = (u32)cq->ibcq.cqe;
        tail = old_wc->tail;
        if (tail > (u32)cq->ibcq.cqe)
                tail = (u32)cq->ibcq.cqe;
        if (head < tail)
                n = cq->ibcq.cqe + 1 + head - tail;
        else
                n = head - tail;
        if (unlikely((u32)cqe < n)) {
                ret = -EINVAL;
                goto bail_unlock;
        }
        for (n = 0; tail != head; n++) {
                if (cq->ip)
                        wc->uqueue[n] = old_wc->uqueue[tail];
                else
                        wc->kqueue[n] = old_wc->kqueue[tail];
                if (tail == (u32)cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        cq->ibcq.cqe = cqe;
        wc->head = n;
        wc->tail = 0;
        cq->queue = wc;
        spin_unlock_irq(&cq->lock);

        vfree(old_wc);

        if (cq->ip) {
                struct rvt_mmap_info *ip = cq->ip;

                rvt_update_mmap_info(rdi, ip, sz, wc);

                /*
                 * Return the offset to mmap.
                 * See rvt_mmap() for details.
                 */
                if (udata && udata->outlen >= sizeof(__u64)) {
                        ret = ib_copy_to_udata(udata, &ip->offset,
                                               sizeof(ip->offset));
                        if (ret)
                                return ret;
                }

                spin_lock_irq(&rdi->pending_lock);
                if (list_empty(&ip->pending_mmaps))
                        list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
                spin_unlock_irq(&rdi->pending_lock);
        }

        return 0;

bail_unlock:
        spin_unlock_irq(&cq->lock);
bail_free:
        vfree(wc);
        return ret;
}

/**
 * rvt_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
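 *
 * Only a kernel completion queue can be polled here; a CQ that has been
 * mapped into user space (cq->ip is set) returns -EINVAL.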
 *
 * Return: the number of completion entries polled.
 */
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_cq_wc *wc;
        unsigned long flags;
        int npolled;
        u32 tail;

        /* The kernel can only poll a kernel completion queue */
        if (cq->ip)
                return -EINVAL;

        spin_lock_irqsave(&cq->lock, flags);

        wc = cq->queue;
        tail = wc->tail;
        if (tail > (u32)cq->ibcq.cqe)
                tail = (u32)cq->ibcq.cqe;
        for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
                if (tail == wc->head)
                        break;
                /* The kernel doesn't need a RMB since it has the lock. */
                *entry = wc->kqueue[tail];
                if (tail >= cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        wc->tail = tail;

        spin_unlock_irqrestore(&cq->lock, flags);

        return npolled;
}

/**
 * rvt_driver_cq_init - Init cq resources on behalf of driver
 * @rdi: rvt dev structure
 *
 * Return: 0 on success
 */
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
        int ret = 0;
        int cpu;
        struct task_struct *task;

        if (rdi->worker)
                return 0;
        spin_lock_init(&rdi->n_cqs_lock);
        rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
        if (!rdi->worker)
                return -ENOMEM;
        init_kthread_worker(rdi->worker);
        task = kthread_create_on_node(
                kthread_worker_fn,
                rdi->worker,
                rdi->dparms.node,
                "%s", rdi->dparms.cq_name);
        if (IS_ERR(task)) {
                kfree(rdi->worker);
                rdi->worker = NULL;
                return PTR_ERR(task);
        }

        set_user_nice(task, MIN_NICE);
        cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
        kthread_bind(task, cpu);
        wake_up_process(task);
        return ret;
}

/**
 * rvt_cq_exit - tear down cq resources
 * @rdi: rvt dev structure
 */
void rvt_cq_exit(struct rvt_dev_info *rdi)
{
        struct kthread_worker *worker;

        worker = rdi->worker;
        if (!worker)
                return;
        /* blocks future queuing of send_complete() */
        rdi->worker = NULL;
        smp_wmb(); /* See rvt_cq_enter */
        flush_kthread_worker(worker);
        kthread_stop(worker->task);
        kfree(worker);
}