/*
 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>

#include "pvrdma.h"

/**
 * pvrdma_req_notify_cq - request notification for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: notification flags
 *
 * @return: 0 for success.
 */
int pvrdma_req_notify_cq(struct ib_cq *ibcq,
			 enum ib_cq_notify_flags notify_flags)
{
	struct pvrdma_dev *dev = to_vdev(ibcq->device);
	struct pvrdma_cq *cq = to_vcq(ibcq);
	u32 val = cq->cq_handle;

	val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;

	pvrdma_write_uar_cq(dev, val);

	return 0;
}

/**
 * pvrdma_create_cq - create completion queue
 * @ibdev: the device
 * @attr: completion queue attributes
 * @context: user context
 * @udata: user data
 *
 * @return: ib_cq completion queue pointer on success,
 *          otherwise returns an error pointer.
 */
struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
			       const struct ib_cq_init_attr *attr,
			       struct ib_ucontext *context,
			       struct ib_udata *udata)
{
	int entries = attr->cqe;
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct pvrdma_cq *cq;
	int ret;
	int npages;
	unsigned long flags;
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
	struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
	struct pvrdma_create_cq ucmd;

	BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);

	entries = roundup_pow_of_two(entries);
	if (entries < 1 || entries > dev->dsr->caps.max_cqe)
		return ERR_PTR(-EINVAL);

	if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
		return ERR_PTR(-ENOMEM);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq) {
		atomic_dec(&dev->num_cqs);
		return ERR_PTR(-ENOMEM);
	}

	cq->ibcq.cqe = entries;

	if (context) {
		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
			ret = -EFAULT;
			goto err_cq;
		}

		cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size,
				       IB_ACCESS_LOCAL_WRITE, 1);
		if (IS_ERR(cq->umem)) {
			ret = PTR_ERR(cq->umem);
			goto err_cq;
		}

		npages = ib_umem_page_count(cq->umem);
	} else {
		cq->is_kernel = true;

		/* One extra page for shared ring state */
		npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
			      PAGE_SIZE - 1) / PAGE_SIZE;

		/* Skip header page. */
		cq->offset = PAGE_SIZE;
	}

	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
		dev_warn(&dev->pdev->dev,
			 "overflow pages in completion queue\n");
		ret = -EINVAL;
		goto err_umem;
	}

	ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
	if (ret) {
		dev_warn(&dev->pdev->dev,
			 "could not allocate page directory\n");
		goto err_umem;
	}

	/* Ring state is always the first page. Set in library for user cq. */
	if (cq->is_kernel)
		cq->ring_state = cq->pdir.pages[0];
	else
		pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);

	atomic_set(&cq->refcnt, 1);
	init_waitqueue_head(&cq->wait);
	spin_lock_init(&cq->cq_lock);

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
	cmd->nchunks = npages;
	cmd->ctx_handle = (context) ?
		(u64)to_vucontext(context)->ctx_handle : 0;
	cmd->cqe = entries;
	cmd->pdir_dma = cq->pdir.dir_dma;
	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create completion queue, error: %d\n",
			 ret);
		goto err_page_dir;
	}

	cq->ibcq.cqe = resp->cqe;
	cq->cq_handle = resp->cq_handle;
	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

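	/*
	 * For a userspace CQ, remember the context's UAR page and return the
	 * CQ handle through udata; the userspace library manages the ring
	 * state itself (see above) and uses this handle for its doorbells.
	 */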
	if (context) {
		cq->uar = &(to_vucontext(context)->uar);

		/* Copy udata back. */
		if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) {
			dev_warn(&dev->pdev->dev,
				 "failed to copy back udata\n");
			pvrdma_destroy_cq(&cq->ibcq);
			return ERR_PTR(-EINVAL);
		}
	}

	return &cq->ibcq;

err_page_dir:
	pvrdma_page_dir_cleanup(dev, &cq->pdir);
err_umem:
	if (context)
		ib_umem_release(cq->umem);
err_cq:
	atomic_dec(&dev->num_cqs);
	kfree(cq);

	return ERR_PTR(ret);
}

static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
{
	atomic_dec(&cq->refcnt);
	wait_event(cq->wait, !atomic_read(&cq->refcnt));

	if (!cq->is_kernel)
		ib_umem_release(cq->umem);

	pvrdma_page_dir_cleanup(dev, &cq->pdir);
	kfree(cq);
}

/**
 * pvrdma_destroy_cq - destroy completion queue
 * @cq: the completion queue to destroy.
 *
 * @return: 0 for success.
 */
int pvrdma_destroy_cq(struct ib_cq *cq)
{
	struct pvrdma_cq *vcq = to_vcq(cq);
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
	struct pvrdma_dev *dev = to_vdev(cq->device);
	unsigned long flags;
	int ret;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
	cmd->cq_handle = vcq->cq_handle;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0)
		dev_warn(&dev->pdev->dev,
			 "could not destroy completion queue, error: %d\n",
			 ret);

	/* free cq's resources */
	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	dev->cq_tbl[vcq->cq_handle] = NULL;
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	pvrdma_free_cq(dev, vcq);
	atomic_dec(&dev->num_cqs);

	return ret;
}

/**
 * pvrdma_modify_cq - modify the CQ moderation parameters
 * @cq: the CQ to modify
 * @cq_count: number of CQEs that will trigger an event
 * @cq_period: max period of time in usec before triggering an event
 *
 * @return: -EOPNOTSUPP as CQ moderation is not supported.
 */
int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	return -EOPNOTSUPP;
}

static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
{
	return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
					&cq->pdir,
					cq->offset +
					sizeof(struct pvrdma_cqe) * i);
}

void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
{
	int head;
	int has_data;

	if (!cq->is_kernel)
		return;

	/* Lock held */
	has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
					    cq->ibcq.cqe, &head);
	if (unlikely(has_data > 0)) {
		int items;
		int curr;
		int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
				      cq->ibcq.cqe);
		struct pvrdma_cqe *cqe;
		struct pvrdma_cqe *curr_cqe;

		items = (tail > head) ? (tail - head) :
			(cq->ibcq.cqe - head + tail);
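		/*
		 * Walk the pending CQEs backwards from the newest entry.
		 * CQEs that belong to the QP being flushed are dropped by
		 * advancing the consumer index; all other CQEs are compacted
		 * towards the tail so that no completion is lost.
		 */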
		curr = --tail;
		while (items-- > 0) {
			if (curr < 0)
				curr = cq->ibcq.cqe - 1;
			if (tail < 0)
				tail = cq->ibcq.cqe - 1;
			curr_cqe = get_cqe(cq, curr);
			if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
				if (curr != tail) {
					cqe = get_cqe(cq, tail);
					*cqe = *curr_cqe;
				}
				tail--;
			} else {
				pvrdma_idx_ring_inc(
					&cq->ring_state->rx.cons_head,
					cq->ibcq.cqe);
			}
			curr--;
		}
	}
}

static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
			   struct ib_wc *wc)
{
	struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
	int has_data;
	unsigned int head;
	bool tried = false;
	struct pvrdma_cqe *cqe;

retry:
	has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
					    cq->ibcq.cqe, &head);
	if (has_data == 0) {
		if (tried)
			return -EAGAIN;

		pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);

		tried = true;
		goto retry;
	} else if (has_data == PVRDMA_INVALID_IDX) {
		dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
		return -EAGAIN;
	}

	cqe = get_cqe(cq, head);

	/* Ensure cqe is valid. */
	rmb();
	if (dev->qp_tbl[cqe->qp & 0xffff])
		*cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
	else
		return -EAGAIN;

	wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
	wc->status = pvrdma_wc_status_to_ib(cqe->status);
	wc->wr_id = cqe->wr_id;
	wc->qp = &(*cur_qp)->ibqp;
	wc->byte_len = cqe->byte_len;
	wc->ex.imm_data = cqe->imm_data;
	wc->src_qp = cqe->src_qp;
	wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
	wc->pkey_index = cqe->pkey_index;
	wc->slid = cqe->slid;
	wc->sl = cqe->sl;
	wc->dlid_path_bits = cqe->dlid_path_bits;
	wc->port_num = cqe->port_num;
	wc->vendor_err = cqe->vendor_err;

	/* Update shared ring state */
	pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);

	return 0;
}

/**
 * pvrdma_poll_cq - poll for work completion queue entries
 * @ibcq: completion queue
 * @num_entries: the maximum number of entries
 * @wc: array of work completions to fill
 *
 * @return: number of polled completion entries
 */
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct pvrdma_cq *cq = to_vcq(ibcq);
	struct pvrdma_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;

	if (num_entries < 1 || wc == NULL)
		return 0;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
			break;
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	/* Ensure we do not return errors from poll_cq */
	return npolled;
}

/**
 * pvrdma_resize_cq - resize CQ
 * @ibcq: the completion queue
 * @entries: CQ entries
 * @udata: user data
 *
 * @return: -EOPNOTSUPP as CQ resize is not supported.
 */
int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	return -EOPNOTSUPP;
}
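
/*
 * Illustrative only, not part of the driver: kernel consumers do not call
 * the pvrdma_* verbs above directly; they go through the ib_verbs core
 * (ib_create_cq/ib_req_notify_cq/ib_poll_cq), which dispatches to these
 * functions. A minimal sketch of such a consumer follows; the function
 * names and the hard-coded CQ depth are hypothetical.
 */
#if 0
static void example_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_wc wc;

	/* Drain completions; the core ends up in pvrdma_poll_cq(). */
	while (ib_poll_cq(cq, 1, &wc) > 0) {
		if (wc.status != IB_WC_SUCCESS)
			pr_warn("wr_id %llu completed in error\n", wc.wr_id);
	}

	/* Re-arm the CQ; the core ends up in pvrdma_req_notify_cq(). */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

static struct ib_cq *example_create_cq(struct ib_device *ibdev)
{
	struct ib_cq_init_attr attr = { .cqe = 256, .comp_vector = 0 };

	/* The core calls pvrdma_create_cq() with a NULL user context. */
	return ib_create_cq(ibdev, example_comp_handler, NULL, NULL, &attr);
}
#endif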