/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include <linux/jiffies.h>

/* @umem: umem object to scan
 * @addr: ib virtual address requested by the user
 * @max_page_shift: high limit for page_shift - 0 means no limit
 * @count: number of PAGE_SIZE pages covered by umem
 * @shift: page shift for the compound pages found in the region
 * @ncont: number of compound pages
 * @order: log2 of the number of compound pages
 */
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
			unsigned long max_page_shift,
			int *count, int *shift,
			int *ncont, int *order)
{
	unsigned long tmp;
	unsigned long m;
	u64 base = ~0, p = 0;
	u64 len, pfn;
	int i = 0;
	struct scatterlist *sg;
	int entry;

	addr = addr >> PAGE_SHIFT;
	tmp = (unsigned long)addr;
	m = find_first_bit(&tmp, BITS_PER_LONG);
	if (max_page_shift)
		m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		len = sg_dma_len(sg) >> PAGE_SHIFT;
		pfn = sg_dma_address(sg) >> PAGE_SHIFT;
		if (base + p != pfn) {
			/* If either the offset or the new
			 * base is unaligned, update m.
			 */
			tmp = (unsigned long)(pfn | p);
			if (!IS_ALIGNED(tmp, 1 << m))
				m = find_first_bit(&tmp, BITS_PER_LONG);

			base = pfn;
			p = 0;
		}

		p += len;
		i += len;
	}

	if (i) {
		m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);

		if (order)
			*order = ilog2(roundup_pow_of_two(i) >> m);

		*ncont = DIV_ROUND_UP(i, (1 << m));
	} else {
		m = 0;

		if (order)
			*order = 0;

		*ncont = 0;
	}
	*shift = PAGE_SHIFT + m;
	*count = i;
}

static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
{
	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;

	if (umem_dma & ODP_READ_ALLOWED_BIT)
		mtt_entry |= MLX5_IB_MTT_READ;
	if (umem_dma & ODP_WRITE_ALLOWED_BIT)
		mtt_entry |= MLX5_IB_MTT_WRITE;

	return mtt_entry;
}
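/*
 * A worked example of the mlx5_ib_cont_pages() computation above, with
 * illustrative values (PAGE_SHIFT == 12 is assumed; the addresses are
 * hypothetical, not taken from real hardware):
 *
 * Suppose addr = 0x20000000 and the umem maps two physically
 * contiguous SG entries:
 *
 *	entry 0: dma 0x20000000, len 0x200000 -> pfn 0x20000, 512 pages
 *	entry 1: dma 0x20200000, len 0x200000 -> pfn 0x20200, 512 pages
 *
 * addr >> PAGE_SHIFT = 0x20000 = 1 << 17, so initially
 * m = find_first_bit() = 17.  Entry 1 continues the run started by
 * entry 0 (base + p == pfn), so m is never reduced and i reaches 1024.
 * The final clamp gives m = min(ilog2(roundup_pow_of_two(1024)), 17) = 10,
 * hence:
 *
 *	*shift = PAGE_SHIFT + 10 = 22	(4MB compound pages)
 *	*ncont = DIV_ROUND_UP(1024, 1 << 10) = 1
 *	*count = 1024, *order = 0
 */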
/*
 * Populate the given array with bus addresses from the umem.
 *
 * dev - mlx5_ib device
 * umem - umem to use to fill the pages
 * page_shift - determines the page size used in the resulting array
 * offset - offset into the umem to start from,
 *          only implemented for ODP umems
 * num_pages - total number of pages to fill
 * pas - bus addresses array to fill
 * access_flags - access flags to set on all present pages.
 *                Use enum mlx5_ib_mtt_access_flags for this.
 */
void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			    int page_shift, size_t offset, size_t num_pages,
			    __be64 *pas, int access_flags)
{
	int shift = page_shift - PAGE_SHIFT;
	int mask = (1 << shift) - 1;
	int i, k, idx;
	u64 cur = 0;
	u64 base;
	int len;
	struct scatterlist *sg;
	int entry;

	if (umem->is_odp) {
		WARN_ON(shift != 0);
		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));

		for (i = 0; i < num_pages; ++i) {
			dma_addr_t pa =
				to_ib_umem_odp(umem)->dma_list[offset + i];

			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
		}
		return;
	}

	i = 0;
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		len = sg_dma_len(sg) >> PAGE_SHIFT;
		base = sg_dma_address(sg);

		/* Skip elements below offset */
		if (i + len < offset << shift) {
			i += len;
			continue;
		}

		/* Skip pages below offset */
		if (i < offset << shift) {
			k = (offset << shift) - i;
			i = offset << shift;
		} else {
			k = 0;
		}

		for (; k < len; k++) {
			if (!(i & mask)) {
				cur = base + (k << PAGE_SHIFT);
				cur |= access_flags;
				idx = (i >> shift) - offset;

				pas[idx] = cpu_to_be64(cur);
				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
					    idx, be64_to_cpu(pas[idx]));
			}
			i++;

			/* Stop after num_pages reached */
			if (i >> shift >= offset + num_pages)
				return;
		}
	}
}

void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			  int page_shift, __be64 *pas, int access_flags)
{
	__mlx5_ib_populate_pas(dev, umem, page_shift, 0,
			       ib_umem_num_pages(umem), pas,
			       access_flags);
}

int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
{
	u64 page_size;
	u64 page_mask;
	u64 off_size;
	u64 off_mask;
	u64 buf_off;

	page_size = (u64)1 << page_shift;
	page_mask = page_size - 1;
	buf_off = addr & page_mask;
	off_size = page_size >> 6;
	off_mask = off_size - 1;

	if (buf_off & off_mask)
		return -EINVAL;

	*offset = buf_off >> ilog2(off_size);
	return 0;
}
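/*
 * A worked example of the offset encoding above, with illustrative
 * values: for page_shift = 12 the buffer start is expressed in units
 * of page_size / 64 = 64 bytes.
 *
 *	addr = 0x10001040:
 *		buf_off  = 0x10001040 & 0xfff = 0x40
 *		off_size = 4096 >> 6 = 64, off_mask = 63
 *		0x40 & 63 == 0, so *offset = 0x40 >> 6 = 1
 *
 *	addr = 0x10001010:
 *		buf_off = 0x10 and 0x10 & 63 != 0, so -EINVAL is
 *		returned: the buffer must start on a 64-byte quantum.
 */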
#define WR_ID_BF 0xBF
#define WR_ID_END 0xBAD
#define TEST_WC_NUM_WQES 255
#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)

static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
			 bool signaled)
{
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct mlx5_bf *bf = &qp->bf;
	__be32 mmio_wqe[16] = {};
	unsigned long flags;
	unsigned int idx;
	int i;

	if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -EIO;

	spin_lock_irqsave(&qp->sq.lock, flags);

	idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
	ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);

	memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
	ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
	ctrl->opmod_idx_opcode =
		cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
	ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
				   (qp->trans_qp.base.mqp.qpn << 8));

	qp->sq.wrid[idx] = wr_id;
	qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
	qp->sq.wqe_head[idx] = qp->sq.head + 1;
	qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
					MLX5_SEND_WQE_BB);
	qp->sq.w_list[idx].next = qp->sq.cur_post;
	qp->sq.head++;

	memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
	((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
		MLX5_WQE_CTRL_CQ_UPDATE;

	/* Make sure that descriptors are written before
	 * updating doorbell record and ringing the doorbell
	 */
	wmb();

	qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);

	/* Make sure doorbell record is visible to the HCA before
	 * we hit doorbell
	 */
	wmb();

	for (i = 0; i < 8; i++)
		mlx5_write64(&mmio_wqe[i * 2],
			     bf->bfreg->map + bf->offset + i * 8);

	bf->offset ^= bf->buf_size;

	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return 0;
}

static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
{
	int ret;
	struct ib_wc wc = {};
	unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;

	do {
		ret = ib_poll_cq(cq, 1, &wc);
		if (ret < 0 || wc.status)
			return ret < 0 ? ret : -EINVAL;
		if (ret)
			break;
	} while (!time_after(jiffies, end));

	if (!ret)
		return -ETIMEDOUT;

	if (wc.wr_id != WR_ID_BF)
		ret = 0;

	return ret;
}

static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
	int err, i;

	for (i = 0; i < TEST_WC_NUM_WQES; i++) {
		err = post_send_nop(dev, qp, WR_ID_BF, false);
		if (err)
			return err;
	}

	return post_send_nop(dev, qp, WR_ID_END, true);
}
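/*
 * How the self test below is expected to tell working write combining
 * apart (a sketch of the intent behind post_send_nop() and
 * test_wc_poll_cq_result(), not additional behaviour): the NOP WQE
 * written to the SQ buffer is unsignaled, while the copy pushed
 * through the BlueFlame register always has MLX5_WQE_CTRL_CQ_UPDATE
 * or'ed in.  The wr_id of the first polled completion therefore
 * reveals which copy the HCA consumed:
 *
 *	wr_id == WR_ID_BF	BF copy executed, WC works (returns 1)
 *	wr_id == WR_ID_END	only the final, signaled NOP completed,
 *				WC is not working (returns 0)
 *	no completion		-ETIMEDOUT
 */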
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
{
	struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
	int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
	struct ib_qp_init_attr qp_init_attr = {
		.cap = { .max_send_wr = TEST_WC_NUM_WQES },
		.qp_type = IB_QPT_UD,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.create_flags = MLX5_IB_QP_CREATE_WC_TEST,
	};
	struct ib_qp_attr qp_attr = { .port_num = 1 };
	struct ib_device *ibdev = &dev->ib_dev;
	struct ib_qp *qp;
	struct ib_cq *cq;
	struct ib_pd *pd;
	int ret;

	if (!MLX5_CAP_GEN(dev->mdev, bf))
		return 0;

	if (!dev->mdev->roce.roce_en &&
	    port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
		if (mlx5_core_is_pf(dev->mdev))
			dev->wc_support = true;
		return 0;
	}

	ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
	if (ret)
		goto print_err;

	if (!dev->wc_bfreg.wc)
		goto out1;

	pd = ib_alloc_pd(ibdev, 0);
	if (IS_ERR(pd)) {
		ret = PTR_ERR(pd);
		goto out1;
	}

	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
	if (IS_ERR(cq)) {
		ret = PTR_ERR(cq);
		goto out2;
	}

	qp_init_attr.recv_cq = cq;
	qp_init_attr.send_cq = cq;
	qp = ib_create_qp(pd, &qp_init_attr);
	if (IS_ERR(qp)) {
		ret = PTR_ERR(qp);
		goto out3;
	}

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_modify_qp(qp, &qp_attr,
			   IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
			   IB_QP_QKEY);
	if (ret)
		goto out4;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		goto out4;

	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
	if (ret)
		goto out4;

	ret = test_wc_do_send(dev, qp);
	if (ret < 0)
		goto out4;

	ret = test_wc_poll_cq_result(dev, cq);
	if (ret > 0) {
		dev->wc_support = true;
		ret = 0;
	}

out4:
	ib_destroy_qp(qp);
out3:
	ib_destroy_cq(cq);
out2:
	ib_dealloc_pd(pd);
out1:
	mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
print_err:
	if (ret)
		mlx5_ib_err(
			dev,
			"Error %d while trying to test write-combining support\n",
			ret);
	return ret;
}
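/*
 * A minimal caller sketch (hypothetical; the actual probe-path hook-up
 * lives elsewhere in the driver): the test is meant to run once during
 * device initialization, after which dev->wc_support gates whether the
 * driver posts work through write-combining BlueFlame buffers.
 *
 *	if (mlx5_ib_test_wc(dev))
 *		mlx5_ib_dbg(dev, "WC self test could not run\n");
 *	if (!dev->wc_support)
 *		mlx5_ib_dbg(dev, "Posting via doorbell only, no WC\n");
 */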