1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/module.h> 34 #include <rdma/ib_umem.h> 35 #include <rdma/ib_umem_odp.h> 36 #include "mlx5_ib.h" 37 #include <linux/jiffies.h> 38 39 /* @umem: umem object to scan 40 * @addr: ib virtual address requested by the user 41 * @max_page_shift: high limit for page_shift - 0 means no limit 42 * @count: number of PAGE_SIZE pages covered by umem 43 * @shift: page shift for the compound pages found in the region 44 * @ncont: number of compund pages 45 * @order: log2 of the number of compound pages 46 */ 47 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, 48 unsigned long max_page_shift, 49 int *count, int *shift, 50 int *ncont, int *order) 51 { 52 unsigned long tmp; 53 unsigned long m; 54 u64 base = ~0, p = 0; 55 u64 len, pfn; 56 int i = 0; 57 struct scatterlist *sg; 58 int entry; 59 60 addr = addr >> PAGE_SHIFT; 61 tmp = (unsigned long)addr; 62 m = find_first_bit(&tmp, BITS_PER_LONG); 63 if (max_page_shift) 64 m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); 65 66 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 67 len = sg_dma_len(sg) >> PAGE_SHIFT; 68 pfn = sg_dma_address(sg) >> PAGE_SHIFT; 69 if (base + p != pfn) { 70 /* If either the offset or the new 71 * base are unaligned update m 72 */ 73 tmp = (unsigned long)(pfn | p); 74 if (!IS_ALIGNED(tmp, 1 << m)) 75 m = find_first_bit(&tmp, BITS_PER_LONG); 76 77 base = pfn; 78 p = 0; 79 } 80 81 p += len; 82 i += len; 83 } 84 85 if (i) { 86 m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); 87 88 if (order) 89 *order = ilog2(roundup_pow_of_two(i) >> m); 90 91 *ncont = DIV_ROUND_UP(i, (1 << m)); 92 } else { 93 m = 0; 94 95 if (order) 96 *order = 0; 97 98 *ncont = 0; 99 } 100 *shift = PAGE_SHIFT + m; 101 *count = i; 102 } 103 104 /* 105 * Populate the given array with bus addresses from the umem. 106 * 107 * dev - mlx5_ib device 108 * umem - umem to use to fill the pages 109 * page_shift - determines the page size used in the resulting array 110 * offset - offset into the umem to start from, 111 * only implemented for ODP umems 112 * num_pages - total number of pages to fill 113 * pas - bus addresses array to fill 114 * access_flags - access flags to set on all present pages. 115 use enum mlx5_ib_mtt_access_flags for this. 116 */ 117 void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 118 int page_shift, size_t offset, size_t num_pages, 119 __be64 *pas, int access_flags) 120 { 121 int shift = page_shift - PAGE_SHIFT; 122 int mask = (1 << shift) - 1; 123 int i, k, idx; 124 u64 cur = 0; 125 u64 base; 126 int len; 127 struct scatterlist *sg; 128 int entry; 129 130 i = 0; 131 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 132 len = sg_dma_len(sg) >> PAGE_SHIFT; 133 base = sg_dma_address(sg); 134 135 /* Skip elements below offset */ 136 if (i + len < offset << shift) { 137 i += len; 138 continue; 139 } 140 141 /* Skip pages below offset */ 142 if (i < offset << shift) { 143 k = (offset << shift) - i; 144 i = offset << shift; 145 } else { 146 k = 0; 147 } 148 149 for (; k < len; k++) { 150 if (!(i & mask)) { 151 cur = base + (k << PAGE_SHIFT); 152 cur |= access_flags; 153 idx = (i >> shift) - offset; 154 155 pas[idx] = cpu_to_be64(cur); 156 mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", 157 i >> shift, be64_to_cpu(pas[idx])); 158 } 159 i++; 160 161 /* Stop after num_pages reached */ 162 if (i >> shift >= offset + num_pages) 163 return; 164 } 165 } 166 } 167 168 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 169 int page_shift, __be64 *pas, int access_flags) 170 { 171 return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, 172 ib_umem_num_pages(umem), pas, 173 access_flags); 174 } 175 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) 176 { 177 u64 page_size; 178 u64 page_mask; 179 u64 off_size; 180 u64 off_mask; 181 u64 buf_off; 182 183 page_size = (u64)1 << page_shift; 184 page_mask = page_size - 1; 185 buf_off = addr & page_mask; 186 off_size = page_size >> 6; 187 off_mask = off_size - 1; 188 189 if (buf_off & off_mask) 190 return -EINVAL; 191 192 *offset = buf_off >> ilog2(off_size); 193 return 0; 194 } 195 196 #define WR_ID_BF 0xBF 197 #define WR_ID_END 0xBAD 198 #define TEST_WC_NUM_WQES 255 199 #define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) 200 static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, 201 bool signaled) 202 { 203 struct mlx5_ib_qp *qp = to_mqp(ibqp); 204 struct mlx5_wqe_ctrl_seg *ctrl; 205 struct mlx5_bf *bf = &qp->bf; 206 __be32 mmio_wqe[16] = {}; 207 unsigned long flags; 208 unsigned int idx; 209 int i; 210 211 if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) 212 return -EIO; 213 214 spin_lock_irqsave(&qp->sq.lock, flags); 215 216 idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); 217 ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); 218 219 memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg)); 220 ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0; 221 ctrl->opmod_idx_opcode = 222 cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP); 223 ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) | 224 (qp->trans_qp.base.mqp.qpn << 8)); 225 226 qp->sq.wrid[idx] = wr_id; 227 qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP; 228 qp->sq.wqe_head[idx] = qp->sq.head + 1; 229 qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), 230 MLX5_SEND_WQE_BB); 231 qp->sq.w_list[idx].next = qp->sq.cur_post; 232 qp->sq.head++; 233 234 memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); 235 ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= 236 MLX5_WQE_CTRL_CQ_UPDATE; 237 238 /* Make sure that descriptors are written before 239 * updating doorbell record and ringing the doorbell 240 */ 241 wmb(); 242 243 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); 244 245 /* Make sure doorbell record is visible to the HCA before 246 * we hit doorbell 247 */ 248 wmb(); 249 for (i = 0; i < 8; i++) 250 mlx5_write64(&mmio_wqe[i * 2], 251 bf->bfreg->map + bf->offset + i * 8); 252 253 bf->offset ^= bf->buf_size; 254 255 spin_unlock_irqrestore(&qp->sq.lock, flags); 256 257 return 0; 258 } 259 260 static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq) 261 { 262 int ret; 263 struct ib_wc wc = {}; 264 unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; 265 266 do { 267 ret = ib_poll_cq(cq, 1, &wc); 268 if (ret < 0 || wc.status) 269 return ret < 0 ? ret : -EINVAL; 270 if (ret) 271 break; 272 } while (!time_after(jiffies, end)); 273 274 if (!ret) 275 return -ETIMEDOUT; 276 277 if (wc.wr_id != WR_ID_BF) 278 ret = 0; 279 280 return ret; 281 } 282 283 static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp) 284 { 285 int err, i; 286 287 for (i = 0; i < TEST_WC_NUM_WQES; i++) { 288 err = post_send_nop(dev, qp, WR_ID_BF, false); 289 if (err) 290 return err; 291 } 292 293 return post_send_nop(dev, qp, WR_ID_END, true); 294 } 295 296 int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) 297 { 298 struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 }; 299 int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); 300 struct ib_qp_init_attr qp_init_attr = { 301 .cap = { .max_send_wr = TEST_WC_NUM_WQES }, 302 .qp_type = IB_QPT_UD, 303 .sq_sig_type = IB_SIGNAL_REQ_WR, 304 .create_flags = MLX5_IB_QP_CREATE_WC_TEST, 305 }; 306 struct ib_qp_attr qp_attr = { .port_num = 1 }; 307 struct ib_device *ibdev = &dev->ib_dev; 308 struct ib_qp *qp; 309 struct ib_cq *cq; 310 struct ib_pd *pd; 311 int ret; 312 313 if (!MLX5_CAP_GEN(dev->mdev, bf)) 314 return 0; 315 316 if (!dev->mdev->roce.roce_en && 317 port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { 318 if (mlx5_core_is_pf(dev->mdev)) 319 dev->wc_support = arch_can_pci_mmap_wc(); 320 return 0; 321 } 322 323 ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false); 324 if (ret) 325 goto print_err; 326 327 if (!dev->wc_bfreg.wc) 328 goto out1; 329 330 pd = ib_alloc_pd(ibdev, 0); 331 if (IS_ERR(pd)) { 332 ret = PTR_ERR(pd); 333 goto out1; 334 } 335 336 cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); 337 if (IS_ERR(cq)) { 338 ret = PTR_ERR(cq); 339 goto out2; 340 } 341 342 qp_init_attr.recv_cq = cq; 343 qp_init_attr.send_cq = cq; 344 qp = ib_create_qp(pd, &qp_init_attr); 345 if (IS_ERR(qp)) { 346 ret = PTR_ERR(qp); 347 goto out3; 348 } 349 350 qp_attr.qp_state = IB_QPS_INIT; 351 ret = ib_modify_qp(qp, &qp_attr, 352 IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | 353 IB_QP_QKEY); 354 if (ret) 355 goto out4; 356 357 qp_attr.qp_state = IB_QPS_RTR; 358 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 359 if (ret) 360 goto out4; 361 362 qp_attr.qp_state = IB_QPS_RTS; 363 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); 364 if (ret) 365 goto out4; 366 367 ret = test_wc_do_send(dev, qp); 368 if (ret < 0) 369 goto out4; 370 371 ret = test_wc_poll_cq_result(dev, cq); 372 if (ret > 0) { 373 dev->wc_support = true; 374 ret = 0; 375 } 376 377 out4: 378 ib_destroy_qp(qp); 379 out3: 380 ib_destroy_cq(cq); 381 out2: 382 ib_dealloc_pd(pd); 383 out1: 384 mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg); 385 print_err: 386 if (ret) 387 mlx5_ib_err( 388 dev, 389 "Error %d while trying to test write-combining support\n", 390 ret); 391 return ret; 392 } 393