1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved. 4 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 5 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 6 */ 7 8 /* 9 * rxe_mcast.c implements driver support for multicast transport. 10 * It is based on two data structures struct rxe_mcg ('mcg') and 11 * struct rxe_mca ('mca'). An mcg is allocated each time a qp is 12 * attached to a new mgid for the first time. These are indexed by 13 * a red-black tree using the mgid. This data structure is searched 14 * for the mcg when a multicast packet is received and when another 15 * qp is attached to the same mgid. It is cleaned up when the last qp 16 * is detached from the mcg. Each time a qp is attached to an mcg an 17 * mca is created. It holds a pointer to the qp and is added to a list 18 * of qp's that are attached to the mcg. The qp_list is used to replicate 19 * mcast packets in the rxe receive path. 20 */ 21 22 #include "rxe.h" 23 24 /** 25 * rxe_mcast_add - add multicast address to rxe device 26 * @rxe: rxe device object 27 * @mgid: multicast address as a gid 28 * 29 * Returns 0 on success else an error 30 */ 31 static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) 32 { 33 unsigned char ll_addr[ETH_ALEN]; 34 35 ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); 36 37 return dev_mc_add(rxe->ndev, ll_addr); 38 } 39 40 /** 41 * rxe_mcast_delete - delete multicast address from rxe device 42 * @rxe: rxe device object 43 * @mgid: multicast address as a gid 44 * 45 * Returns 0 on success else an error 46 */ 47 static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) 48 { 49 unsigned char ll_addr[ETH_ALEN]; 50 51 ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); 52 53 return dev_mc_del(rxe->ndev, ll_addr); 54 } 55 56 /** 57 * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree) 58 * @mcg: mcg object with an embedded red-black tree node 59 * 60 * Context: caller must hold a reference to mcg and rxe->mcg_lock and 61 * is responsible to avoid adding the same mcg twice to the tree. 62 */ 63 static void __rxe_insert_mcg(struct rxe_mcg *mcg) 64 { 65 struct rb_root *tree = &mcg->rxe->mcg_tree; 66 struct rb_node **link = &tree->rb_node; 67 struct rb_node *node = NULL; 68 struct rxe_mcg *tmp; 69 int cmp; 70 71 while (*link) { 72 node = *link; 73 tmp = rb_entry(node, struct rxe_mcg, node); 74 75 cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid)); 76 if (cmp > 0) 77 link = &(*link)->rb_left; 78 else 79 link = &(*link)->rb_right; 80 } 81 82 rb_link_node(&mcg->node, node, link); 83 rb_insert_color(&mcg->node, tree); 84 } 85 86 /** 87 * __rxe_remove_mcg - remove an mcg from red-black tree holding lock 88 * @mcg: mcast group object with an embedded red-black tree node 89 * 90 * Context: caller must hold a reference to mcg and rxe->mcg_lock 91 */ 92 static void __rxe_remove_mcg(struct rxe_mcg *mcg) 93 { 94 rb_erase(&mcg->node, &mcg->rxe->mcg_tree); 95 } 96 97 /** 98 * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock 99 * @rxe: rxe device object 100 * @mgid: multicast IP address 101 * 102 * Context: caller must hold rxe->mcg_lock 103 * Returns: mcg on success and takes a ref to mcg else NULL 104 */ 105 static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, 106 union ib_gid *mgid) 107 { 108 struct rb_root *tree = &rxe->mcg_tree; 109 struct rxe_mcg *mcg; 110 struct rb_node *node; 111 int cmp; 112 113 node = tree->rb_node; 114 115 while (node) { 116 mcg = rb_entry(node, struct rxe_mcg, node); 117 118 cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid)); 119 120 if (cmp > 0) 121 node = node->rb_left; 122 else if (cmp < 0) 123 node = node->rb_right; 124 else 125 break; 126 } 127 128 if (node) { 129 kref_get(&mcg->ref_cnt); 130 return mcg; 131 } 132 133 return NULL; 134 } 135 136 /** 137 * rxe_lookup_mcg - lookup up mcg in red-back tree 138 * @rxe: rxe device object 139 * @mgid: multicast IP address 140 * 141 * Returns: mcg if found else NULL 142 */ 143 struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) 144 { 145 struct rxe_mcg *mcg; 146 unsigned long flags; 147 148 spin_lock_irqsave(&rxe->mcg_lock, flags); 149 mcg = __rxe_lookup_mcg(rxe, mgid); 150 spin_unlock_irqrestore(&rxe->mcg_lock, flags); 151 152 return mcg; 153 } 154 155 /** 156 * __rxe_init_mcg - initialize a new mcg 157 * @rxe: rxe device 158 * @mgid: multicast address as a gid 159 * @mcg: new mcg object 160 * 161 * Context: caller should hold rxe->mcg lock 162 * Returns: 0 on success else an error 163 */ 164 static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, 165 struct rxe_mcg *mcg) 166 { 167 int err; 168 169 err = rxe_mcast_add(rxe, mgid); 170 if (unlikely(err)) 171 return err; 172 173 kref_init(&mcg->ref_cnt); 174 memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); 175 INIT_LIST_HEAD(&mcg->qp_list); 176 mcg->rxe = rxe; 177 178 /* caller holds a ref on mcg but that will be 179 * dropped when mcg goes out of scope. We need to take a ref 180 * on the pointer that will be saved in the red-black tree 181 * by __rxe_insert_mcg and used to lookup mcg from mgid later. 182 * Inserting mcg makes it visible to outside so this should 183 * be done last after the object is ready. 184 */ 185 kref_get(&mcg->ref_cnt); 186 __rxe_insert_mcg(mcg); 187 188 return 0; 189 } 190 191 /** 192 * rxe_get_mcg - lookup or allocate a mcg 193 * @rxe: rxe device object 194 * @mgid: multicast IP address as a gid 195 * 196 * Returns: mcg on success else ERR_PTR(error) 197 */ 198 static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) 199 { 200 struct rxe_mcg *mcg, *tmp; 201 unsigned long flags; 202 int err; 203 204 if (rxe->attr.max_mcast_grp == 0) 205 return ERR_PTR(-EINVAL); 206 207 /* check to see if mcg already exists */ 208 mcg = rxe_lookup_mcg(rxe, mgid); 209 if (mcg) 210 return mcg; 211 212 /* speculative alloc of new mcg */ 213 mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); 214 if (!mcg) 215 return ERR_PTR(-ENOMEM); 216 217 spin_lock_irqsave(&rxe->mcg_lock, flags); 218 /* re-check to see if someone else just added it */ 219 tmp = __rxe_lookup_mcg(rxe, mgid); 220 if (tmp) { 221 kfree(mcg); 222 mcg = tmp; 223 goto out; 224 } 225 226 if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { 227 err = -ENOMEM; 228 goto err_dec; 229 } 230 231 err = __rxe_init_mcg(rxe, mgid, mcg); 232 if (err) 233 goto err_dec; 234 out: 235 spin_unlock_irqrestore(&rxe->mcg_lock, flags); 236 return mcg; 237 238 err_dec: 239 atomic_dec(&rxe->mcg_num); 240 spin_unlock_irqrestore(&rxe->mcg_lock, flags); 241 kfree(mcg); 242 return ERR_PTR(err); 243 } 244 245 /** 246 * rxe_cleanup_mcg - cleanup mcg for kref_put 247 * @kref: struct kref embnedded in mcg 248 */ 249 void rxe_cleanup_mcg(struct kref *kref) 250 { 251 struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); 252 253 kfree(mcg); 254 } 255 256 /** 257 * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock 258 * @mcg: the mcg object 259 * 260 * Context: caller is holding rxe->mcg_lock 261 * no qp's are attached to mcg 262 */ 263 static void __rxe_destroy_mcg(struct rxe_mcg *mcg) 264 { 265 struct rxe_dev *rxe = mcg->rxe; 266 267 /* remove mcg from red-black tree then drop ref */ 268 __rxe_remove_mcg(mcg); 269 kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); 270 271 rxe_mcast_delete(mcg->rxe, &mcg->mgid); 272 atomic_dec(&rxe->mcg_num); 273 } 274 275 /** 276 * rxe_destroy_mcg - destroy mcg object 277 * @mcg: the mcg object 278 * 279 * Context: no qp's are attached to mcg 280 */ 281 static void rxe_destroy_mcg(struct rxe_mcg *mcg) 282 { 283 unsigned long flags; 284 285 spin_lock_irqsave(&mcg->rxe->mcg_lock, flags); 286 __rxe_destroy_mcg(mcg); 287 spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags); 288 } 289 290 /** 291 * __rxe_init_mca - initialize a new mca holding lock 292 * @qp: qp object 293 * @mcg: mcg object 294 * @mca: empty space for new mca 295 * 296 * Context: caller must hold references on qp and mcg, rxe->mcg_lock 297 * and pass memory for new mca 298 * 299 * Returns: 0 on success else an error 300 */ 301 static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg, 302 struct rxe_mca *mca) 303 { 304 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 305 int n; 306 307 n = atomic_inc_return(&rxe->mcg_attach); 308 if (n > rxe->attr.max_total_mcast_qp_attach) { 309 atomic_dec(&rxe->mcg_attach); 310 return -ENOMEM; 311 } 312 313 n = atomic_inc_return(&mcg->qp_num); 314 if (n > rxe->attr.max_mcast_qp_attach) { 315 atomic_dec(&mcg->qp_num); 316 atomic_dec(&rxe->mcg_attach); 317 return -ENOMEM; 318 } 319 320 atomic_inc(&qp->mcg_num); 321 322 rxe_get(qp); 323 mca->qp = qp; 324 325 list_add_tail(&mca->qp_list, &mcg->qp_list); 326 327 return 0; 328 } 329 330 /** 331 * rxe_attach_mcg - attach qp to mcg if not already attached 332 * @qp: qp object 333 * @mcg: mcg object 334 * 335 * Context: caller must hold reference on qp and mcg. 336 * Returns: 0 on success else an error 337 */ 338 static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) 339 { 340 struct rxe_dev *rxe = mcg->rxe; 341 struct rxe_mca *mca, *tmp; 342 unsigned long flags; 343 int err; 344 345 /* check to see if the qp is already a member of the group */ 346 spin_lock_irqsave(&rxe->mcg_lock, flags); 347 list_for_each_entry(mca, &mcg->qp_list, qp_list) { 348 if (mca->qp == qp) { 349 spin_unlock_irqrestore(&rxe->mcg_lock, flags); 350 return 0; 351 } 352 } 353 spin_unlock_irqrestore(&rxe->mcg_lock, flags); 354 355 /* speculative alloc new mca without using GFP_ATOMIC */ 356 mca = kzalloc(sizeof(*mca), GFP_KERNEL); 357 if (!mca) 358 return -ENOMEM; 359 360 spin_lock_irqsave(&rxe->mcg_lock, flags); 361 /* re-check to see if someone else just attached qp */ 362 list_for_each_entry(tmp, &mcg->qp_list, qp_list) { 363 if (tmp->qp == qp) { 364 kfree(mca); 365 err = 0; 366 goto out; 367 } 368 } 369 370 err = __rxe_init_mca(qp, mcg, mca); 371 if (err) 372 kfree(mca); 373 out: 374 spin_unlock_irqrestore(&rxe->mcg_lock, flags); 375 return err; 376 } 377 378 /** 379 * __rxe_cleanup_mca - cleanup mca object holding lock 380 * @mca: mca object 381 * @mcg: mcg object 382 * 383 * Context: caller must hold a reference to mcg and rxe->mcg_lock 384 */ 385 static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg) 386 { 387 list_del(&mca->qp_list); 388 389 atomic_dec(&mcg->qp_num); 390 atomic_dec(&mcg->rxe->mcg_attach); 391 atomic_dec(&mca->qp->mcg_num); 392 rxe_put(mca->qp); 393 394 kfree(mca); 395 } 396 397 /** 398 * rxe_detach_mcg - detach qp from mcg 399 * @mcg: mcg object 400 * @qp: qp object 401 * 402 * Returns: 0 on success else an error if qp is not attached. 403 */ 404 static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) 405 { 406 struct rxe_dev *rxe = mcg->rxe; 407 struct rxe_mca *mca, *tmp; 408 unsigned long flags; 409 410 spin_lock_irqsave(&rxe->mcg_lock, flags); 411 list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { 412 if (mca->qp == qp) { 413 __rxe_cleanup_mca(mca, mcg); 414 415 /* if the number of qp's attached to the 416 * mcast group falls to zero go ahead and 417 * tear it down. This will not free the 418 * object since we are still holding a ref 419 * from the caller 420 */ 421 if (atomic_read(&mcg->qp_num) <= 0) 422 __rxe_destroy_mcg(mcg); 423 424 spin_unlock_irqrestore(&rxe->mcg_lock, flags); 425 return 0; 426 } 427 } 428 429 /* we didn't find the qp on the list */ 430 spin_unlock_irqrestore(&rxe->mcg_lock, flags); 431 return -EINVAL; 432 } 433 434 /** 435 * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1) 436 * @ibqp: (IB) qp object 437 * @mgid: multicast IP address 438 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) 439 * 440 * Returns: 0 on success else an errno 441 */ 442 int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) 443 { 444 int err; 445 struct rxe_dev *rxe = to_rdev(ibqp->device); 446 struct rxe_qp *qp = to_rqp(ibqp); 447 struct rxe_mcg *mcg; 448 449 /* takes a ref on mcg if successful */ 450 mcg = rxe_get_mcg(rxe, mgid); 451 if (IS_ERR(mcg)) 452 return PTR_ERR(mcg); 453 454 err = rxe_attach_mcg(mcg, qp); 455 456 /* if we failed to attach the first qp to mcg tear it down */ 457 if (atomic_read(&mcg->qp_num) == 0) 458 rxe_destroy_mcg(mcg); 459 460 kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); 461 462 return err; 463 } 464 465 /** 466 * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2) 467 * @ibqp: address of (IB) qp object 468 * @mgid: multicast IP address 469 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) 470 * 471 * Returns: 0 on success else an errno 472 */ 473 int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) 474 { 475 struct rxe_dev *rxe = to_rdev(ibqp->device); 476 struct rxe_qp *qp = to_rqp(ibqp); 477 struct rxe_mcg *mcg; 478 int err; 479 480 mcg = rxe_lookup_mcg(rxe, mgid); 481 if (!mcg) 482 return -EINVAL; 483 484 err = rxe_detach_mcg(mcg, qp); 485 kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); 486 487 return err; 488 } 489