// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved.
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

/*
 * rxe_mcast.c implements driver support for multicast transport.
 * It is based on two data structures: struct rxe_mcg ('mcg') and
 * struct rxe_mca ('mca'). An mcg is allocated the first time a qp
 * is attached to a new mgid. The mcg's are indexed by a red-black
 * tree keyed on the mgid. The tree is searched for the mcg when a
 * multicast packet is received and when another qp is attached to
 * the same mgid. The mcg is cleaned up when the last qp is detached
 * from it. Each time a qp is attached to an mcg an mca is created.
 * It holds a pointer to the qp and is added to the list of qp's
 * attached to the mcg. The qp_list is used to replicate mcast
 * packets in the rxe receive path.
 */

#include "rxe.h"
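
/*
 * For orientation, a rough sketch of the two structures as this file
 * uses them; the authoritative definitions live in rxe_verbs.h and may
 * contain additional members not shown here:
 *
 *	struct rxe_mcg {
 *		struct rb_node		node;		// rxe->mcg_tree linkage
 *		struct kref		ref_cnt;
 *		struct rxe_dev		*rxe;
 *		struct list_head	qp_list;	// list of attached mca's
 *		union ib_gid		mgid;		// red-black tree key
 *		atomic_t		qp_num;		// number of attached qp's
 *	};
 *
 *	struct rxe_mca {
 *		struct list_head	qp_list;	// entry on mcg->qp_list
 *		struct rxe_qp		*qp;		// holds a qp reference
 *	};
 */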

/**
 * rxe_mcast_add - add multicast address to rxe device
 * @rxe: rxe device object
 * @mgid: multicast address as a gid
 *
 * Returns 0 on success else an error
 */
static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	return dev_mc_add(rxe->ndev, ll_addr);
}

/**
 * rxe_mcast_del - delete multicast address from rxe device
 * @rxe: rxe device object
 * @mgid: multicast address as a gid
 *
 * Returns 0 on success else an error
 */
static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	return dev_mc_del(rxe->ndev, ll_addr);
}

/**
 * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
 * @mcg: mcg object with an embedded red-black tree node
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock and
 * is responsible to avoid adding the same mcg twice to the tree.
 */
static void __rxe_insert_mcg(struct rxe_mcg *mcg)
{
	struct rb_root *tree = &mcg->rxe->mcg_tree;
	struct rb_node **link = &tree->rb_node;
	struct rb_node *node = NULL;
	struct rxe_mcg *tmp;
	int cmp;

	while (*link) {
		node = *link;
		tmp = rb_entry(node, struct rxe_mcg, node);

		cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
		if (cmp > 0)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	rb_link_node(&mcg->node, node, link);
	rb_insert_color(&mcg->node, tree);
}

/**
 * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
 * @mcg: mcast group object with an embedded red-black tree node
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock
 */
static void __rxe_remove_mcg(struct rxe_mcg *mcg)
{
	rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
}

/**
 * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
 * @rxe: rxe device object
 * @mgid: multicast IP address
 *
 * Context: caller must hold rxe->mcg_lock
 * Returns: mcg on success and takes a ref to mcg else NULL
 */
static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
					union ib_gid *mgid)
{
	struct rb_root *tree = &rxe->mcg_tree;
	struct rxe_mcg *mcg;
	struct rb_node *node;
	int cmp;

	node = tree->rb_node;

	while (node) {
		mcg = rb_entry(node, struct rxe_mcg, node);

		cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));

		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else
			break;
	}

	if (node) {
		kref_get(&mcg->ref_cnt);
		return mcg;
	}

	return NULL;
}

/**
 * rxe_lookup_mcg - lookup mcg in red-black tree
 * @rxe: rxe device object
 * @mgid: multicast IP address
 *
 * Returns: mcg if found else NULL
 */
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
	struct rxe_mcg *mcg;

	spin_lock_bh(&rxe->mcg_lock);
	mcg = __rxe_lookup_mcg(rxe, mgid);
	spin_unlock_bh(&rxe->mcg_lock);

	return mcg;
}
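
/*
 * A successful lookup returns with an extra kref held, so every caller
 * of rxe_lookup_mcg() must eventually drop it. A typical caller (this
 * is the pattern used by rxe_detach_mcast() below and, conceptually,
 * by the receive path when it replicates a multicast packet) looks
 * roughly like:
 *
 *	mcg = rxe_lookup_mcg(rxe, mgid);
 *	if (!mcg)
 *		return -EINVAL;		// or drop the packet
 *
 *	... walk mcg->qp_list or detach the qp ...
 *
 *	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
 */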

/**
 * __rxe_init_mcg - initialize a new mcg
 * @rxe: rxe device
 * @mgid: multicast address as a gid
 * @mcg: new mcg object
 *
 * Context: caller should hold rxe->mcg_lock
 */
static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
			   struct rxe_mcg *mcg)
{
	kref_init(&mcg->ref_cnt);
	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
	INIT_LIST_HEAD(&mcg->qp_list);
	mcg->rxe = rxe;

	/* caller holds a ref on mcg but that will be
	 * dropped when mcg goes out of scope. We need to take a ref
	 * on the pointer that will be saved in the red-black tree
	 * by __rxe_insert_mcg and used to lookup mcg from mgid later.
	 * Inserting mcg makes it visible to the outside so this should
	 * be done last after the object is ready.
	 */
	kref_get(&mcg->ref_cnt);
	__rxe_insert_mcg(mcg);
}

/**
 * rxe_get_mcg - lookup or allocate a mcg
 * @rxe: rxe device object
 * @mgid: multicast IP address as a gid
 *
 * Returns: mcg on success else ERR_PTR(error)
 */
static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
	struct rxe_mcg *mcg, *tmp;
	int err;

	if (rxe->attr.max_mcast_grp == 0)
		return ERR_PTR(-EINVAL);

	/* check to see if mcg already exists */
	mcg = rxe_lookup_mcg(rxe, mgid);
	if (mcg)
		return mcg;

	/* check to see if we have reached limit */
	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
		err = -ENOMEM;
		goto err_dec;
	}

	/* speculative alloc of new mcg */
	mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
	if (!mcg) {
		err = -ENOMEM;
		goto err_dec;
	}

	spin_lock_bh(&rxe->mcg_lock);
	/* re-check to see if someone else just added it */
	tmp = __rxe_lookup_mcg(rxe, mgid);
	if (tmp) {
		spin_unlock_bh(&rxe->mcg_lock);
		atomic_dec(&rxe->mcg_num);
		kfree(mcg);
		return tmp;
	}

	__rxe_init_mcg(rxe, mgid, mcg);
	spin_unlock_bh(&rxe->mcg_lock);

	/* add mcast address outside of lock */
	err = rxe_mcast_add(rxe, mgid);
	if (!err)
		return mcg;

	kfree(mcg);
err_dec:
	atomic_dec(&rxe->mcg_num);
	return ERR_PTR(err);
}

/**
 * rxe_cleanup_mcg - cleanup mcg for kref_put
 * @kref: struct kref embedded in mcg
 */
void rxe_cleanup_mcg(struct kref *kref)
{
	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);

	kfree(mcg);
}

/**
 * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock
 * @mcg: the mcg object
 *
 * Context: caller is holding rxe->mcg_lock
 * no qp's are attached to mcg
 */
static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
{
	struct rxe_dev *rxe = mcg->rxe;

	/* remove mcg from red-black tree then drop ref */
	__rxe_remove_mcg(mcg);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	atomic_dec(&rxe->mcg_num);
}

/**
 * rxe_destroy_mcg - destroy mcg object
 * @mcg: the mcg object
 *
 * Context: no qp's are attached to mcg
 */
static void rxe_destroy_mcg(struct rxe_mcg *mcg)
{
	/* delete mcast address outside of lock */
	rxe_mcast_del(mcg->rxe, &mcg->mgid);

	spin_lock_bh(&mcg->rxe->mcg_lock);
	__rxe_destroy_mcg(mcg);
	spin_unlock_bh(&mcg->rxe->mcg_lock);
}

/**
 * __rxe_init_mca - initialize a new mca holding lock
 * @qp: qp object
 * @mcg: mcg object
 * @mca: empty space for new mca
 *
 * Context: caller must hold references on qp and mcg, rxe->mcg_lock
 * and pass memory for new mca
 *
 * Returns: 0 on success else an error
 */
static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
			  struct rxe_mca *mca)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	int n;

	n = atomic_inc_return(&rxe->mcg_attach);
	if (n > rxe->attr.max_total_mcast_qp_attach) {
		atomic_dec(&rxe->mcg_attach);
		return -ENOMEM;
	}

	n = atomic_inc_return(&mcg->qp_num);
	if (n > rxe->attr.max_mcast_qp_attach) {
		atomic_dec(&mcg->qp_num);
		atomic_dec(&rxe->mcg_attach);
		return -ENOMEM;
	}

	atomic_inc(&qp->mcg_num);

	rxe_get(qp);
	mca->qp = qp;

	list_add_tail(&mca->qp_list, &mcg->qp_list);

	return 0;
}
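
/*
 * Attachment accounting: each successful __rxe_init_mca() charges one
 * attachment against the device-wide counter rxe->mcg_attach (limited
 * by attr.max_total_mcast_qp_attach) and the per-group counter
 * mcg->qp_num (limited by attr.max_mcast_qp_attach); the defaults for
 * these attributes are set up from rxe_param.h when the device is
 * created. __rxe_cleanup_mca() below undoes both charges.
 */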

/**
 * rxe_attach_mcg - attach qp to mcg if not already attached
 * @mcg: mcg object
 * @qp: qp object
 *
 * Context: caller must hold reference on qp and mcg.
 * Returns: 0 on success else an error
 */
static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
	struct rxe_dev *rxe = mcg->rxe;
	struct rxe_mca *mca, *tmp;
	int err;

	/* check to see if the qp is already a member of the group */
	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}
	spin_unlock_bh(&rxe->mcg_lock);

	/* speculative alloc new mca without using GFP_ATOMIC */
	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
	if (!mca)
		return -ENOMEM;

	spin_lock_bh(&rxe->mcg_lock);
	/* re-check to see if someone else just attached qp */
	list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
		if (tmp->qp == qp) {
			kfree(mca);
			err = 0;
			goto out;
		}
	}

	err = __rxe_init_mca(qp, mcg, mca);
	if (err)
		kfree(mca);
out:
	spin_unlock_bh(&rxe->mcg_lock);
	return err;
}

/**
 * __rxe_cleanup_mca - cleanup mca object holding lock
 * @mca: mca object
 * @mcg: mcg object
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock
 */
static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
{
	list_del(&mca->qp_list);

	atomic_dec(&mcg->qp_num);
	atomic_dec(&mcg->rxe->mcg_attach);
	atomic_dec(&mca->qp->mcg_num);
	rxe_put(mca->qp);

	kfree(mca);
}
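
/*
 * Note that each mca pins its qp: __rxe_init_mca() takes a reference
 * with rxe_get(qp) and the matching rxe_put() happens in
 * __rxe_cleanup_mca() above, so a qp cannot go away while it is still
 * reachable from a multicast group's qp_list.
 */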

/**
 * rxe_detach_mcg - detach qp from mcg
 * @mcg: mcg object
 * @qp: qp object
 *
 * Returns: 0 on success else an error if qp is not attached.
 */
static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
	struct rxe_dev *rxe = mcg->rxe;
	struct rxe_mca *mca, *tmp;

	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			__rxe_cleanup_mca(mca, mcg);

			/* if the number of qp's attached to the
			 * mcast group falls to zero go ahead and
			 * tear it down. This will not free the
			 * object since we are still holding a ref
			 * from the caller
			 */
			if (atomic_read(&mcg->qp_num) <= 0)
				__rxe_destroy_mcg(mcg);

			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}

	/* we didn't find the qp on the list */
	spin_unlock_bh(&rxe->mcg_lock);
	return -EINVAL;
}

/**
 * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1)
 * @ibqp: (IB) qp object
 * @mgid: multicast IP address
 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
 *
 * Returns: 0 on success else an errno
 */
int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mcg *mcg;

	/* takes a ref on mcg if successful */
	mcg = rxe_get_mcg(rxe, mgid);
	if (IS_ERR(mcg))
		return PTR_ERR(mcg);

	err = rxe_attach_mcg(mcg, qp);

	/* if we failed to attach the first qp to mcg tear it down */
	if (atomic_read(&mcg->qp_num) == 0)
		rxe_destroy_mcg(mcg);

	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	return err;
}

/**
 * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2)
 * @ibqp: address of (IB) qp object
 * @mgid: multicast IP address
 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
 *
 * Returns: 0 on success else an errno
 */
int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mcg *mcg;
	int err;

	mcg = rxe_lookup_mcg(rxe, mgid);
	if (!mcg)
		return -EINVAL;

	err = rxe_detach_mcg(mcg, qp);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	return err;
}
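
/*
 * rxe_attach_mcast() and rxe_detach_mcast() are exposed to the RDMA
 * core through the .attach_mcast/.detach_mcast ib_device_ops set up in
 * rxe_verbs.c, so a consumer reaches this code through the standard
 * verbs calls, roughly (sketch; the mlid argument is unused for RoCE):
 *
 *	err = ib_attach_mcast(qp, &mgid, 0);
 *	...
 *	err = ib_detach_mcast(qp, &mgid, 0);
 */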