// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved.
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

/*
 * rxe_mcast.c implements driver support for multicast transport.
 * It is based on two data structures: struct rxe_mcg ('mcg') and
 * struct rxe_mca ('mca'). An mcg is allocated the first time a qp
 * is attached to a new mgid. The mcg's are indexed by a red-black
 * tree keyed on the mgid. This data structure is searched for the
 * mcg when a multicast packet is received and when another qp is
 * attached to the same mgid. The mcg is cleaned up when the last
 * qp is detached from it. Each time a qp is attached to an mcg an
 * mca is created. It holds a pointer to the qp and is added to a
 * list of qp's that are attached to the mcg. The qp_list is used
 * to replicate mcast packets in the rxe receive path.
 */

#include "rxe.h"

/**
 * rxe_mcast_add - add multicast address to rxe device
 * @rxe: rxe device object
 * @mgid: multicast address as a gid
 *
 * Returns: 0 on success else an error
 */
static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	return dev_mc_add(rxe->ndev, ll_addr);
}
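
/*
 * ipv6_eth_mc_map() above and in rxe_mcast_del() below derives the
 * ethernet link layer address by prepending 33:33 to the low four
 * bytes of the mgid, e.g. the mgid ff0e::0101 maps to the address
 * 33:33:00:00:01:01, which dev_mc_add()/dev_mc_del() then add to
 * or remove from the netdev's multicast filter list.
 */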

/**
 * rxe_mcast_del - delete multicast address from rxe device
 * @rxe: rxe device object
 * @mgid: multicast address as a gid
 *
 * Returns: 0 on success else an error
 */
static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	return dev_mc_del(rxe->ndev, ll_addr);
}

/**
 * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
 * @mcg: mcg object with an embedded red-black tree node
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock and
 * is responsible for ensuring the same mcg is not added to the tree
 * twice.
 */
static void __rxe_insert_mcg(struct rxe_mcg *mcg)
{
	struct rb_root *tree = &mcg->rxe->mcg_tree;
	struct rb_node **link = &tree->rb_node;
	struct rb_node *node = NULL;
	struct rxe_mcg *tmp;
	int cmp;

	/* walk down to the leaf where the new node belongs, ordering
	 * nodes by memcmp() on the mgid
	 */
	while (*link) {
		node = *link;
		tmp = rb_entry(node, struct rxe_mcg, node);

		cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
		if (cmp > 0)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	rb_link_node(&mcg->node, node, link);
	rb_insert_color(&mcg->node, tree);
}

/**
 * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
 * @mcg: mcast group object with an embedded red-black tree node
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock
 */
static void __rxe_remove_mcg(struct rxe_mcg *mcg)
{
	rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
}

/**
 * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
 * @rxe: rxe device object
 * @mgid: multicast IP address
 *
 * Context: caller must hold rxe->mcg_lock
 * Returns: mcg on success and takes a ref to mcg else NULL
 */
static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
					union ib_gid *mgid)
{
	struct rb_root *tree = &rxe->mcg_tree;
	struct rxe_mcg *mcg;
	struct rb_node *node;
	int cmp;

	node = tree->rb_node;

	while (node) {
		mcg = rb_entry(node, struct rxe_mcg, node);

		cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));

		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else
			break;
	}

	if (node) {
		kref_get(&mcg->ref_cnt);
		return mcg;
	}

	return NULL;
}

/**
 * rxe_lookup_mcg - look up mcg in red-black tree
 * @rxe: rxe device object
 * @mgid: multicast IP address
 *
 * Returns: mcg (with a ref taken) if found else NULL
 */
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
	struct rxe_mcg *mcg;

	spin_lock_bh(&rxe->mcg_lock);
	mcg = __rxe_lookup_mcg(rxe, mgid);
	spin_unlock_bh(&rxe->mcg_lock);

	return mcg;
}
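
/*
 * A sketch of the mcg reference counting scheme used below:
 *   - kref_init() in __rxe_init_mcg() creates the ref returned to
 *     the caller of rxe_get_mcg()
 *   - kref_get() in __rxe_init_mcg() creates the ref held by the
 *     pointer stored in the red-black tree
 *   - each successful __rxe_lookup_mcg() takes a ref for its caller
 *   - __rxe_destroy_mcg() drops the tree's ref; whoever drops the
 *     last ref triggers rxe_cleanup_mcg() which frees the mcg
 */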

/**
 * __rxe_init_mcg - initialize a new mcg
 * @rxe: rxe device
 * @mgid: multicast address as a gid
 * @mcg: new mcg object
 *
 * Context: caller must hold rxe->mcg_lock
 */
static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
			   struct rxe_mcg *mcg)
{
	kref_init(&mcg->ref_cnt);
	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
	INIT_LIST_HEAD(&mcg->qp_list);
	mcg->rxe = rxe;

	/* caller holds a ref on mcg but that will be
	 * dropped when mcg goes out of scope. We need to take a ref
	 * on the pointer that will be saved in the red-black tree
	 * by __rxe_insert_mcg and used to look up mcg from mgid later.
	 * Inserting mcg makes it visible to the outside so this should
	 * be done last after the object is fully initialized.
	 */
	kref_get(&mcg->ref_cnt);
	__rxe_insert_mcg(mcg);
}

/**
 * rxe_get_mcg - look up or allocate an mcg
 * @rxe: rxe device object
 * @mgid: multicast IP address as a gid
 *
 * Returns: mcg on success else ERR_PTR(error)
 */
static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
	struct rxe_mcg *mcg, *tmp;
	int err;

	if (rxe->attr.max_mcast_grp == 0)
		return ERR_PTR(-EINVAL);

	/* check to see if mcg already exists */
	mcg = rxe_lookup_mcg(rxe, mgid);
	if (mcg)
		return mcg;

	/* check to see if we have reached limit */
	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
		err = -ENOMEM;
		goto err_dec;
	}

	/* speculative alloc of new mcg */
	mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
	if (!mcg) {
		err = -ENOMEM;
		goto err_dec;
	}

	spin_lock_bh(&rxe->mcg_lock);
	/* re-check to see if someone else just added it */
	tmp = __rxe_lookup_mcg(rxe, mgid);
	if (tmp) {
		spin_unlock_bh(&rxe->mcg_lock);
		atomic_dec(&rxe->mcg_num);
		kfree(mcg);
		return tmp;
	}

	__rxe_init_mcg(rxe, mgid, mcg);
	spin_unlock_bh(&rxe->mcg_lock);

	/* add mcast address outside of lock */
	err = rxe_mcast_add(rxe, mgid);
	if (!err)
		return mcg;

	/* the mcg is already visible in the red-black tree so it
	 * cannot simply be freed; take it back out of the tree and
	 * drop both the tree's ref and the caller's ref
	 */
	spin_lock_bh(&rxe->mcg_lock);
	__rxe_remove_mcg(mcg);
	spin_unlock_bh(&rxe->mcg_lock);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
err_dec:
	atomic_dec(&rxe->mcg_num);
	return ERR_PTR(err);
}

/**
 * rxe_cleanup_mcg - cleanup mcg for kref_put
 * @kref: struct kref embedded in mcg
 */
void rxe_cleanup_mcg(struct kref *kref)
{
	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);

	kfree(mcg);
}

/**
 * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock
 * @mcg: the mcg object
 *
 * Context: caller is holding rxe->mcg_lock and
 * no qp's are attached to mcg
 */
static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
{
	struct rxe_dev *rxe = mcg->rxe;

	/* remove mcg from red-black tree then drop ref */
	__rxe_remove_mcg(mcg);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	atomic_dec(&rxe->mcg_num);
}

/**
 * rxe_destroy_mcg - destroy mcg object
 * @mcg: the mcg object
 *
 * Context: no qp's are attached to mcg
 */
static void rxe_destroy_mcg(struct rxe_mcg *mcg)
{
	/* delete mcast address outside of lock */
	rxe_mcast_del(mcg->rxe, &mcg->mgid);

	spin_lock_bh(&mcg->rxe->mcg_lock);
	__rxe_destroy_mcg(mcg);
	spin_unlock_bh(&mcg->rxe->mcg_lock);
}

/**
 * __rxe_init_mca - initialize a new mca holding lock
 * @qp: qp object
 * @mcg: mcg object
 * @mca: empty space for new mca
 *
 * Context: caller must hold references on qp and mcg, hold
 * rxe->mcg_lock, and pass in memory for the new mca
 *
 * Returns: 0 on success else an error
 */
static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
			  struct rxe_mca *mca)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	int n;

	/* limit the total number of qp attaches across the device */
	n = atomic_inc_return(&rxe->mcg_attach);
	if (n > rxe->attr.max_total_mcast_qp_attach) {
		atomic_dec(&rxe->mcg_attach);
		return -ENOMEM;
	}

	/* limit the number of qp's attached to this mcg */
	n = atomic_inc_return(&mcg->qp_num);
	if (n > rxe->attr.max_mcast_qp_attach) {
		atomic_dec(&mcg->qp_num);
		atomic_dec(&rxe->mcg_attach);
		return -ENOMEM;
	}

	atomic_inc(&qp->mcg_num);

	rxe_get(qp);
	mca->qp = qp;

	list_add_tail(&mca->qp_list, &mcg->qp_list);

	return 0;
}
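
/*
 * rxe_attach_mcg() below uses the same pattern as rxe_get_mcg()
 * above: speculatively allocate the new object with GFP_KERNEL
 * while no lock is held, then take rxe->mcg_lock and re-check for
 * a racing attach, freeing the unused allocation if the race was
 * lost. This avoids allocating with GFP_ATOMIC under the spinlock.
 */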

/**
 * rxe_attach_mcg - attach qp to mcg if not already attached
 * @mcg: mcg object
 * @qp: qp object
 *
 * Context: caller must hold references on qp and mcg.
 * Returns: 0 on success else an error
 */
static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
	struct rxe_dev *rxe = mcg->rxe;
	struct rxe_mca *mca, *tmp;
	int err;

	/* check to see if the qp is already a member of the group */
	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}
	spin_unlock_bh(&rxe->mcg_lock);

	/* speculative alloc new mca without using GFP_ATOMIC */
	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
	if (!mca)
		return -ENOMEM;

	spin_lock_bh(&rxe->mcg_lock);
	/* re-check to see if someone else just attached qp */
	list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
		if (tmp->qp == qp) {
			kfree(mca);
			err = 0;
			goto out;
		}
	}

	err = __rxe_init_mca(qp, mcg, mca);
	if (err)
		kfree(mca);
out:
	spin_unlock_bh(&rxe->mcg_lock);
	return err;
}

/**
 * __rxe_cleanup_mca - cleanup mca object holding lock
 * @mca: mca object
 * @mcg: mcg object
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock
 */
static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
{
	list_del(&mca->qp_list);

	/* undo the counters and the qp ref taken in __rxe_init_mca */
	atomic_dec(&mcg->qp_num);
	atomic_dec(&mcg->rxe->mcg_attach);
	atomic_dec(&mca->qp->mcg_num);
	rxe_put(mca->qp);

	kfree(mca);
}
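
/*
 * An mca is not reference counted; it lives exactly as long as the
 * attach itself and is always created and freed while rxe->mcg_lock
 * is held, so the qp_list only changes under the lock. The qp ref
 * taken in __rxe_init_mca() is dropped in __rxe_cleanup_mca() above.
 */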

/**
 * rxe_detach_mcg - detach qp from mcg
 * @mcg: mcg object
 * @qp: qp object
 *
 * Returns: 0 on success else an error if qp is not attached.
 */
static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
	struct rxe_dev *rxe = mcg->rxe;
	struct rxe_mca *mca, *tmp;

	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			__rxe_cleanup_mca(mca, mcg);

			/* if the number of qp's attached to the
			 * mcast group falls to zero go ahead and
			 * tear it down. This will not free the
			 * object since we are still holding a ref
			 * from the caller
			 */
			if (atomic_read(&mcg->qp_num) <= 0)
				__rxe_destroy_mcg(mcg);

			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}

	/* we didn't find the qp on the list */
	spin_unlock_bh(&rxe->mcg_lock);
	return -EINVAL;
}

/**
 * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1)
 * @ibqp: (IB) qp object
 * @mgid: multicast IP address
 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
 *
 * Returns: 0 on success else an errno
 */
int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mcg *mcg;

	/* takes a ref on mcg if successful */
	mcg = rxe_get_mcg(rxe, mgid);
	if (IS_ERR(mcg))
		return PTR_ERR(mcg);

	err = rxe_attach_mcg(mcg, qp);

	/* if the attach failed and no qp's are attached to
	 * the mcg then tear it down
	 */
	if (atomic_read(&mcg->qp_num) == 0)
		rxe_destroy_mcg(mcg);

	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	return err;
}

/**
 * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2)
 * @ibqp: address of (IB) qp object
 * @mgid: multicast IP address
 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
 *
 * Returns: 0 on success else an errno
 */
int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mcg *mcg;
	int err;

	mcg = rxe_lookup_mcg(rxe, mgid);
	if (!mcg)
		return -EINVAL;

	err = rxe_detach_mcg(mcg, qp);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	return err;
}