// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 * implementation.
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

static const unsigned int mctp_message_maxlen = 64 * 1024;
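
/* MCTP packet header field layout, noted here for reference when reading
 * the flags_seq_tag handling below (see DSP0236 for the full definition):
 *
 *   flags_seq_tag: [7] SOM | [6] EOM | [5:4] pkt seq | [3] TO | [2:0] tag
 */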

/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *mh;
	struct sock *sk;
	u8 type;

	WARN_ON(!rcu_read_lock_held());

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	if (!skb_headlen(skb))
		return NULL;

	type = (*(u8 *)skb->data) & 0x7f;

	sk_for_each_rcu(sk, &net->mctp.binds) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (msk->bind_addr != MCTP_ADDR_ANY &&
		    msk->bind_addr != mh->dest)
			continue;

		return msk;
	}

	return NULL;
}

static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
			   mctp_eid_t peer, u8 tag)
{
	if (key->local_addr != local)
		return false;

	if (key->peer_addr != peer)
		return false;

	if (key->tag != tag)
		return false;

	return true;
}
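
/* A key identifies a message flow by the (local EID, peer EID, tag) tuple.
 * The stored tag is the value expected on *incoming* packets, including the
 * TO (tag-owner) bit: locally-allocated tags are stored with TO clear (we
 * match the peer's responses), while keys created for inbound requests keep
 * TO set.
 */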

/* returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, mh->dest, peer, tag))
			continue;

		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}

static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
					  mctp_eid_t local, mctp_eid_t peer,
					  u8 tag, gfp_t gfp)
{
	struct mctp_sk_key *key;

	key = kzalloc(sizeof(*key), gfp);
	if (!key)
		return NULL;

	key->peer_addr = peer;
	key->local_addr = local;
	key->tag = tag;
	key->sk = &msk->sk;
	key->valid = true;
	spin_lock_init(&key->lock);
	refcount_set(&key->refs, 1);

	return key;
}

void mctp_key_unref(struct mctp_sk_key *key)
{
	if (refcount_dec_and_test(&key->refs))
		kfree(key);
}

static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
				   key->tag)) {
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		refcount_inc(&key->refs);
		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}
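
/* Key refcounting, in brief: mctp_key_alloc() returns a key holding one
 * reference for the caller; a successful mctp_key_add() takes a second
 * reference for the net/socket hash lists. __mctp_key_unlock_drop() below
 * releases both the list reference and the caller's reference.
 */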

/* We're done with the key; unset valid and remove from lists. There may still
 * be outstanding refs on the key though...
 */
static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
				   unsigned long flags)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	skb = key->reasm_head;
	key->reasm_head = NULL;
	key->reasm_dead = true;
	key->valid = false;
	spin_unlock_irqrestore(&key->lock, flags);

	spin_lock_irqsave(&net->mctp.keys_lock, flags);
	hlist_del(&key->hlist);
	hlist_del(&key->sklist);
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	/* one unref for the lists */
	mctp_key_unref(key);

	/* and one for the local reference */
	mctp_key_unref(key);

	if (skb)
		kfree_skb(skb);
}

static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	u8 exp_seq, this_seq;

	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
		& MCTP_HDR_SEQ_MASK;

	if (!key->reasm_head) {
		key->reasm_head = skb;
		key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
		key->last_seq = this_seq;
		return 0;
	}

	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

	if (this_seq != exp_seq)
		return -EINVAL;

	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
		return -EINVAL;

	skb->next = NULL;
	skb->sk = NULL;
	*key->reasm_tailp = skb;
	key->reasm_tailp = &skb->next;

	key->last_seq = this_seq;

	key->reasm_head->data_len += skb->len;
	key->reasm_head->len += skb->len;
	key->reasm_head->truesize += skb->truesize;

	return 0;
}
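
/* Receive path for locally-addressed packets. Behaviour depends on the
 * SOM/EOM flags of the incoming packet:
 *
 *   SOM+EOM:  single-packet message; deliver straight to the socket.
 *   SOM only: start of a fragmented message; find or create a reassembly
 *             key and queue the first fragment.
 *   EOM only: final fragment; queue it, deliver the reassembled message,
 *             then drop the key.
 *   neither:  middle fragment; queue against the existing key.
 */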

static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct mctp_sk_key *key;
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	unsigned long f;
	u8 tag, flags;
	int rc;

	msk = NULL;
	rc = -EINVAL;

	/* we may be receiving a locally-routed packet; drop source sk
	 * accounting
	 */
	skb_orphan(skb);

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto out;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	skb_pull(skb, sizeof(struct mctp_hdr));

	if (mh->ver != 1)
		goto out;

	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	rcu_read_lock();

	/* lookup socket / reasm context, exactly matching (src,dest,tag).
	 * on success, we hold a ref on the key, with key->lock held.
	 */
	key = mctp_lookup_key(net, skb, mh->src, &f);

	if (flags & MCTP_HDR_FLAG_SOM) {
		if (key) {
			msk = container_of(key->sk, struct mctp_sock, sk);
		} else {
			/* first response to a broadcast? do a more general
			 * key lookup to find the socket, but don't use this
			 * key for reassembly - we'll create a more specific
			 * one for future packets if required (ie, !EOM).
			 */
			key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
			if (key) {
				msk = container_of(key->sk,
						   struct mctp_sock, sk);
				spin_unlock_irqrestore(&key->lock, f);
				mctp_key_unref(key);
				key = NULL;
			}
		}

		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
			msk = mctp_lookup_bind(net, skb);

		if (!msk) {
			rc = -ENOENT;
			goto out_unlock;
		}

		/* single-packet message? deliver to socket, clean up any
		 * pending key.
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			sock_queue_rcv_skb(&msk->sk, skb);
			if (key) {
				/* we've hit a pending reassembly; not much we
				 * can do but drop it
				 */
				__mctp_key_unlock_drop(key, net, f);
				key = NULL;
			}
			rc = 0;
			goto out_unlock;
		}

		/* broadcast response or a bind() - create a key for further
		 * packets for this message
		 */
		if (!key) {
			key = mctp_key_alloc(msk, mh->dest, mh->src,
					     tag, GFP_ATOMIC);
			if (!key) {
				rc = -ENOMEM;
				goto out_unlock;
			}

			/* we can queue without the key lock here, as the
			 * key isn't observable yet
			 */
			mctp_frag_queue(key, skb);

			/* if the key_add fails, we've raced with another
			 * SOM packet with the same src, dest and tag. There's
			 * no way to distinguish future packets, so all we
			 * can do is drop; we'll free the skb on exit from
			 * this function.
			 */
			rc = mctp_key_add(key, msk);
			if (rc)
				kfree(key);

			/* we don't need to release key->lock on exit */
			key = NULL;

		} else {
			if (key->reasm_head || key->reasm_dead) {
				/* duplicate start? drop everything */
				__mctp_key_unlock_drop(key, net, f);
				rc = -EEXIST;
				key = NULL;
			} else {
				rc = mctp_frag_queue(key, skb);
			}
		}

	} else if (key) {
		/* this packet continues a previous message; reassemble
		 * using the message-specific key
		 */

		/* we need to be continuing an existing reassembly... */
		if (!key->reasm_head)
			rc = -EINVAL;
		else
			rc = mctp_frag_queue(key, skb);

		/* end of message? deliver to socket, and we're done with
		 * the reassembly/response key
		 */
		if (!rc && flags & MCTP_HDR_FLAG_EOM) {
			sock_queue_rcv_skb(key->sk, key->reasm_head);
			key->reasm_head = NULL;
			__mctp_key_unlock_drop(key, net, f);
			key = NULL;
		}

	} else {
		/* not a start, no matching key */
		rc = -ENOENT;
	}

out_unlock:
	rcu_read_unlock();
	if (key) {
		spin_unlock_irqrestore(&key->lock, f);
		mctp_key_unref(key);
	}
out:
	if (rc)
		kfree_skb(skb);
	return rc;
}

static unsigned int mctp_route_mtu(struct mctp_route *rt)
{
	return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
}

static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	char daddr_buf[MAX_ADDR_LEN];
	char *daddr = NULL;
	unsigned int mtu;
	int rc;

	skb->protocol = htons(ETH_P_MCTP);

	mtu = READ_ONCE(skb->dev->mtu);
	if (skb->len > mtu) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* If lookup fails let the device handle daddr==NULL */
	if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
		daddr = daddr_buf;

	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			     daddr, skb->dev->dev_addr, skb->len);
	if (rc) {
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	rc = dev_queue_xmit(skb);
	if (rc)
		rc = net_xmit_errno(rc);

	return rc;
}

/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
	if (refcount_dec_and_test(&rt->refs)) {
		mctp_dev_put(rt->dev);
		kfree_rcu(rt, rcu);
	}
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
	struct mctp_route *rt;

	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
	if (!rt)
		return NULL;

	INIT_LIST_HEAD(&rt->list);
	refcount_set(&rt->refs, 1);
	rt->output = mctp_route_discard;

	return rt;
}

unsigned int mctp_default_net(struct net *net)
{
	return READ_ONCE(net->mctp.default_net);
}

int mctp_default_net_set(struct net *net, unsigned int index)
{
	if (index == 0)
		return -EINVAL;
	WRITE_ONCE(net->mctp.default_net, index);
	return 0;
}

/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	/* we hold the net->keys_lock here, allowing updates to both
	 * the net and sk lists
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
	refcount_inc(&key->refs);
}
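
/* Worked example (illustrative): if keys for tags 0 and 1 are already
 * live for this (saddr, daddr) pair, the conflict walk below clears bits
 * 0 and 1 of tagbits (0xff -> 0xfc), and __ffs() then selects tag 2.
 */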

/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
 * it for the socket msk
 */
static int mctp_alloc_local_tag(struct mctp_sock *msk,
				mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	int rc = -EAGAIN;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (daddr == MCTP_ADDR_NULL)
		daddr = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
	if (!key)
		return -ENOMEM;

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		if (!((tmp->peer_addr == daddr ||
		       tmp->peer_addr == MCTP_ADDR_ANY) &&
		      tmp->local_addr == saddr))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		if (!tagbits)
			break;
	}

	if (tagbits) {
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		*tagp = key->tag;
		rc = 0;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits)
		kfree(key);

	return rc;
}

/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
{
	return READ_ONCE(rt->dev->net) == net &&
	       rt->min <= eid && rt->max >= eid;
}

/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
				  struct mctp_route *rt2)
{
	ASSERT_RTNL();
	return rt1->dev->net == rt2->dev->net &&
	       rt1->min == rt2->min &&
	       rt1->max == rt2->max;
}

struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
				     mctp_eid_t daddr)
{
	struct mctp_route *tmp, *rt = NULL;

	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
		/* TODO: add metrics */
		if (mctp_rt_match_eid(tmp, dnet, daddr)) {
			if (refcount_inc_not_zero(&tmp->refs)) {
				rt = tmp;
				break;
			}
		}
	}

	return rt;
}

static struct mctp_route *mctp_route_lookup_null(struct net *net,
						 struct net_device *dev)
{
	struct mctp_route *rt;

	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (rt->dev->dev == dev && rt->type == RTN_LOCAL &&
		    refcount_inc_not_zero(&rt->refs))
			return rt;
	}

	return NULL;
}

/* sends a skb to rt and releases the route. */
int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
{
	int rc;

	rc = rt->output(rt, skb);
	mctp_route_release(rt);
	return rc;
}
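
/* Fragmentation example (illustrative): with a 4-byte MCTP header and an
 * MTU of 68, each fragment carries up to 64 bytes of payload, so a
 * 200-byte message goes out as four packets of 64 + 64 + 64 + 8 payload
 * bytes; SOM is set on the first, EOM on the last, and the 2-bit packet
 * sequence number increments (mod 4) across all of them.
 */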

static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* we've got the header */
	skb_pull(skb, hlen);

	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, MCTP_HEADER_MAXLEN);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* do route, but don't drop the rt reference */
		rc = rt->output(rt, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	mctp_route_release(rt);
	consume_skb(skb);
	return rc;
}
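
/* Transmit entry point from the socket layer. If req_tag has the TO
 * (tag-owner) bit set, a fresh locally-owned tag is allocated for this
 * message and a key is reserved for the reply; otherwise req_tag is used
 * verbatim, e.g. when responding with the peer's tag and TO clear.
 */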

int mctp_local_output(struct sock *sk, struct mctp_route *rt,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *hdr;
	unsigned long flags;
	unsigned int mtu;
	mctp_eid_t saddr;
	int rc;
	u8 tag;

	if (WARN_ON(!rt->dev))
		return -EINVAL;

	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
	if (rt->dev->num_addrs == 0) {
		rc = -EHOSTUNREACH;
	} else {
		/* use the outbound interface's first address as our source */
		saddr = rt->dev->addrs[0];
		rc = 0;
	}
	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);

	if (rc)
		return rc;

	if (req_tag & MCTP_HDR_FLAG_TO) {
		rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
		if (rc)
			return rc;
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		tag = req_tag;
	}

	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;
	skb_reset_transport_header(skb);
	skb_push(skb, sizeof(struct mctp_hdr));
	skb_reset_network_header(skb);
	skb->dev = rt->dev->dev;

	/* cb->net will have been set on initial ingress */
	cb->src = saddr;

	/* set up common header fields */
	hdr = mctp_hdr(skb);
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;

	mtu = mctp_route_mtu(rt);

	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
			tag;
		return mctp_do_route(rt, skb);
	} else {
		return mctp_do_fragment_route(rt, skb, mtu, tag);
	}
}

/* route management */
static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			  unsigned int daddr_extent, unsigned int mtu,
			  unsigned char type)
{
	int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *ert;

	if (!mctp_address_ok(daddr_start))
		return -EINVAL;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	switch (type) {
	case RTN_LOCAL:
		rtfn = mctp_route_input;
		break;
	case RTN_UNICAST:
		rtfn = mctp_route_output;
		break;
	default:
		return -EINVAL;
	}

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rt->min = daddr_start;
	rt->max = daddr_start + daddr_extent;
	rt->mtu = mtu;
	rt->dev = mdev;
	mctp_dev_hold(rt->dev);
	rt->type = type;
	rt->output = rtfn;

	ASSERT_RTNL();
	/* Prevent duplicate identical routes. */
	list_for_each_entry(ert, &net->mctp.routes, list) {
		if (mctp_rt_compare_exact(rt, ert)) {
			mctp_route_release(rt);
			return -EEXIST;
		}
	}

	list_add_rcu(&rt->list, &net->mctp.routes);

	return 0;
}
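
/* Note on addressing: a route covers the inclusive EID range
 * [daddr_start, daddr_start + daddr_extent]; over netlink, rtm_dst_len
 * carries the extent (a count of extra EIDs) rather than a prefix length.
 */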

static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			     unsigned int daddr_extent, unsigned char type)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;
	mctp_eid_t daddr_end;
	bool dropped;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	daddr_end = daddr_start + daddr_extent;
	dropped = false;

	ASSERT_RTNL();

	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev &&
		    rt->min == daddr_start && rt->max == daddr_end &&
		    rt->type == type) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
			dropped = true;
		}
	}

	return dropped ? 0 : -ENOENT;
}

int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
}

int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
		}
	}
}

/* Incoming packet-handling */

static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
				struct packet_type *pt,
				struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mctp_dev *mdev;
	struct mctp_skb_cb *cb;
	struct mctp_route *rt;
	struct mctp_hdr *mh;

	rcu_read_lock();
	mdev = __mctp_dev_get(dev);
	rcu_read_unlock();
	if (!mdev) {
		/* basic non-data sanity checks */
		goto err_drop;
	}

	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
		goto err_drop;

	skb_reset_transport_header(skb);
	skb_reset_network_header(skb);

	/* We have enough for a header; decode and route */
	mh = mctp_hdr(skb);
	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
		goto err_drop;

	cb = __mctp_cb(skb);
	cb->net = READ_ONCE(mdev->net);

	rt = mctp_route_lookup(net, cb->net, mh->dest);

	/* NULL EID, but addressed to our physical address */
	if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
		rt = mctp_route_lookup_null(net, dev);

	if (!rt)
		goto err_drop;

	mctp_do_route(rt, skb);

	return NET_RX_SUCCESS;

err_drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}

static struct packet_type mctp_packet_type = {
	.type = cpu_to_be16(ETH_P_MCTP),
	.func = mctp_pkttype_receive,
};

/* netlink interface */

static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
	[RTA_DST]	= { .type = NLA_U8 },
	[RTA_METRICS]	= { .type = NLA_NESTED },
	[RTA_OIF]	= { .type = NLA_U32 },
};

/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
 * tb must hold RTA_MAX+1 elements.
 */
static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack,
			      struct nlattr **tb, struct rtmsg **rtm,
			      struct mctp_dev **mdev, mctp_eid_t *daddr_start)
{
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	unsigned int ifindex;
	int rc;

	rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
			 rta_mctp_policy, extack);
	if (rc < 0) {
		NL_SET_ERR_MSG(extack, "incorrect format");
		return rc;
	}

	if (!tb[RTA_DST]) {
		NL_SET_ERR_MSG(extack, "dst EID missing");
		return -EINVAL;
	}
	*daddr_start = nla_get_u8(tb[RTA_DST]);

	if (!tb[RTA_OIF]) {
		NL_SET_ERR_MSG(extack, "ifindex missing");
		return -EINVAL;
	}
	ifindex = nla_get_u32(tb[RTA_OIF]);

	*rtm = nlmsg_data(nlh);
	if ((*rtm)->rtm_family != AF_MCTP) {
		NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
		return -EINVAL;
	}

	dev = __dev_get_by_index(net, ifindex);
	if (!dev) {
		NL_SET_ERR_MSG(extack, "bad ifindex");
		return -ENODEV;
	}
	*mdev = mctp_dev_get_rtnl(dev);
	if (!*mdev)
		return -ENODEV;

	if (dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack, "no routes to loopback");
		return -EINVAL;
	}

	return 0;
}

static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RTA_MAX + 1];
	mctp_eid_t daddr_start;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	unsigned int mtu;
	int rc;

	rc = mctp_route_nlparse(skb, nlh, extack, tb,
				&rtm, &mdev, &daddr_start);
	if (rc < 0)
		return rc;

	if (rtm->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
		return -EINVAL;
	}

	/* TODO: parse mtu from nlparse */
	mtu = 0;

	rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
			    rtm->rtm_type);
	return rc;
}

static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RTA_MAX + 1];
	mctp_eid_t daddr_start;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	int rc;

	rc = mctp_route_nlparse(skb, nlh, extack, tb,
				&rtm, &mdev, &daddr_start);
	if (rc < 0)
		return rc;

	/* we only have unicast routes */
	if (rtm->rtm_type != RTN_UNICAST)
		return -EINVAL;

	rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
	return rc;
}
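
/* Netlink reporting. Routes are reported with RTA_DST set to the range
 * start (rt->min) and rtm_dst_len carrying the extent, matching the
 * interpretation used on the add/remove paths above.
 */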

static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *hdr;
	void *metrics;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->rtm_family = AF_MCTP;

	/* we use the _len fields as a number of EIDs, rather than
	 * a number of bits in the address
	 */
	hdr->rtm_dst_len = rt->max - rt->min;
	hdr->rtm_src_len = 0;
	hdr->rtm_tos = 0;
	hdr->rtm_table = RT_TABLE_DEFAULT;
	hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
	hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
	hdr->rtm_type = rt->type;

	if (nla_put_u8(skb, RTA_DST, rt->min))
		goto cancel;

	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!metrics)
		goto cancel;

	if (rt->mtu) {
		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
			goto cancel;
	}

	nla_nest_end(skb, metrics);

	if (rt->dev) {
		if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
			goto cancel;
	}

	/* TODO: conditional neighbour physaddr? */

	nlmsg_end(skb, nlh);

	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int s_idx, idx;

	/* TODO: allow filtering on route data, possibly under
	 * cb->strict_check
	 */

	/* TODO: change to struct overlay */
	s_idx = cb->args[0];
	idx = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (idx++ < s_idx)
			continue;
		if (mctp_fill_rtinfo(skb, rt,
				     NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq,
				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
			break;
	}

	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	INIT_HLIST_HEAD(&ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
	return 0;
}

static void __net_exit mctp_routes_net_exit(struct net *net)
{
	struct mctp_route *rt;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
		mctp_route_release(rt);
	rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};

int __init mctp_routes_init(void)
{
	dev_add_pack(&mctp_packet_type);

	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
			     NULL, mctp_dump_rtinfo, 0);
	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
			     mctp_newroute, NULL, 0);
	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
			     mctp_delroute, NULL, 0);

	return register_pernet_subsys(&mctp_net_ops);
}

void __exit mctp_routes_exit(void)
{
	unregister_pernet_subsys(&mctp_net_ops);
	rtnl_unregister(PF_MCTP, RTM_DELROUTE);
	rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
	rtnl_unregister(PF_MCTP, RTM_GETROUTE);
	dev_remove_pack(&mctp_packet_type);
}