1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Management Component Transport Protocol (MCTP) - routing 4 * implementation. 5 * 6 * This is currently based on a simple routing table, with no dst cache. The 7 * number of routes should stay fairly small, so the lookup cost is small. 8 * 9 * Copyright (c) 2021 Code Construct 10 * Copyright (c) 2021 Google 11 */ 12 13 #include <linux/idr.h> 14 #include <linux/kconfig.h> 15 #include <linux/mctp.h> 16 #include <linux/netdevice.h> 17 #include <linux/rtnetlink.h> 18 #include <linux/skbuff.h> 19 20 #include <uapi/linux/if_arp.h> 21 22 #include <net/mctp.h> 23 #include <net/mctpdevice.h> 24 #include <net/netlink.h> 25 #include <net/sock.h> 26 27 #include <trace/events/mctp.h> 28 29 static const unsigned int mctp_message_maxlen = 64 * 1024; 30 static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ; 31 32 static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev); 33 34 /* route output callbacks */ 35 static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) 36 { 37 kfree_skb(skb); 38 return 0; 39 } 40 41 static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) 42 { 43 struct mctp_skb_cb *cb = mctp_cb(skb); 44 struct mctp_hdr *mh; 45 struct sock *sk; 46 u8 type; 47 48 WARN_ON(!rcu_read_lock_held()); 49 50 /* TODO: look up in skb->cb? */ 51 mh = mctp_hdr(skb); 52 53 if (!skb_headlen(skb)) 54 return NULL; 55 56 type = (*(u8 *)skb->data) & 0x7f; 57 58 sk_for_each_rcu(sk, &net->mctp.binds) { 59 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 60 61 if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net) 62 continue; 63 64 if (msk->bind_type != type) 65 continue; 66 67 if (!mctp_address_matches(msk->bind_addr, mh->dest)) 68 continue; 69 70 return msk; 71 } 72 73 return NULL; 74 } 75 76 static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, 77 mctp_eid_t peer, u8 tag) 78 { 79 if (key->local_addr != local) 80 return false; 81 82 if (key->peer_addr != peer) 83 return false; 84 85 if (key->tag != tag) 86 return false; 87 88 return true; 89 } 90 91 /* returns a key (with key->lock held, and refcounted), or NULL if no such 92 * key exists. 93 */ 94 static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, 95 mctp_eid_t peer, 96 unsigned long *irqflags) 97 __acquires(&key->lock) 98 { 99 struct mctp_sk_key *key, *ret; 100 unsigned long flags; 101 struct mctp_hdr *mh; 102 u8 tag; 103 104 mh = mctp_hdr(skb); 105 tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); 106 107 ret = NULL; 108 spin_lock_irqsave(&net->mctp.keys_lock, flags); 109 110 hlist_for_each_entry(key, &net->mctp.keys, hlist) { 111 if (!mctp_key_match(key, mh->dest, peer, tag)) 112 continue; 113 114 spin_lock(&key->lock); 115 if (key->valid) { 116 refcount_inc(&key->refs); 117 ret = key; 118 break; 119 } 120 spin_unlock(&key->lock); 121 } 122 123 if (ret) { 124 spin_unlock(&net->mctp.keys_lock); 125 *irqflags = flags; 126 } else { 127 spin_unlock_irqrestore(&net->mctp.keys_lock, flags); 128 } 129 130 return ret; 131 } 132 133 static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, 134 mctp_eid_t local, mctp_eid_t peer, 135 u8 tag, gfp_t gfp) 136 { 137 struct mctp_sk_key *key; 138 139 key = kzalloc(sizeof(*key), gfp); 140 if (!key) 141 return NULL; 142 143 key->peer_addr = peer; 144 key->local_addr = local; 145 key->tag = tag; 146 key->sk = &msk->sk; 147 key->valid = true; 148 spin_lock_init(&key->lock); 149 refcount_set(&key->refs, 1); 150 151 return key; 152 } 153 154 void mctp_key_unref(struct mctp_sk_key *key) 155 { 156 unsigned long flags; 157 158 if (!refcount_dec_and_test(&key->refs)) 159 return; 160 161 /* even though no refs exist here, the lock allows us to stay 162 * consistent with the locking requirement of mctp_dev_release_key 163 */ 164 spin_lock_irqsave(&key->lock, flags); 165 mctp_dev_release_key(key->dev, key); 166 spin_unlock_irqrestore(&key->lock, flags); 167 168 kfree(key); 169 } 170 171 static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) 172 { 173 struct net *net = sock_net(&msk->sk); 174 struct mctp_sk_key *tmp; 175 unsigned long flags; 176 int rc = 0; 177 178 spin_lock_irqsave(&net->mctp.keys_lock, flags); 179 180 hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { 181 if (mctp_key_match(tmp, key->local_addr, key->peer_addr, 182 key->tag)) { 183 spin_lock(&tmp->lock); 184 if (tmp->valid) 185 rc = -EEXIST; 186 spin_unlock(&tmp->lock); 187 if (rc) 188 break; 189 } 190 } 191 192 if (!rc) { 193 refcount_inc(&key->refs); 194 key->expiry = jiffies + mctp_key_lifetime; 195 timer_reduce(&msk->key_expiry, key->expiry); 196 197 hlist_add_head(&key->hlist, &net->mctp.keys); 198 hlist_add_head(&key->sklist, &msk->keys); 199 } 200 201 spin_unlock_irqrestore(&net->mctp.keys_lock, flags); 202 203 return rc; 204 } 205 206 /* We're done with the key; unset valid and remove from lists. There may still 207 * be outstanding refs on the key though... 208 */ 209 static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net, 210 unsigned long flags) 211 __releases(&key->lock) 212 { 213 struct sk_buff *skb; 214 215 skb = key->reasm_head; 216 key->reasm_head = NULL; 217 key->reasm_dead = true; 218 key->valid = false; 219 mctp_dev_release_key(key->dev, key); 220 spin_unlock_irqrestore(&key->lock, flags); 221 222 spin_lock_irqsave(&net->mctp.keys_lock, flags); 223 hlist_del(&key->hlist); 224 hlist_del(&key->sklist); 225 spin_unlock_irqrestore(&net->mctp.keys_lock, flags); 226 227 /* one unref for the lists */ 228 mctp_key_unref(key); 229 230 /* and one for the local reference */ 231 mctp_key_unref(key); 232 233 kfree_skb(skb); 234 } 235 236 #ifdef CONFIG_MCTP_FLOWS 237 static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) 238 { 239 struct mctp_flow *flow; 240 241 flow = skb_ext_add(skb, SKB_EXT_MCTP); 242 if (!flow) 243 return; 244 245 refcount_inc(&key->refs); 246 flow->key = key; 247 } 248 249 static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) 250 { 251 struct mctp_sk_key *key; 252 struct mctp_flow *flow; 253 254 flow = skb_ext_find(skb, SKB_EXT_MCTP); 255 if (!flow) 256 return; 257 258 key = flow->key; 259 260 if (WARN_ON(key->dev && key->dev != dev)) 261 return; 262 263 mctp_dev_set_key(dev, key); 264 } 265 #else 266 static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {} 267 static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {} 268 #endif 269 270 static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) 271 { 272 struct mctp_hdr *hdr = mctp_hdr(skb); 273 u8 exp_seq, this_seq; 274 275 this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT) 276 & MCTP_HDR_SEQ_MASK; 277 278 if (!key->reasm_head) { 279 key->reasm_head = skb; 280 key->reasm_tailp = &(skb_shinfo(skb)->frag_list); 281 key->last_seq = this_seq; 282 return 0; 283 } 284 285 exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK; 286 287 if (this_seq != exp_seq) 288 return -EINVAL; 289 290 if (key->reasm_head->len + skb->len > mctp_message_maxlen) 291 return -EINVAL; 292 293 skb->next = NULL; 294 skb->sk = NULL; 295 *key->reasm_tailp = skb; 296 key->reasm_tailp = &skb->next; 297 298 key->last_seq = this_seq; 299 300 key->reasm_head->data_len += skb->len; 301 key->reasm_head->len += skb->len; 302 key->reasm_head->truesize += skb->truesize; 303 304 return 0; 305 } 306 307 static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) 308 { 309 struct net *net = dev_net(skb->dev); 310 struct mctp_sk_key *key; 311 struct mctp_sock *msk; 312 struct mctp_hdr *mh; 313 unsigned long f; 314 u8 tag, flags; 315 int rc; 316 317 msk = NULL; 318 rc = -EINVAL; 319 320 /* we may be receiving a locally-routed packet; drop source sk 321 * accounting 322 */ 323 skb_orphan(skb); 324 325 /* ensure we have enough data for a header and a type */ 326 if (skb->len < sizeof(struct mctp_hdr) + 1) 327 goto out; 328 329 /* grab header, advance data ptr */ 330 mh = mctp_hdr(skb); 331 skb_pull(skb, sizeof(struct mctp_hdr)); 332 333 if (mh->ver != 1) 334 goto out; 335 336 flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM); 337 tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); 338 339 rcu_read_lock(); 340 341 /* lookup socket / reasm context, exactly matching (src,dest,tag). 342 * we hold a ref on the key, and key->lock held. 343 */ 344 key = mctp_lookup_key(net, skb, mh->src, &f); 345 346 if (flags & MCTP_HDR_FLAG_SOM) { 347 if (key) { 348 msk = container_of(key->sk, struct mctp_sock, sk); 349 } else { 350 /* first response to a broadcast? do a more general 351 * key lookup to find the socket, but don't use this 352 * key for reassembly - we'll create a more specific 353 * one for future packets if required (ie, !EOM). 354 */ 355 key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f); 356 if (key) { 357 msk = container_of(key->sk, 358 struct mctp_sock, sk); 359 spin_unlock_irqrestore(&key->lock, f); 360 mctp_key_unref(key); 361 key = NULL; 362 } 363 } 364 365 if (!key && !msk && (tag & MCTP_HDR_FLAG_TO)) 366 msk = mctp_lookup_bind(net, skb); 367 368 if (!msk) { 369 rc = -ENOENT; 370 goto out_unlock; 371 } 372 373 /* single-packet message? deliver to socket, clean up any 374 * pending key. 375 */ 376 if (flags & MCTP_HDR_FLAG_EOM) { 377 sock_queue_rcv_skb(&msk->sk, skb); 378 if (key) { 379 /* we've hit a pending reassembly; not much we 380 * can do but drop it 381 */ 382 trace_mctp_key_release(key, 383 MCTP_TRACE_KEY_REPLIED); 384 __mctp_key_unlock_drop(key, net, f); 385 key = NULL; 386 } 387 rc = 0; 388 goto out_unlock; 389 } 390 391 /* broadcast response or a bind() - create a key for further 392 * packets for this message 393 */ 394 if (!key) { 395 key = mctp_key_alloc(msk, mh->dest, mh->src, 396 tag, GFP_ATOMIC); 397 if (!key) { 398 rc = -ENOMEM; 399 goto out_unlock; 400 } 401 402 /* we can queue without the key lock here, as the 403 * key isn't observable yet 404 */ 405 mctp_frag_queue(key, skb); 406 407 /* if the key_add fails, we've raced with another 408 * SOM packet with the same src, dest and tag. There's 409 * no way to distinguish future packets, so all we 410 * can do is drop; we'll free the skb on exit from 411 * this function. 412 */ 413 rc = mctp_key_add(key, msk); 414 if (rc) 415 kfree(key); 416 417 trace_mctp_key_acquire(key); 418 419 /* we don't need to release key->lock on exit */ 420 mctp_key_unref(key); 421 key = NULL; 422 423 } else { 424 if (key->reasm_head || key->reasm_dead) { 425 /* duplicate start? drop everything */ 426 trace_mctp_key_release(key, 427 MCTP_TRACE_KEY_INVALIDATED); 428 __mctp_key_unlock_drop(key, net, f); 429 rc = -EEXIST; 430 key = NULL; 431 } else { 432 rc = mctp_frag_queue(key, skb); 433 } 434 } 435 436 } else if (key) { 437 /* this packet continues a previous message; reassemble 438 * using the message-specific key 439 */ 440 441 /* we need to be continuing an existing reassembly... */ 442 if (!key->reasm_head) 443 rc = -EINVAL; 444 else 445 rc = mctp_frag_queue(key, skb); 446 447 /* end of message? deliver to socket, and we're done with 448 * the reassembly/response key 449 */ 450 if (!rc && flags & MCTP_HDR_FLAG_EOM) { 451 sock_queue_rcv_skb(key->sk, key->reasm_head); 452 key->reasm_head = NULL; 453 trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED); 454 __mctp_key_unlock_drop(key, net, f); 455 key = NULL; 456 } 457 458 } else { 459 /* not a start, no matching key */ 460 rc = -ENOENT; 461 } 462 463 out_unlock: 464 rcu_read_unlock(); 465 if (key) { 466 spin_unlock_irqrestore(&key->lock, f); 467 mctp_key_unref(key); 468 } 469 out: 470 if (rc) 471 kfree_skb(skb); 472 return rc; 473 } 474 475 static unsigned int mctp_route_mtu(struct mctp_route *rt) 476 { 477 return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu); 478 } 479 480 static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) 481 { 482 struct mctp_skb_cb *cb = mctp_cb(skb); 483 struct mctp_hdr *hdr = mctp_hdr(skb); 484 char daddr_buf[MAX_ADDR_LEN]; 485 char *daddr = NULL; 486 unsigned int mtu; 487 int rc; 488 489 skb->protocol = htons(ETH_P_MCTP); 490 491 mtu = READ_ONCE(skb->dev->mtu); 492 if (skb->len > mtu) { 493 kfree_skb(skb); 494 return -EMSGSIZE; 495 } 496 497 if (cb->ifindex) { 498 /* direct route; use the hwaddr we stashed in sendmsg */ 499 daddr = cb->haddr; 500 } else { 501 /* If lookup fails let the device handle daddr==NULL */ 502 if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) 503 daddr = daddr_buf; 504 } 505 506 rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), 507 daddr, skb->dev->dev_addr, skb->len); 508 if (rc) { 509 kfree_skb(skb); 510 return -EHOSTUNREACH; 511 } 512 513 mctp_flow_prepare_output(skb, route->dev); 514 515 rc = dev_queue_xmit(skb); 516 if (rc) 517 rc = net_xmit_errno(rc); 518 519 return rc; 520 } 521 522 /* route alloc/release */ 523 static void mctp_route_release(struct mctp_route *rt) 524 { 525 if (refcount_dec_and_test(&rt->refs)) { 526 mctp_dev_put(rt->dev); 527 kfree_rcu(rt, rcu); 528 } 529 } 530 531 /* returns a route with the refcount at 1 */ 532 static struct mctp_route *mctp_route_alloc(void) 533 { 534 struct mctp_route *rt; 535 536 rt = kzalloc(sizeof(*rt), GFP_KERNEL); 537 if (!rt) 538 return NULL; 539 540 INIT_LIST_HEAD(&rt->list); 541 refcount_set(&rt->refs, 1); 542 rt->output = mctp_route_discard; 543 544 return rt; 545 } 546 547 unsigned int mctp_default_net(struct net *net) 548 { 549 return READ_ONCE(net->mctp.default_net); 550 } 551 552 int mctp_default_net_set(struct net *net, unsigned int index) 553 { 554 if (index == 0) 555 return -EINVAL; 556 WRITE_ONCE(net->mctp.default_net, index); 557 return 0; 558 } 559 560 /* tag management */ 561 static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, 562 struct mctp_sock *msk) 563 { 564 struct netns_mctp *mns = &net->mctp; 565 566 lockdep_assert_held(&mns->keys_lock); 567 568 key->expiry = jiffies + mctp_key_lifetime; 569 timer_reduce(&msk->key_expiry, key->expiry); 570 571 /* we hold the net->key_lock here, allowing updates to both 572 * then net and sk 573 */ 574 hlist_add_head_rcu(&key->hlist, &mns->keys); 575 hlist_add_head_rcu(&key->sklist, &msk->keys); 576 refcount_inc(&key->refs); 577 } 578 579 /* Allocate a locally-owned tag value for (saddr, daddr), and reserve 580 * it for the socket msk 581 */ 582 static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, 583 mctp_eid_t saddr, 584 mctp_eid_t daddr, u8 *tagp) 585 { 586 struct net *net = sock_net(&msk->sk); 587 struct netns_mctp *mns = &net->mctp; 588 struct mctp_sk_key *key, *tmp; 589 unsigned long flags; 590 u8 tagbits; 591 592 /* for NULL destination EIDs, we may get a response from any peer */ 593 if (daddr == MCTP_ADDR_NULL) 594 daddr = MCTP_ADDR_ANY; 595 596 /* be optimistic, alloc now */ 597 key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL); 598 if (!key) 599 return ERR_PTR(-ENOMEM); 600 601 /* 8 possible tag values */ 602 tagbits = 0xff; 603 604 spin_lock_irqsave(&mns->keys_lock, flags); 605 606 /* Walk through the existing keys, looking for potential conflicting 607 * tags. If we find a conflict, clear that bit from tagbits 608 */ 609 hlist_for_each_entry(tmp, &mns->keys, hlist) { 610 /* We can check the lookup fields (*_addr, tag) without the 611 * lock held, they don't change over the lifetime of the key. 612 */ 613 614 /* if we don't own the tag, it can't conflict */ 615 if (tmp->tag & MCTP_HDR_FLAG_TO) 616 continue; 617 618 if (!(mctp_address_matches(tmp->peer_addr, daddr) && 619 tmp->local_addr == saddr)) 620 continue; 621 622 spin_lock(&tmp->lock); 623 /* key must still be valid. If we find a match, clear the 624 * potential tag value 625 */ 626 if (tmp->valid) 627 tagbits &= ~(1 << tmp->tag); 628 spin_unlock(&tmp->lock); 629 630 if (!tagbits) 631 break; 632 } 633 634 if (tagbits) { 635 key->tag = __ffs(tagbits); 636 mctp_reserve_tag(net, key, msk); 637 trace_mctp_key_acquire(key); 638 639 *tagp = key->tag; 640 } 641 642 spin_unlock_irqrestore(&mns->keys_lock, flags); 643 644 if (!tagbits) { 645 kfree(key); 646 return ERR_PTR(-EBUSY); 647 } 648 649 return key; 650 } 651 652 /* routing lookups */ 653 static bool mctp_rt_match_eid(struct mctp_route *rt, 654 unsigned int net, mctp_eid_t eid) 655 { 656 return READ_ONCE(rt->dev->net) == net && 657 rt->min <= eid && rt->max >= eid; 658 } 659 660 /* compares match, used for duplicate prevention */ 661 static bool mctp_rt_compare_exact(struct mctp_route *rt1, 662 struct mctp_route *rt2) 663 { 664 ASSERT_RTNL(); 665 return rt1->dev->net == rt2->dev->net && 666 rt1->min == rt2->min && 667 rt1->max == rt2->max; 668 } 669 670 struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, 671 mctp_eid_t daddr) 672 { 673 struct mctp_route *tmp, *rt = NULL; 674 675 list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { 676 /* TODO: add metrics */ 677 if (mctp_rt_match_eid(tmp, dnet, daddr)) { 678 if (refcount_inc_not_zero(&tmp->refs)) { 679 rt = tmp; 680 break; 681 } 682 } 683 } 684 685 return rt; 686 } 687 688 static struct mctp_route *mctp_route_lookup_null(struct net *net, 689 struct net_device *dev) 690 { 691 struct mctp_route *rt; 692 693 list_for_each_entry_rcu(rt, &net->mctp.routes, list) { 694 if (rt->dev->dev == dev && rt->type == RTN_LOCAL && 695 refcount_inc_not_zero(&rt->refs)) 696 return rt; 697 } 698 699 return NULL; 700 } 701 702 static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, 703 unsigned int mtu, u8 tag) 704 { 705 const unsigned int hlen = sizeof(struct mctp_hdr); 706 struct mctp_hdr *hdr, *hdr2; 707 unsigned int pos, size; 708 struct sk_buff *skb2; 709 int rc; 710 u8 seq; 711 712 hdr = mctp_hdr(skb); 713 seq = 0; 714 rc = 0; 715 716 if (mtu < hlen + 1) { 717 kfree_skb(skb); 718 return -EMSGSIZE; 719 } 720 721 /* we've got the header */ 722 skb_pull(skb, hlen); 723 724 for (pos = 0; pos < skb->len;) { 725 /* size of message payload */ 726 size = min(mtu - hlen, skb->len - pos); 727 728 skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL); 729 if (!skb2) { 730 rc = -ENOMEM; 731 break; 732 } 733 734 /* generic skb copy */ 735 skb2->protocol = skb->protocol; 736 skb2->priority = skb->priority; 737 skb2->dev = skb->dev; 738 memcpy(skb2->cb, skb->cb, sizeof(skb2->cb)); 739 740 if (skb->sk) 741 skb_set_owner_w(skb2, skb->sk); 742 743 /* establish packet */ 744 skb_reserve(skb2, MCTP_HEADER_MAXLEN); 745 skb_reset_network_header(skb2); 746 skb_put(skb2, hlen + size); 747 skb2->transport_header = skb2->network_header + hlen; 748 749 /* copy header fields, calculate SOM/EOM flags & seq */ 750 hdr2 = mctp_hdr(skb2); 751 hdr2->ver = hdr->ver; 752 hdr2->dest = hdr->dest; 753 hdr2->src = hdr->src; 754 hdr2->flags_seq_tag = tag & 755 (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); 756 757 if (pos == 0) 758 hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM; 759 760 if (pos + size == skb->len) 761 hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM; 762 763 hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT; 764 765 /* copy message payload */ 766 skb_copy_bits(skb, pos, skb_transport_header(skb2), size); 767 768 /* do route */ 769 rc = rt->output(rt, skb2); 770 if (rc) 771 break; 772 773 seq = (seq + 1) & MCTP_HDR_SEQ_MASK; 774 pos += size; 775 } 776 777 consume_skb(skb); 778 return rc; 779 } 780 781 int mctp_local_output(struct sock *sk, struct mctp_route *rt, 782 struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) 783 { 784 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 785 struct mctp_skb_cb *cb = mctp_cb(skb); 786 struct mctp_route tmp_rt; 787 struct mctp_sk_key *key; 788 struct net_device *dev; 789 struct mctp_hdr *hdr; 790 unsigned long flags; 791 unsigned int mtu; 792 mctp_eid_t saddr; 793 bool ext_rt; 794 int rc; 795 u8 tag; 796 797 rc = -ENODEV; 798 799 if (rt) { 800 ext_rt = false; 801 dev = NULL; 802 803 if (WARN_ON(!rt->dev)) 804 goto out_release; 805 806 } else if (cb->ifindex) { 807 ext_rt = true; 808 rt = &tmp_rt; 809 810 rcu_read_lock(); 811 dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); 812 if (!dev) { 813 rcu_read_unlock(); 814 return rc; 815 } 816 817 rt->dev = __mctp_dev_get(dev); 818 rcu_read_unlock(); 819 820 if (!rt->dev) 821 goto out_release; 822 823 /* establish temporary route - we set up enough to keep 824 * mctp_route_output happy 825 */ 826 rt->output = mctp_route_output; 827 rt->mtu = 0; 828 829 } else { 830 return -EINVAL; 831 } 832 833 spin_lock_irqsave(&rt->dev->addrs_lock, flags); 834 if (rt->dev->num_addrs == 0) { 835 rc = -EHOSTUNREACH; 836 } else { 837 /* use the outbound interface's first address as our source */ 838 saddr = rt->dev->addrs[0]; 839 rc = 0; 840 } 841 spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); 842 843 if (rc) 844 goto out_release; 845 846 if (req_tag & MCTP_HDR_FLAG_TO) { 847 key = mctp_alloc_local_tag(msk, saddr, daddr, &tag); 848 if (IS_ERR(key)) { 849 rc = PTR_ERR(key); 850 goto out_release; 851 } 852 mctp_skb_set_flow(skb, key); 853 /* done with the key in this scope */ 854 mctp_key_unref(key); 855 tag |= MCTP_HDR_FLAG_TO; 856 } else { 857 key = NULL; 858 tag = req_tag; 859 } 860 861 skb->protocol = htons(ETH_P_MCTP); 862 skb->priority = 0; 863 skb_reset_transport_header(skb); 864 skb_push(skb, sizeof(struct mctp_hdr)); 865 skb_reset_network_header(skb); 866 skb->dev = rt->dev->dev; 867 868 /* cb->net will have been set on initial ingress */ 869 cb->src = saddr; 870 871 /* set up common header fields */ 872 hdr = mctp_hdr(skb); 873 hdr->ver = 1; 874 hdr->dest = daddr; 875 hdr->src = saddr; 876 877 mtu = mctp_route_mtu(rt); 878 879 if (skb->len + sizeof(struct mctp_hdr) <= mtu) { 880 hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | 881 MCTP_HDR_FLAG_EOM | tag; 882 rc = rt->output(rt, skb); 883 } else { 884 rc = mctp_do_fragment_route(rt, skb, mtu, tag); 885 } 886 887 out_release: 888 if (!ext_rt) 889 mctp_route_release(rt); 890 891 dev_put(dev); 892 893 return rc; 894 895 } 896 897 /* route management */ 898 static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, 899 unsigned int daddr_extent, unsigned int mtu, 900 unsigned char type) 901 { 902 int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb); 903 struct net *net = dev_net(mdev->dev); 904 struct mctp_route *rt, *ert; 905 906 if (!mctp_address_ok(daddr_start)) 907 return -EINVAL; 908 909 if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) 910 return -EINVAL; 911 912 switch (type) { 913 case RTN_LOCAL: 914 rtfn = mctp_route_input; 915 break; 916 case RTN_UNICAST: 917 rtfn = mctp_route_output; 918 break; 919 default: 920 return -EINVAL; 921 } 922 923 rt = mctp_route_alloc(); 924 if (!rt) 925 return -ENOMEM; 926 927 rt->min = daddr_start; 928 rt->max = daddr_start + daddr_extent; 929 rt->mtu = mtu; 930 rt->dev = mdev; 931 mctp_dev_hold(rt->dev); 932 rt->type = type; 933 rt->output = rtfn; 934 935 ASSERT_RTNL(); 936 /* Prevent duplicate identical routes. */ 937 list_for_each_entry(ert, &net->mctp.routes, list) { 938 if (mctp_rt_compare_exact(rt, ert)) { 939 mctp_route_release(rt); 940 return -EEXIST; 941 } 942 } 943 944 list_add_rcu(&rt->list, &net->mctp.routes); 945 946 return 0; 947 } 948 949 static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, 950 unsigned int daddr_extent, unsigned char type) 951 { 952 struct net *net = dev_net(mdev->dev); 953 struct mctp_route *rt, *tmp; 954 mctp_eid_t daddr_end; 955 bool dropped; 956 957 if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) 958 return -EINVAL; 959 960 daddr_end = daddr_start + daddr_extent; 961 dropped = false; 962 963 ASSERT_RTNL(); 964 965 list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { 966 if (rt->dev == mdev && 967 rt->min == daddr_start && rt->max == daddr_end && 968 rt->type == type) { 969 list_del_rcu(&rt->list); 970 /* TODO: immediate RTM_DELROUTE */ 971 mctp_route_release(rt); 972 dropped = true; 973 } 974 } 975 976 return dropped ? 0 : -ENOENT; 977 } 978 979 int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) 980 { 981 return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL); 982 } 983 984 int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) 985 { 986 return mctp_route_remove(mdev, addr, 0, RTN_LOCAL); 987 } 988 989 /* removes all entries for a given device */ 990 void mctp_route_remove_dev(struct mctp_dev *mdev) 991 { 992 struct net *net = dev_net(mdev->dev); 993 struct mctp_route *rt, *tmp; 994 995 ASSERT_RTNL(); 996 list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { 997 if (rt->dev == mdev) { 998 list_del_rcu(&rt->list); 999 /* TODO: immediate RTM_DELROUTE */ 1000 mctp_route_release(rt); 1001 } 1002 } 1003 } 1004 1005 /* Incoming packet-handling */ 1006 1007 static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, 1008 struct packet_type *pt, 1009 struct net_device *orig_dev) 1010 { 1011 struct net *net = dev_net(dev); 1012 struct mctp_dev *mdev; 1013 struct mctp_skb_cb *cb; 1014 struct mctp_route *rt; 1015 struct mctp_hdr *mh; 1016 1017 rcu_read_lock(); 1018 mdev = __mctp_dev_get(dev); 1019 rcu_read_unlock(); 1020 if (!mdev) { 1021 /* basic non-data sanity checks */ 1022 goto err_drop; 1023 } 1024 1025 if (!pskb_may_pull(skb, sizeof(struct mctp_hdr))) 1026 goto err_drop; 1027 1028 skb_reset_transport_header(skb); 1029 skb_reset_network_header(skb); 1030 1031 /* We have enough for a header; decode and route */ 1032 mh = mctp_hdr(skb); 1033 if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) 1034 goto err_drop; 1035 1036 /* MCTP drivers must populate halen/haddr */ 1037 if (dev->type == ARPHRD_MCTP) { 1038 cb = mctp_cb(skb); 1039 } else { 1040 cb = __mctp_cb(skb); 1041 cb->halen = 0; 1042 } 1043 cb->net = READ_ONCE(mdev->net); 1044 cb->ifindex = dev->ifindex; 1045 1046 rt = mctp_route_lookup(net, cb->net, mh->dest); 1047 1048 /* NULL EID, but addressed to our physical address */ 1049 if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) 1050 rt = mctp_route_lookup_null(net, dev); 1051 1052 if (!rt) 1053 goto err_drop; 1054 1055 rt->output(rt, skb); 1056 mctp_route_release(rt); 1057 1058 return NET_RX_SUCCESS; 1059 1060 err_drop: 1061 kfree_skb(skb); 1062 return NET_RX_DROP; 1063 } 1064 1065 static struct packet_type mctp_packet_type = { 1066 .type = cpu_to_be16(ETH_P_MCTP), 1067 .func = mctp_pkttype_receive, 1068 }; 1069 1070 /* netlink interface */ 1071 1072 static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = { 1073 [RTA_DST] = { .type = NLA_U8 }, 1074 [RTA_METRICS] = { .type = NLA_NESTED }, 1075 [RTA_OIF] = { .type = NLA_U32 }, 1076 }; 1077 1078 /* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing. 1079 * tb must hold RTA_MAX+1 elements. 1080 */ 1081 static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, 1082 struct netlink_ext_ack *extack, 1083 struct nlattr **tb, struct rtmsg **rtm, 1084 struct mctp_dev **mdev, mctp_eid_t *daddr_start) 1085 { 1086 struct net *net = sock_net(skb->sk); 1087 struct net_device *dev; 1088 unsigned int ifindex; 1089 int rc; 1090 1091 rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX, 1092 rta_mctp_policy, extack); 1093 if (rc < 0) { 1094 NL_SET_ERR_MSG(extack, "incorrect format"); 1095 return rc; 1096 } 1097 1098 if (!tb[RTA_DST]) { 1099 NL_SET_ERR_MSG(extack, "dst EID missing"); 1100 return -EINVAL; 1101 } 1102 *daddr_start = nla_get_u8(tb[RTA_DST]); 1103 1104 if (!tb[RTA_OIF]) { 1105 NL_SET_ERR_MSG(extack, "ifindex missing"); 1106 return -EINVAL; 1107 } 1108 ifindex = nla_get_u32(tb[RTA_OIF]); 1109 1110 *rtm = nlmsg_data(nlh); 1111 if ((*rtm)->rtm_family != AF_MCTP) { 1112 NL_SET_ERR_MSG(extack, "route family must be AF_MCTP"); 1113 return -EINVAL; 1114 } 1115 1116 dev = __dev_get_by_index(net, ifindex); 1117 if (!dev) { 1118 NL_SET_ERR_MSG(extack, "bad ifindex"); 1119 return -ENODEV; 1120 } 1121 *mdev = mctp_dev_get_rtnl(dev); 1122 if (!*mdev) 1123 return -ENODEV; 1124 1125 if (dev->flags & IFF_LOOPBACK) { 1126 NL_SET_ERR_MSG(extack, "no routes to loopback"); 1127 return -EINVAL; 1128 } 1129 1130 return 0; 1131 } 1132 1133 static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = { 1134 [RTAX_MTU] = { .type = NLA_U32 }, 1135 }; 1136 1137 static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, 1138 struct netlink_ext_ack *extack) 1139 { 1140 struct nlattr *tb[RTA_MAX + 1]; 1141 struct nlattr *tbx[RTAX_MAX + 1]; 1142 mctp_eid_t daddr_start; 1143 struct mctp_dev *mdev; 1144 struct rtmsg *rtm; 1145 unsigned int mtu; 1146 int rc; 1147 1148 rc = mctp_route_nlparse(skb, nlh, extack, tb, 1149 &rtm, &mdev, &daddr_start); 1150 if (rc < 0) 1151 return rc; 1152 1153 if (rtm->rtm_type != RTN_UNICAST) { 1154 NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); 1155 return -EINVAL; 1156 } 1157 1158 mtu = 0; 1159 if (tb[RTA_METRICS]) { 1160 rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS], 1161 rta_metrics_policy, NULL); 1162 if (rc < 0) 1163 return rc; 1164 if (tbx[RTAX_MTU]) 1165 mtu = nla_get_u32(tbx[RTAX_MTU]); 1166 } 1167 1168 if (rtm->rtm_type != RTN_UNICAST) 1169 return -EINVAL; 1170 1171 rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, 1172 rtm->rtm_type); 1173 return rc; 1174 } 1175 1176 static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, 1177 struct netlink_ext_ack *extack) 1178 { 1179 struct nlattr *tb[RTA_MAX + 1]; 1180 mctp_eid_t daddr_start; 1181 struct mctp_dev *mdev; 1182 struct rtmsg *rtm; 1183 int rc; 1184 1185 rc = mctp_route_nlparse(skb, nlh, extack, tb, 1186 &rtm, &mdev, &daddr_start); 1187 if (rc < 0) 1188 return rc; 1189 1190 /* we only have unicast routes */ 1191 if (rtm->rtm_type != RTN_UNICAST) 1192 return -EINVAL; 1193 1194 rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST); 1195 return rc; 1196 } 1197 1198 static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, 1199 u32 portid, u32 seq, int event, unsigned int flags) 1200 { 1201 struct nlmsghdr *nlh; 1202 struct rtmsg *hdr; 1203 void *metrics; 1204 1205 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); 1206 if (!nlh) 1207 return -EMSGSIZE; 1208 1209 hdr = nlmsg_data(nlh); 1210 hdr->rtm_family = AF_MCTP; 1211 1212 /* we use the _len fields as a number of EIDs, rather than 1213 * a number of bits in the address 1214 */ 1215 hdr->rtm_dst_len = rt->max - rt->min; 1216 hdr->rtm_src_len = 0; 1217 hdr->rtm_tos = 0; 1218 hdr->rtm_table = RT_TABLE_DEFAULT; 1219 hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ 1220 hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ 1221 hdr->rtm_type = rt->type; 1222 1223 if (nla_put_u8(skb, RTA_DST, rt->min)) 1224 goto cancel; 1225 1226 metrics = nla_nest_start_noflag(skb, RTA_METRICS); 1227 if (!metrics) 1228 goto cancel; 1229 1230 if (rt->mtu) { 1231 if (nla_put_u32(skb, RTAX_MTU, rt->mtu)) 1232 goto cancel; 1233 } 1234 1235 nla_nest_end(skb, metrics); 1236 1237 if (rt->dev) { 1238 if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex)) 1239 goto cancel; 1240 } 1241 1242 /* TODO: conditional neighbour physaddr? */ 1243 1244 nlmsg_end(skb, nlh); 1245 1246 return 0; 1247 1248 cancel: 1249 nlmsg_cancel(skb, nlh); 1250 return -EMSGSIZE; 1251 } 1252 1253 static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb) 1254 { 1255 struct net *net = sock_net(skb->sk); 1256 struct mctp_route *rt; 1257 int s_idx, idx; 1258 1259 /* TODO: allow filtering on route data, possibly under 1260 * cb->strict_check 1261 */ 1262 1263 /* TODO: change to struct overlay */ 1264 s_idx = cb->args[0]; 1265 idx = 0; 1266 1267 rcu_read_lock(); 1268 list_for_each_entry_rcu(rt, &net->mctp.routes, list) { 1269 if (idx++ < s_idx) 1270 continue; 1271 if (mctp_fill_rtinfo(skb, rt, 1272 NETLINK_CB(cb->skb).portid, 1273 cb->nlh->nlmsg_seq, 1274 RTM_NEWROUTE, NLM_F_MULTI) < 0) 1275 break; 1276 } 1277 1278 rcu_read_unlock(); 1279 cb->args[0] = idx; 1280 1281 return skb->len; 1282 } 1283 1284 /* net namespace implementation */ 1285 static int __net_init mctp_routes_net_init(struct net *net) 1286 { 1287 struct netns_mctp *ns = &net->mctp; 1288 1289 INIT_LIST_HEAD(&ns->routes); 1290 INIT_HLIST_HEAD(&ns->binds); 1291 mutex_init(&ns->bind_lock); 1292 INIT_HLIST_HEAD(&ns->keys); 1293 spin_lock_init(&ns->keys_lock); 1294 WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET)); 1295 return 0; 1296 } 1297 1298 static void __net_exit mctp_routes_net_exit(struct net *net) 1299 { 1300 struct mctp_route *rt; 1301 1302 rcu_read_lock(); 1303 list_for_each_entry_rcu(rt, &net->mctp.routes, list) 1304 mctp_route_release(rt); 1305 rcu_read_unlock(); 1306 } 1307 1308 static struct pernet_operations mctp_net_ops = { 1309 .init = mctp_routes_net_init, 1310 .exit = mctp_routes_net_exit, 1311 }; 1312 1313 int __init mctp_routes_init(void) 1314 { 1315 dev_add_pack(&mctp_packet_type); 1316 1317 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE, 1318 NULL, mctp_dump_rtinfo, 0); 1319 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE, 1320 mctp_newroute, NULL, 0); 1321 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE, 1322 mctp_delroute, NULL, 0); 1323 1324 return register_pernet_subsys(&mctp_net_ops); 1325 } 1326 1327 void __exit mctp_routes_exit(void) 1328 { 1329 unregister_pernet_subsys(&mctp_net_ops); 1330 rtnl_unregister(PF_MCTP, RTM_DELROUTE); 1331 rtnl_unregister(PF_MCTP, RTM_NEWROUTE); 1332 rtnl_unregister(PF_MCTP, RTM_GETROUTE); 1333 dev_remove_pack(&mctp_packet_type); 1334 } 1335 1336 #if IS_ENABLED(CONFIG_MCTP_TEST) 1337 #include "test/route-test.c" 1338 #endif 1339