// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 * implementation.
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

/* Upper bound on a reassembled message; fragments that would grow a
 * reassembly past this are rejected (see mctp_frag_queue()).
 */
static const unsigned int mctp_message_maxlen = 64 * 1024;

/* route output callbacks */

/* Default output handler installed by mctp_route_alloc(): silently drops
 * the packet. Replaced with a real handler in mctp_route_add().
 */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

/* Find a socket bound to this skb's (net, message type, dest EID).
 *
 * Walks the per-netns bind list under RCU; caller must hold the RCU read
 * lock. Returns the matching mctp_sock or NULL. No reference is taken on
 * the returned socket — it is only valid within the RCU critical section.
 */
static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *mh;
	struct sock *sk;
	u8 type;

	WARN_ON(!rcu_read_lock_held());

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	/* need at least one linear byte to read the message type below */
	if (!skb_headlen(skb))
		return NULL;

	/* first body byte is the message type; mask off the top bit
	 * (presumably the MCTP IC bit — confirm against DSP0236)
	 */
	type = (*(u8 *)skb->data) & 0x7f;

	sk_for_each_rcu(sk, &net->mctp.binds) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		/* bind_net and bind_addr may be wildcards; bind_type must
		 * match exactly
		 */
		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (msk->bind_addr != MCTP_ADDR_ANY &&
		    msk->bind_addr != mh->dest)
			continue;

		return msk;
	}

	return NULL;
}

/* Exact-match comparison of a reassembly/response key against
 * (local EID, peer EID, tag).
 */
static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
			   mctp_eid_t peer, u8 tag)
{
	if (key->local_addr != local)
		return false;

	if (key->peer_addr != peer)
		return false;

	if (key->tag != tag)
		return false;

	return true;
}

/* returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	/* the tag match includes the TO (tag-owner) flag bit */
	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, mh->dest, peer, tag))
			continue;

		/* skip keys that are being torn down (!valid) */
		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		/* return with key->lock still held; the saved irq state is
		 * handed to the caller via *irqflags, who must eventually
		 * spin_unlock_irqrestore(&key->lock, ...) and unref the key
		 */
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}

/* Allocate a key for (local, peer, tag) owned by socket msk.
 *
 * Returned with one reference, valid = true, and not yet on any list;
 * publish it with mctp_key_add(). Returns NULL on allocation failure.
 */
static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
					  mctp_eid_t local, mctp_eid_t peer,
					  u8 tag, gfp_t gfp)
{
	struct mctp_sk_key *key;

	key = kzalloc(sizeof(*key), gfp);
	if (!key)
		return NULL;

	key->peer_addr = peer;
	key->local_addr = local;
	key->tag = tag;
	key->sk = &msk->sk;
	key->valid = true;
	spin_lock_init(&key->lock);
	refcount_set(&key->refs, 1);

	return key;
}

/* Drop one reference on key, freeing it on the last unref */
void mctp_key_unref(struct mctp_sk_key *key)
{
	if (refcount_dec_and_test(&key->refs))
		kfree(key);
}

/* Publish key on the per-netns and per-socket key lists.
 *
 * Takes an extra reference for the lists on success. Returns -EEXIST if an
 * equivalent *valid* key is already present (an invalid, mid-teardown
 * duplicate does not conflict).
 */
static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
				   key->tag)) {
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		/* one extra ref for being on the lists */
		refcount_inc(&key->refs);
		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}

/* We're done with the key; unset valid and remove from lists. There may still
 * be outstanding refs on the key though...
 */
static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
				   unsigned long flags)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	/* detach any partial reassembly before the key becomes invisible */
	skb = key->reasm_head;
	key->reasm_head = NULL;
	key->reasm_dead = true;
	key->valid = false;
	spin_unlock_irqrestore(&key->lock, flags);

	spin_lock_irqsave(&net->mctp.keys_lock, flags);
	hlist_del(&key->hlist);
	hlist_del(&key->sklist);
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	/* one unref for the lists */
	mctp_key_unref(key);

	/* and one for the local reference */
	mctp_key_unref(key);

	/* NOTE(review): kfree_skb(NULL) is a no-op; this guard is redundant */
	if (skb)
		kfree_skb(skb);

}

/* Append a fragment skb to key's reassembly chain.
 *
 * The first fragment becomes reasm_head; later fragments must arrive with
 * consecutive sequence numbers (mod MCTP_HDR_SEQ_MASK+1) and are linked on
 * the head's frag_list. Returns 0 on success, -EINVAL on a sequence gap or
 * if the message would exceed mctp_message_maxlen.
 */
static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	u8 exp_seq, this_seq;

	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
		& MCTP_HDR_SEQ_MASK;

	if (!key->reasm_head) {
		/* first fragment: start the chain and record its seq */
		key->reasm_head = skb;
		key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
		key->last_seq = this_seq;
		return 0;
	}

	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

	if (this_seq != exp_seq)
		return -EINVAL;

	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
		return -EINVAL;

	/* append to the frag_list tail; drop any socket ownership, the
	 * head skb carries the accounting
	 */
	skb->next = NULL;
	skb->sk = NULL;
	*key->reasm_tailp = skb;
	key->reasm_tailp = &skb->next;

	key->last_seq = this_seq;

	/* keep the head's length/truesize accounting in step */
	key->reasm_head->data_len += skb->len;
	key->reasm_head->len += skb->len;
	key->reasm_head->truesize += skb->truesize;

	return 0;
}

/* Input path for locally-destined packets: find/create the reassembly key,
 * queue fragments, and deliver complete messages to the owning socket.
 *
 * Consumes the skb in all cases (queued, delivered, or freed on error).
 * Returns 0 on success or a negative errno.
 */
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct mctp_sk_key *key;
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	unsigned long f;	/* irq flags for key->lock, from lookup */
	u8 tag, flags;
	int rc;

	msk = NULL;
	rc = -EINVAL;

	/* we may be receiving a locally-routed packet; drop source sk
	 * accounting
	 */
	skb_orphan(skb);

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto out;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	skb_pull(skb, sizeof(struct mctp_hdr));

	if (mh->ver != 1)
		goto out;

	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	rcu_read_lock();

	/* lookup socket / reasm context, exactly matching (src,dest,tag).
	 * we hold a ref on the key, and key->lock held.
	 */
	key = mctp_lookup_key(net, skb, mh->src, &f);

	if (flags & MCTP_HDR_FLAG_SOM) {
		if (key) {
			msk = container_of(key->sk, struct mctp_sock, sk);
		} else {
			/* first response to a broadcast? do a more general
			 * key lookup to find the socket, but don't use this
			 * key for reassembly - we'll create a more specific
			 * one for future packets if required (ie, !EOM).
			 */
			key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
			if (key) {
				msk = container_of(key->sk,
						   struct mctp_sock, sk);
				spin_unlock_irqrestore(&key->lock, f);
				mctp_key_unref(key);
				key = NULL;
			}
		}

		/* no key at all: a tag-owner (TO) packet may match a bound
		 * listener instead
		 */
		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
			msk = mctp_lookup_bind(net, skb);

		if (!msk) {
			rc = -ENOENT;
			goto out_unlock;
		}

		/* single-packet message? deliver to socket, clean up any
		 * pending key.
		 * NOTE(review): sock_queue_rcv_skb() return is ignored; a
		 * full receive queue silently drops — confirm intended
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			sock_queue_rcv_skb(&msk->sk, skb);
			if (key) {
				/* we've hit a pending reassembly; not much we
				 * can do but drop it
				 */
				__mctp_key_unlock_drop(key, net, f);
				key = NULL;
			}
			rc = 0;
			goto out_unlock;
		}

		/* broadcast response or a bind() - create a key for further
		 * packets for this message
		 */
		if (!key) {
			key = mctp_key_alloc(msk, mh->dest, mh->src,
					     tag, GFP_ATOMIC);
			if (!key) {
				rc = -ENOMEM;
				goto out_unlock;
			}

			/* we can queue without the key lock here, as the
			 * key isn't observable yet
			 */
			mctp_frag_queue(key, skb);

			/* if the key_add fails, we've raced with another
			 * SOM packet with the same src, dest and tag. There's
			 * no way to distinguish future packets, so all we
			 * can do is drop; we'll free the skb on exit from
			 * this function.
			 * NOTE(review): kfree() relies on refs still being 1
			 * here; mctp_key_unref() would be the safer idiom
			 */
			rc = mctp_key_add(key, msk);
			if (rc)
				kfree(key);

			/* we don't need to release key->lock on exit */
			key = NULL;

		} else {
			if (key->reasm_head || key->reasm_dead) {
				/* duplicate start? drop everything */
				__mctp_key_unlock_drop(key, net, f);
				rc = -EEXIST;
				key = NULL;
			} else {
				rc = mctp_frag_queue(key, skb);
			}
		}

	} else if (key) {
		/* this packet continues a previous message; reassemble
		 * using the message-specific key
		 */

		/* we need to be continuing an existing reassembly... */
		if (!key->reasm_head)
			rc = -EINVAL;
		else
			rc = mctp_frag_queue(key, skb);

		/* end of message? deliver to socket, and we're done with
		 * the reassembly/response key
		 */
		if (!rc && flags & MCTP_HDR_FLAG_EOM) {
			sock_queue_rcv_skb(key->sk, key->reasm_head);
			key->reasm_head = NULL;
			__mctp_key_unlock_drop(key, net, f);
			key = NULL;
		}

	} else {
		/* not a start, no matching key */
		rc = -ENOENT;
	}

out_unlock:
	rcu_read_unlock();
	if (key) {
		/* release the lock and ref taken by mctp_lookup_key() */
		spin_unlock_irqrestore(&key->lock, f);
		mctp_key_unref(key);
	}
out:
	if (rc)
		kfree_skb(skb);
	return rc;
}

/* Effective MTU for a route: the route's own MTU if set, otherwise the
 * underlying net device's.
 */
static unsigned int mctp_route_mtu(struct mctp_route *rt)
{
	return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
}

/* Output path for gateway/unicast routes: resolve the link-layer address,
 * build the hardware header, and transmit. Consumes the skb.
 */
static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	char daddr_buf[MAX_ADDR_LEN];
	char *daddr = NULL;
	unsigned int mtu;
	int rc;

	skb->protocol = htons(ETH_P_MCTP);

	mtu = READ_ONCE(skb->dev->mtu);
	if (skb->len > mtu) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* If lookup fails let the device handle daddr==NULL */
	if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
		daddr = daddr_buf;

	/* NOTE(review): dev_hard_header() may return a positive header
	 * length for devices with header_ops; `if (rc)` would treat that
	 * as failure — confirm whether `rc < 0` was intended
	 */
	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			     daddr, skb->dev->dev_addr, skb->len);
	if (rc) {
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	rc = dev_queue_xmit(skb);
	if (rc)
		rc = net_xmit_errno(rc);

	return rc;
}

/* route alloc/release */

/* Drop a reference on rt; on the last unref, release the device and free
 * the route after an RCU grace period (lookups walk the list under RCU).
 */
static void mctp_route_release(struct mctp_route *rt)
{
	if (refcount_dec_and_test(&rt->refs)) {
		mctp_dev_put(rt->dev);
		kfree_rcu(rt, rcu);
	}
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
	struct mctp_route *rt;

	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
	if (!rt)
		return NULL;

	INIT_LIST_HEAD(&rt->list);
	refcount_set(&rt->refs, 1);
	/* placeholder until mctp_route_add() installs the real handler */
	rt->output = mctp_route_discard;

	return rt;
}

/* Read the netns' default MCTP network id */
unsigned int mctp_default_net(struct net *net)
{
	return READ_ONCE(net->mctp.default_net);
}

/* Set the netns' default MCTP network id; 0 is reserved/invalid */
int mctp_default_net_set(struct net *net, unsigned int index)
{
	if (index == 0)
		return -EINVAL;
	WRITE_ONCE(net->mctp.default_net, index);
	return 0;
}

/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	/* we hold the net->keys_lock here, allowing updates to both
	 * the net and sk lists
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
	/* extra ref for list membership, matching mctp_key_add() */
	refcount_inc(&key->refs);
}

/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
 * it for the socket msk
 */
static int mctp_alloc_local_tag(struct mctp_sock *msk,
				mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	int rc = -EAGAIN;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (daddr == MCTP_ADDR_NULL)
		daddr = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
	if (!key)
		return -ENOMEM;

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		if (!((tmp->peer_addr == daddr ||
		       tmp->peer_addr == MCTP_ADDR_ANY) &&
		      tmp->local_addr == saddr))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		/* all eight tags taken: no point scanning further */
		if (!tagbits)
			break;
	}

	if (tagbits) {
		/* lowest free tag wins */
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		*tagp = key->tag;
		rc = 0;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	/* unpublished key with a single ref: plain kfree is sufficient */
	if (!tagbits)
		kfree(key);

	return rc;
}

/* routing lookups */

/* Does rt cover (net, eid)? Routes span the inclusive EID range
 * [rt->min, rt->max] on a single MCTP network.
 */
static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
{
	return READ_ONCE(rt->dev->net) == net &&
		rt->min <= eid && rt->max >= eid;
}

/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
				  struct mctp_route *rt2)
{
	ASSERT_RTNL();
	return rt1->dev->net == rt2->dev->net &&
		rt1->min == rt2->min &&
		rt1->max == rt2->max;
}

/* Find a route for (dnet, daddr), taking a reference on it.
 * Runs under RCU; returns NULL if no live route matches.
 */
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
				     mctp_eid_t daddr)
{
	struct mctp_route *tmp, *rt = NULL;

	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
		/* TODO: add metrics */
		if (mctp_rt_match_eid(tmp, dnet, daddr)) {
			/* skip routes already on their way out */
			if (refcount_inc_not_zero(&tmp->refs)) {
				rt = tmp;
				break;
			}
		}
	}

	return rt;
}

/* Find the local route attached to dev, for packets sent to the NULL EID.
 * Takes a reference on the returned route.
 */
static struct mctp_route *mctp_route_lookup_null(struct net *net,
						 struct net_device *dev)
{
	struct mctp_route *rt;

	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (rt->dev->dev == dev && rt->type == RTN_LOCAL &&
		    refcount_inc_not_zero(&rt->refs))
			return rt;
	}

	return NULL;
}

/* sends a
skb to rt and releases the route. */
int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
{
	int rc;

	rc = rt->output(rt, skb);
	mctp_route_release(rt);
	return rc;
}

/* Fragment skb into MTU-sized packets, transmitting each via rt->output().
 *
 * Each fragment carries a copy of the MCTP header with SOM/EOM flags and
 * the 2-bit sequence counter set appropriately. Consumes both the skb and
 * the route reference. Returns 0 on success, or the first transmit/alloc
 * error (already-sent fragments are not unwound).
 */
static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	/* need room for the header plus at least one payload byte */
	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* we've got the header */
	skb_pull(skb, hlen);

	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		/* keep socket send-buffer accounting on the fragments */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, MCTP_HEADER_MAXLEN);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* do route, but don't drop the rt reference */
		rc = rt->output(rt, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	mctp_route_release(rt);
	consume_skb(skb);
	return rc;
}

/* Transmit a locally-originated message on rt: pick a source EID, allocate
 * a tag if the caller owns one (req_tag has TO set), build the MCTP header
 * and send — fragmenting if the message exceeds the route MTU.
 *
 * Consumes the route reference (via mctp_do_route/mctp_do_fragment_route).
 * Returns 0 or a negative errno.
 */
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *hdr;
	unsigned long flags;
	unsigned int mtu;
	mctp_eid_t saddr;
	int rc;
	u8 tag;

	if (WARN_ON(!rt->dev))
		return -EINVAL;

	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
	if (rt->dev->num_addrs == 0) {
		rc = -EHOSTUNREACH;
	} else {
		/* use the outbound interface's first address as our source */
		saddr = rt->dev->addrs[0];
		rc = 0;
	}
	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);

	if (rc)
		return rc;

	if (req_tag & MCTP_HDR_FLAG_TO) {
		/* we're the tag owner: allocate a fresh local tag */
		rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
		if (rc)
			return rc;
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		tag = req_tag;
	}


	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;
	skb_reset_transport_header(skb);
	skb_push(skb, sizeof(struct mctp_hdr));
	skb_reset_network_header(skb);
	skb->dev = rt->dev->dev;

	/* cb->net will have been set on initial ingress */
	cb->src = saddr;

	/* set up common header fields */
	hdr = mctp_hdr(skb);
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;

	mtu = mctp_route_mtu(rt);

	/* NOTE(review): the header was already pushed above, so skb->len
	 * includes it; adding sizeof(struct mctp_hdr) again looks like it
	 * double-counts the header (conservative: fragments early) — confirm
	 */
	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
			tag;
		return mctp_do_route(rt, skb);
	} else {
		return mctp_do_fragment_route(rt, skb, mtu, tag);
	}
}

/* route management */

/* Add a route covering EIDs [daddr_start, daddr_start + daddr_extent] on
 * mdev. type selects the output handler: RTN_LOCAL routes feed the input
 * path, RTN_UNICAST routes transmit. Caller must hold RTNL.
 *
 * Returns 0, or -EINVAL on bad parameters, -ENOMEM, or -EEXIST for an
 * identical existing route.
 */
static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			  unsigned int daddr_extent, unsigned int mtu,
			  unsigned char type)
{
	int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *ert;

	if (!mctp_address_ok(daddr_start))
		return -EINVAL;

	/* range must stay within the valid EID space */
	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	switch (type) {
	case RTN_LOCAL:
		rtfn = mctp_route_input;
		break;
	case RTN_UNICAST:
		rtfn = mctp_route_output;
		break;
	default:
		return -EINVAL;
	}

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rt->min = daddr_start;
	rt->max = daddr_start + daddr_extent;
	rt->mtu = mtu;
	rt->dev = mdev;
	mctp_dev_hold(rt->dev);
	rt->type = type;
	rt->output = rtfn;

	ASSERT_RTNL();
	/* Prevent duplicate identical routes. */
	list_for_each_entry(ert, &net->mctp.routes, list) {
		if (mctp_rt_compare_exact(rt, ert)) {
			mctp_route_release(rt);
			return -EEXIST;
		}
	}

	list_add_rcu(&rt->list, &net->mctp.routes);

	return 0;
}

/* Remove route(s) exactly matching (mdev, [daddr_start, +daddr_extent]).
 * Caller must hold RTNL. Returns 0 if anything was removed, else -ENOENT.
 */
static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			     unsigned int daddr_extent)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;
	mctp_eid_t daddr_end;
	bool dropped;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	daddr_end = daddr_start + daddr_extent;
	dropped = false;

	ASSERT_RTNL();

	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev &&
		    rt->min == daddr_start && rt->max == daddr_end) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
			dropped = true;
		}
	}

	return dropped ? 0 : -ENOENT;
}

/* Add the single-EID local route created when an address is assigned */
int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
}

/* Remove the single-EID local route for a removed address */
int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(mdev, addr, 0);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
		}
	}
}

/* Incoming packet-handling */

/* packet_type receive handler: validate the MCTP header, tag the skb with
 * its network id, look up a route and dispatch. Returns NET_RX_SUCCESS or
 * NET_RX_DROP (freeing the skb on drop).
 */
static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
				struct packet_type *pt,
				struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mctp_dev *mdev;
	struct mctp_skb_cb *cb;
	struct mctp_route *rt;
	struct mctp_hdr *mh;

	/* NOTE(review): mdev is read under RCU but then used after
	 * rcu_read_unlock() (cb->net below) without a reference — confirm
	 * lifetime against device teardown
	 */
	rcu_read_lock();
	mdev = __mctp_dev_get(dev);
	rcu_read_unlock();
	if (!mdev) {
		/* basic non-data sanity checks */
		goto err_drop;
	}

	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
		goto err_drop;

	skb_reset_transport_header(skb);
	skb_reset_network_header(skb);

	/* We have enough for a header; decode and route */
	mh = mctp_hdr(skb);
	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
		goto err_drop;

	cb = __mctp_cb(skb);
	cb->net = READ_ONCE(mdev->net);

	rt = mctp_route_lookup(net, cb->net, mh->dest);

	/* NULL EID, but addressed to our physical address */
	if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
		rt = mctp_route_lookup_null(net, dev);

	if (!rt)
		goto err_drop;

	mctp_do_route(rt, skb);

	return NET_RX_SUCCESS;

err_drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}

static struct packet_type mctp_packet_type = { 944 .type = cpu_to_be16(ETH_P_MCTP), 945 .func = mctp_pkttype_receive, 946 }; 947 948 /* netlink interface */ 949 950 static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = { 951 [RTA_DST] = { .type = NLA_U8 }, 952 [RTA_METRICS] = { .type = NLA_NESTED }, 953 [RTA_OIF] = { .type = NLA_U32 }, 954 }; 955 956 /* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing. 957 * tb must hold RTA_MAX+1 elements. 958 */ 959 static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, 960 struct netlink_ext_ack *extack, 961 struct nlattr **tb, struct rtmsg **rtm, 962 struct mctp_dev **mdev, mctp_eid_t *daddr_start) 963 { 964 struct net *net = sock_net(skb->sk); 965 struct net_device *dev; 966 unsigned int ifindex; 967 int rc; 968 969 rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX, 970 rta_mctp_policy, extack); 971 if (rc < 0) { 972 NL_SET_ERR_MSG(extack, "incorrect format"); 973 return rc; 974 } 975 976 if (!tb[RTA_DST]) { 977 NL_SET_ERR_MSG(extack, "dst EID missing"); 978 return -EINVAL; 979 } 980 *daddr_start = nla_get_u8(tb[RTA_DST]); 981 982 if (!tb[RTA_OIF]) { 983 NL_SET_ERR_MSG(extack, "ifindex missing"); 984 return -EINVAL; 985 } 986 ifindex = nla_get_u32(tb[RTA_OIF]); 987 988 *rtm = nlmsg_data(nlh); 989 if ((*rtm)->rtm_family != AF_MCTP) { 990 NL_SET_ERR_MSG(extack, "route family must be AF_MCTP"); 991 return -EINVAL; 992 } 993 994 dev = __dev_get_by_index(net, ifindex); 995 if (!dev) { 996 NL_SET_ERR_MSG(extack, "bad ifindex"); 997 return -ENODEV; 998 } 999 *mdev = mctp_dev_get_rtnl(dev); 1000 if (!*mdev) 1001 return -ENODEV; 1002 1003 if (dev->flags & IFF_LOOPBACK) { 1004 NL_SET_ERR_MSG(extack, "no routes to loopback"); 1005 return -EINVAL; 1006 } 1007 1008 return 0; 1009 } 1010 1011 static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, 1012 struct netlink_ext_ack *extack) 1013 { 1014 struct nlattr *tb[RTA_MAX + 1]; 1015 mctp_eid_t daddr_start; 1016 struct mctp_dev *mdev; 1017 
struct rtmsg *rtm; 1018 unsigned int mtu; 1019 int rc; 1020 1021 rc = mctp_route_nlparse(skb, nlh, extack, tb, 1022 &rtm, &mdev, &daddr_start); 1023 if (rc < 0) 1024 return rc; 1025 1026 if (rtm->rtm_type != RTN_UNICAST) { 1027 NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); 1028 return -EINVAL; 1029 } 1030 1031 /* TODO: parse mtu from nlparse */ 1032 mtu = 0; 1033 1034 if (rtm->rtm_type != RTN_UNICAST) 1035 return -EINVAL; 1036 1037 rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, 1038 rtm->rtm_type); 1039 return rc; 1040 } 1041 1042 static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, 1043 struct netlink_ext_ack *extack) 1044 { 1045 struct nlattr *tb[RTA_MAX + 1]; 1046 mctp_eid_t daddr_start; 1047 struct mctp_dev *mdev; 1048 struct rtmsg *rtm; 1049 int rc; 1050 1051 rc = mctp_route_nlparse(skb, nlh, extack, tb, 1052 &rtm, &mdev, &daddr_start); 1053 if (rc < 0) 1054 return rc; 1055 1056 /* we only have unicast routes */ 1057 if (rtm->rtm_type != RTN_UNICAST) 1058 return -EINVAL; 1059 1060 rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len); 1061 return rc; 1062 } 1063 1064 static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, 1065 u32 portid, u32 seq, int event, unsigned int flags) 1066 { 1067 struct nlmsghdr *nlh; 1068 struct rtmsg *hdr; 1069 void *metrics; 1070 1071 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); 1072 if (!nlh) 1073 return -EMSGSIZE; 1074 1075 hdr = nlmsg_data(nlh); 1076 hdr->rtm_family = AF_MCTP; 1077 1078 /* we use the _len fields as a number of EIDs, rather than 1079 * a number of bits in the address 1080 */ 1081 hdr->rtm_dst_len = rt->max - rt->min; 1082 hdr->rtm_src_len = 0; 1083 hdr->rtm_tos = 0; 1084 hdr->rtm_table = RT_TABLE_DEFAULT; 1085 hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ 1086 hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? 
 */
	hdr->rtm_type = rt->type;

	if (nla_put_u8(skb, RTA_DST, rt->min))
		goto cancel;

	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!metrics)
		goto cancel;

	/* only emit RTAX_MTU when the route overrides the device MTU */
	if (rt->mtu) {
		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
			goto cancel;
	}

	nla_nest_end(skb, metrics);

	if (rt->dev) {
		if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
			goto cancel;
	}

	/* TODO: conditional neighbour physaddr? */

	nlmsg_end(skb, nlh);

	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* RTM_GETROUTE dump handler: emit all routes in the netns, resuming from
 * the index saved in cb->args[0] across multi-part dumps.
 */
static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int s_idx, idx;

	/* TODO: allow filtering on route data, possibly under
	 * cb->strict_check
	 */

	/* TODO: change to struct overlay */
	s_idx = cb->args[0];
	idx = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		/* skip entries already dumped in a previous pass */
		if (idx++ < s_idx)
			continue;
		if (mctp_fill_rtinfo(skb, rt,
				     NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq,
				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
			break;
	}

	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

/* net namespace implementation */

/* Initialise the per-netns MCTP state: route/bind/key lists, their locks,
 * and the default network id.
 */
static int __net_init mctp_routes_net_init(struct net *net)
{
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	INIT_HLIST_HEAD(&ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
	return 0;
}

/* Tear down per-netns state, releasing every remaining route.
 * NOTE(review): routes are released while only holding the RCU read lock,
 * without list_del — relies on the netns being fully quiesced at exit;
 * confirm no concurrent list users remain.
 */
static void __net_exit mctp_routes_net_exit(struct net *net)
{
	struct mctp_route *rt;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
		mctp_route_release(rt);
	rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};

/* Register the MCTP packet handler, rtnetlink route ops, and per-netns
 * hooks. Called from the protocol module init.
 */
int __init mctp_routes_init(void)
{
	dev_add_pack(&mctp_packet_type);

	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
			     NULL, mctp_dump_rtinfo, 0);
	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
			     mctp_newroute, NULL, 0);
	rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
			     mctp_delroute, NULL, 0);

	return register_pernet_subsys(&mctp_net_ops);
}

/* Unwind mctp_routes_init(), in reverse registration order */
void __exit mctp_routes_exit(void)
{
	unregister_pernet_subsys(&mctp_net_ops);
	rtnl_unregister(PF_MCTP, RTM_DELROUTE);
	rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
	rtnl_unregister(PF_MCTP, RTM_GETROUTE);
	dev_remove_pack(&mctp_packet_type);
}