xref: /openbmc/linux/net/mctp/route.c (revision 9144f784f852f9a125cabe9927b986d909bfa439)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Management Component Transport Protocol (MCTP) - routing
4   * implementation.
5   *
6   * This is currently based on a simple routing table, with no dst cache. The
7   * number of routes should stay fairly small, so the lookup cost is small.
8   *
9   * Copyright (c) 2021 Code Construct
10   * Copyright (c) 2021 Google
11   */
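/* For orientation while reading the bit-masking below: the MCTP common
 * header (struct mctp_hdr, include/net/mctp.h) is four bytes - version,
 * dest EID, src EID, and flags_seq_tag, which packs SOM (bit 7), EOM
 * (bit 6), a 2-bit packet sequence (bits 5:4), the tag-owner bit TO
 * (bit 3) and a 3-bit message tag (bits 2:0).
 */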
12  
13  #include <linux/idr.h>
14  #include <linux/kconfig.h>
15  #include <linux/mctp.h>
16  #include <linux/netdevice.h>
17  #include <linux/rtnetlink.h>
18  #include <linux/skbuff.h>
19  
20  #include <uapi/linux/if_arp.h>
21  
22  #include <net/mctp.h>
23  #include <net/mctpdevice.h>
24  #include <net/netlink.h>
25  #include <net/sock.h>
26  
27  #include <trace/events/mctp.h>
28  
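/* Reassembled messages are capped at 64KiB; keys (reassembly and tag
 * reservation state) live for roughly six seconds, as mctp_key_lifetime
 * is expressed in jiffies.
 */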
29  static const unsigned int mctp_message_maxlen = 64 * 1024;
30  static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;
31  
32  static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);
33  
34  /* route output callbacks */
35  static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
36  {
37  	kfree_skb(skb);
38  	return 0;
39  }
40  
41  static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
42  {
43  	struct mctp_skb_cb *cb = mctp_cb(skb);
44  	struct mctp_hdr *mh;
45  	struct sock *sk;
46  	u8 type;
47  
48  	WARN_ON(!rcu_read_lock_held());
49  
50  	/* TODO: look up in skb->cb? */
51  	mh = mctp_hdr(skb);
52  
53  	if (!skb_headlen(skb))
54  		return NULL;
55  
56  	type = (*(u8 *)skb->data) & 0x7f;
57  
58  	sk_for_each_rcu(sk, &net->mctp.binds) {
59  		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
60  
61  		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
62  			continue;
63  
64  		if (msk->bind_type != type)
65  			continue;
66  
67  		if (!mctp_address_matches(msk->bind_addr, mh->dest))
68  			continue;
69  
70  		return msk;
71  	}
72  
73  	return NULL;
74  }
75  
76  static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
77  			   mctp_eid_t peer, u8 tag)
78  {
79  	if (!mctp_address_matches(key->local_addr, local))
80  		return false;
81  
82  	if (key->peer_addr != peer)
83  		return false;
84  
85  	if (key->tag != tag)
86  		return false;
87  
88  	return true;
89  }
90  
91  /* returns a key (with key->lock held, and refcounted), or NULL if no such
92   * key exists. The keys_lock irqsave state is handed back via *irqflags,
93   * for the caller's matching spin_unlock_irqrestore() of key->lock. */
94  static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
95  					   mctp_eid_t peer,
96  					   unsigned long *irqflags)
97  	__acquires(&key->lock)
98  {
99  	struct mctp_sk_key *key, *ret;
100  	unsigned long flags;
101  	struct mctp_hdr *mh;
102  	u8 tag;
103  
104  	mh = mctp_hdr(skb);
105  	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
106  
107  	ret = NULL;
108  	spin_lock_irqsave(&net->mctp.keys_lock, flags);
109  
110  	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
111  		if (!mctp_key_match(key, mh->dest, peer, tag))
112  			continue;
113  
114  		spin_lock(&key->lock);
115  		if (key->valid) {
116  			refcount_inc(&key->refs);
117  			ret = key;
118  			break;
119  		}
120  		spin_unlock(&key->lock);
121  	}
122  
123  	if (ret) {
124  		spin_unlock(&net->mctp.keys_lock);
125  		*irqflags = flags;
126  	} else {
127  		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
128  	}
129  
130  	return ret;
131  }
132  
133  static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
134  					  mctp_eid_t local, mctp_eid_t peer,
135  					  u8 tag, gfp_t gfp)
136  {
137  	struct mctp_sk_key *key;
138  
139  	key = kzalloc(sizeof(*key), gfp);
140  	if (!key)
141  		return NULL;
142  
143  	key->peer_addr = peer;
144  	key->local_addr = local;
145  	key->tag = tag;
146  	key->sk = &msk->sk;
147  	key->valid = true;
148  	spin_lock_init(&key->lock);
149  	refcount_set(&key->refs, 1);
150  	sock_hold(key->sk);
151  
152  	return key;
153  }
154  
155  void mctp_key_unref(struct mctp_sk_key *key)
156  {
157  	unsigned long flags;
158  
159  	if (!refcount_dec_and_test(&key->refs))
160  		return;
161  
162  	/* even though no refs exist here, the lock allows us to stay
163  	 * consistent with the locking requirement of mctp_dev_release_key
164  	 */
165  	spin_lock_irqsave(&key->lock, flags);
166  	mctp_dev_release_key(key->dev, key);
167  	spin_unlock_irqrestore(&key->lock, flags);
168  
169  	sock_put(key->sk);
170  	kfree(key);
171  }
172  
173  static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
174  {
175  	struct net *net = sock_net(&msk->sk);
176  	struct mctp_sk_key *tmp;
177  	unsigned long flags;
178  	int rc = 0;
179  
180  	spin_lock_irqsave(&net->mctp.keys_lock, flags);
181  
182  	if (sock_flag(&msk->sk, SOCK_DEAD)) {
183  		rc = -EINVAL;
184  		goto out_unlock;
185  	}
186  
187  	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
188  		if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
189  				   key->tag)) {
190  			spin_lock(&tmp->lock);
191  			if (tmp->valid)
192  				rc = -EEXIST;
193  			spin_unlock(&tmp->lock);
194  			if (rc)
195  				break;
196  		}
197  	}
198  
199  	if (!rc) {
200  		refcount_inc(&key->refs);
201  		key->expiry = jiffies + mctp_key_lifetime;
202  		timer_reduce(&msk->key_expiry, key->expiry);
203  
204  		hlist_add_head(&key->hlist, &net->mctp.keys);
205  		hlist_add_head(&key->sklist, &msk->keys);
206  	}
207  
208  out_unlock:
209  	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
210  
211  	return rc;
212  }
213  
214  /* Helper for mctp_route_input().
215   * We're done with the key; unlock and unref the key.
216   * For the usual case of automatic expiry we remove the key from lists.
217   * In the case that manual allocation is set on a key we release the lock
218   * and local ref, reset reassembly, but don't remove from lists.
219   */
220  static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
221  			       unsigned long flags, unsigned long reason)
222  __releases(&key->lock)
223  {
224  	struct sk_buff *skb;
225  
226  	trace_mctp_key_release(key, reason);
227  	skb = key->reasm_head;
228  	key->reasm_head = NULL;
229  
230  	if (!key->manual_alloc) {
231  		key->reasm_dead = true;
232  		key->valid = false;
233  		mctp_dev_release_key(key->dev, key);
234  	}
235  	spin_unlock_irqrestore(&key->lock, flags);
236  
237  	if (!key->manual_alloc) {
238  		spin_lock_irqsave(&net->mctp.keys_lock, flags);
239  		if (!hlist_unhashed(&key->hlist)) {
240  			hlist_del_init(&key->hlist);
241  			hlist_del_init(&key->sklist);
242  			mctp_key_unref(key);
243  		}
244  		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
245  	}
246  
247  	/* and one for the local reference */
248  	mctp_key_unref(key);
249  
250  	kfree_skb(skb);
251  }
252  
253  #ifdef CONFIG_MCTP_FLOWS
254  static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
255  {
256  	struct mctp_flow *flow;
257  
258  	flow = skb_ext_add(skb, SKB_EXT_MCTP);
259  	if (!flow)
260  		return;
261  
262  	refcount_inc(&key->refs);
263  	flow->key = key;
264  }
265  
266  static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
267  {
268  	struct mctp_sk_key *key;
269  	struct mctp_flow *flow;
270  
271  	flow = skb_ext_find(skb, SKB_EXT_MCTP);
272  	if (!flow)
273  		return;
274  
275  	key = flow->key;
276  
277  	if (WARN_ON(key->dev && key->dev != dev))
278  		return;
279  
280  	mctp_dev_set_key(dev, key);
281  }
282  #else
283  static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
284  static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
285  #endif
286  
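/* Fragment reassembly: the first fragment becomes reasm_head, and each
 * subsequent skb is appended to its frag_list, with len/data_len/truesize
 * accounting updated by hand. The 2-bit sequence number must increment
 * (mod 4) on every fragment; anything out of order aborts the reassembly.
 */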
287  static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
288  {
289  	struct mctp_hdr *hdr = mctp_hdr(skb);
290  	u8 exp_seq, this_seq;
291  
292  	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
293  		& MCTP_HDR_SEQ_MASK;
294  
295  	if (!key->reasm_head) {
296  		key->reasm_head = skb;
297  		key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
298  		key->last_seq = this_seq;
299  		return 0;
300  	}
301  
302  	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
303  
304  	if (this_seq != exp_seq)
305  		return -EINVAL;
306  
307  	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
308  		return -EINVAL;
309  
310  	skb->next = NULL;
311  	skb->sk = NULL;
312  	*key->reasm_tailp = skb;
313  	key->reasm_tailp = &skb->next;
314  
315  	key->last_seq = this_seq;
316  
317  	key->reasm_head->data_len += skb->len;
318  	key->reasm_head->len += skb->len;
319  	key->reasm_head->truesize += skb->truesize;
320  
321  	return 0;
322  }
323  
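/* Input path for packets addressed to a local EID: look up the matching
 * (src, dest, tag) key, deliver complete messages to the owning socket,
 * and create or extend reassembly state otherwise. The skb ownership
 * rule: once queued anywhere, the local skb pointer is set to NULL, so
 * the single kfree_skb() on exit only frees packets nothing claimed.
 */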
324  static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
325  {
326  	struct mctp_sk_key *key, *any_key = NULL;
327  	struct net *net = dev_net(skb->dev);
328  	struct mctp_sock *msk;
329  	struct mctp_hdr *mh;
330  	unsigned long f;
331  	u8 tag, flags;
332  	int rc;
333  
334  	msk = NULL;
335  	rc = -EINVAL;
336  
337  	/* We may be receiving a locally-routed packet; drop source sk
338  	 * accounting.
339  	 *
340  	 * From here, we will either queue the skb - either to a frag_queue, or
341  	 * to a receiving socket. When that succeeds, we clear the skb pointer;
342  	 * a non-NULL skb on exit will be otherwise unowned, and hence
343  	 * kfree_skb()-ed.
344  	 */
345  	skb_orphan(skb);
346  
347  	/* ensure we have enough data for a header and a type */
348  	if (skb->len < sizeof(struct mctp_hdr) + 1)
349  		goto out;
350  
351  	/* grab header, advance data ptr */
352  	mh = mctp_hdr(skb);
353  	skb_pull(skb, sizeof(struct mctp_hdr));
354  
355  	if (mh->ver != 1)
356  		goto out;
357  
358  	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
359  	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
360  
361  	rcu_read_lock();
362  
363  	/* lookup socket / reasm context, exactly matching (src,dest,tag).
364  	 * we hold a ref on the key, and key->lock held.
365  	 */
366  	key = mctp_lookup_key(net, skb, mh->src, &f);
367  
368  	if (flags & MCTP_HDR_FLAG_SOM) {
369  		if (key) {
370  			msk = container_of(key->sk, struct mctp_sock, sk);
371  		} else {
372  			/* first response to a broadcast? do a more general
373  			 * key lookup to find the socket, but don't use this
374  			 * key for reassembly - we'll create a more specific
375  			 * one for future packets if required (ie, !EOM).
376  			 */
377  			any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
378  			if (any_key) {
379  				msk = container_of(any_key->sk,
380  						   struct mctp_sock, sk);
381  				spin_unlock_irqrestore(&any_key->lock, f);
382  			}
383  		}
384  
385  		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
386  			msk = mctp_lookup_bind(net, skb);
387  
388  		if (!msk) {
389  			rc = -ENOENT;
390  			goto out_unlock;
391  		}
392  
393  		/* single-packet message? deliver to socket, clean up any
394  		 * pending key.
395  		 */
396  		if (flags & MCTP_HDR_FLAG_EOM) {
397  			rc = sock_queue_rcv_skb(&msk->sk, skb);
398  			if (!rc)
399  				skb = NULL;
400  			if (key) {
401  				/* we've hit a pending reassembly; not much we
402  				 * can do but drop it
403  				 */
404  				__mctp_key_done_in(key, net, f,
405  						   MCTP_TRACE_KEY_REPLIED);
406  				key = NULL;
407  			}
408  			goto out_unlock;
409  		}
410  
411  		/* broadcast response or a bind() - create a key for further
412  		 * packets for this message
413  		 */
414  		if (!key) {
415  			key = mctp_key_alloc(msk, mh->dest, mh->src,
416  					     tag, GFP_ATOMIC);
417  			if (!key) {
418  				rc = -ENOMEM;
419  				goto out_unlock;
420  			}
421  
422  			/* we can queue without the key lock here, as the
423  			 * key isn't observable yet
424  			 */
425  			mctp_frag_queue(key, skb);
426  
427  			/* if the key_add fails, we've raced with another
428  			 * SOM packet with the same src, dest and tag. There's
429  			 * no way to distinguish future packets, so all we
430  			 * can do is drop; we'll free the skb on exit from
431  			 * this function.
432  			 */
433  			rc = mctp_key_add(key, msk);
434  			if (!rc) {
435  				trace_mctp_key_acquire(key);
436  				skb = NULL;
437  			}
438  
439  			/* we don't need to release key->lock on exit, so
440  			 * clean up here and suppress the unlock via
441  			 * setting to NULL
442  			 */
443  			mctp_key_unref(key);
444  			key = NULL;
445  
446  		} else {
447  			if (key->reasm_head || key->reasm_dead) {
448  				/* duplicate start? drop everything */
449  				__mctp_key_done_in(key, net, f,
450  						   MCTP_TRACE_KEY_INVALIDATED);
451  				rc = -EEXIST;
452  				key = NULL;
453  			} else {
454  				rc = mctp_frag_queue(key, skb);
455  				if (!rc)
456  					skb = NULL;
457  			}
458  		}
459  
460  	} else if (key) {
461  		/* this packet continues a previous message; reassemble
462  		 * using the message-specific key
463  		 */
464  
465  		/* we need to be continuing an existing reassembly... */
466  		if (!key->reasm_head)
467  			rc = -EINVAL;
468  		else
469  			rc = mctp_frag_queue(key, skb);
470  
471  		if (rc)
472  			goto out_unlock;
473  
474  		/* we've queued; the queue owns the skb now */
475  		skb = NULL;
476  
477  		/* end of message? deliver to socket, and we're done with
478  		 * the reassembly/response key
479  		 */
480  		if (flags & MCTP_HDR_FLAG_EOM) {
481  			rc = sock_queue_rcv_skb(key->sk, key->reasm_head);
482  			if (!rc)
483  				key->reasm_head = NULL;
484  			__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
485  			key = NULL;
486  		}
487  
488  	} else {
489  		/* not a start, no matching key */
490  		rc = -ENOENT;
491  	}
492  
493  out_unlock:
494  	rcu_read_unlock();
495  	if (key) {
496  		spin_unlock_irqrestore(&key->lock, f);
497  		mctp_key_unref(key);
498  	}
499  	if (any_key)
500  		mctp_key_unref(any_key);
501  out:
502  	kfree_skb(skb);
503  	return rc;
504  }
505  
506  static unsigned int mctp_route_mtu(struct mctp_route *rt)
507  {
508  	return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
509  }
510  
511  static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
512  {
513  	struct mctp_skb_cb *cb = mctp_cb(skb);
514  	struct mctp_hdr *hdr = mctp_hdr(skb);
515  	char daddr_buf[MAX_ADDR_LEN];
516  	char *daddr = NULL;
517  	unsigned int mtu;
518  	int rc;
519  
520  	skb->protocol = htons(ETH_P_MCTP);
521  
522  	mtu = READ_ONCE(skb->dev->mtu);
523  	if (skb->len > mtu) {
524  		kfree_skb(skb);
525  		return -EMSGSIZE;
526  	}
527  
528  	if (cb->ifindex) {
529  		/* direct route; use the hwaddr we stashed in sendmsg */
530  		if (cb->halen != skb->dev->addr_len) {
531  			/* sanity check, sendmsg should have already caught this */
532  			kfree_skb(skb);
533  			return -EMSGSIZE;
534  		}
535  		daddr = cb->haddr;
536  	} else {
537  		/* If lookup fails let the device handle daddr==NULL */
538  		if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
539  			daddr = daddr_buf;
540  	}
541  
542  	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
543  			     daddr, skb->dev->dev_addr, skb->len);
544  	if (rc < 0) {
545  		kfree_skb(skb);
546  		return -EHOSTUNREACH;
547  	}
548  
549  	mctp_flow_prepare_output(skb, route->dev);
550  
551  	rc = dev_queue_xmit(skb);
552  	if (rc)
553  		rc = net_xmit_errno(rc);
554  
555  	return rc;
556  }
557  
558  /* route alloc/release */
559  static void mctp_route_release(struct mctp_route *rt)
560  {
561  	if (refcount_dec_and_test(&rt->refs)) {
562  		mctp_dev_put(rt->dev);
563  		kfree_rcu(rt, rcu);
564  	}
565  }
566  
567  /* returns a route with the refcount at 1 */
568  static struct mctp_route *mctp_route_alloc(void)
569  {
570  	struct mctp_route *rt;
571  
572  	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
573  	if (!rt)
574  		return NULL;
575  
576  	INIT_LIST_HEAD(&rt->list);
577  	refcount_set(&rt->refs, 1);
578  	rt->output = mctp_route_discard;
579  
580  	return rt;
581  }
582  
583  unsigned int mctp_default_net(struct net *net)
584  {
585  	return READ_ONCE(net->mctp.default_net);
586  }
587  
588  int mctp_default_net_set(struct net *net, unsigned int index)
589  {
590  	if (index == 0)
591  		return -EINVAL;
592  	WRITE_ONCE(net->mctp.default_net, index);
593  	return 0;
594  }
595  
596  /* tag management */
597  static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
598  			     struct mctp_sock *msk)
599  {
600  	struct netns_mctp *mns = &net->mctp;
601  
602  	lockdep_assert_held(&mns->keys_lock);
603  
604  	key->expiry = jiffies + mctp_key_lifetime;
605  	timer_reduce(&msk->key_expiry, key->expiry);
606  
607  	/* we hold the net->mctp.keys_lock here, allowing updates to both
608  	 * the net and sk key lists
609  	 */
610  	hlist_add_head_rcu(&key->hlist, &mns->keys);
611  	hlist_add_head_rcu(&key->sklist, &msk->keys);
612  	refcount_inc(&key->refs);
613  }
614  
615  /* Allocate a locally-owned tag value for (saddr, daddr), and reserve
616   * it for the socket msk
617   */
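/* As a worked example of the allocation below: if valid locally-owned
 * keys already exist for this (saddr, daddr) pair using tags 0 and 1,
 * tagbits is reduced to 0xfc and __ffs() hands out tag 2.
 */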
618  struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
619  					 mctp_eid_t daddr, mctp_eid_t saddr,
620  					 bool manual, u8 *tagp)
621  {
622  	struct net *net = sock_net(&msk->sk);
623  	struct netns_mctp *mns = &net->mctp;
624  	struct mctp_sk_key *key, *tmp;
625  	unsigned long flags;
626  	u8 tagbits;
627  
628  	/* for NULL destination EIDs, we may get a response from any peer */
629  	if (daddr == MCTP_ADDR_NULL)
630  		daddr = MCTP_ADDR_ANY;
631  
632  	/* be optimistic, alloc now */
633  	key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
634  	if (!key)
635  		return ERR_PTR(-ENOMEM);
636  
637  	/* 8 possible tag values */
638  	tagbits = 0xff;
639  
640  	spin_lock_irqsave(&mns->keys_lock, flags);
641  
642  	/* Walk through the existing keys, looking for potential conflicting
643  	 * tags. If we find a conflict, clear that bit from tagbits
644  	 */
645  	hlist_for_each_entry(tmp, &mns->keys, hlist) {
646  		/* We can check the lookup fields (*_addr, tag) without the
647  		 * lock held, they don't change over the lifetime of the key.
648  		 */
649  
650  		/* if we don't own the tag, it can't conflict */
651  		if (tmp->tag & MCTP_HDR_FLAG_TO)
652  			continue;
653  
654  		if (!(mctp_address_matches(tmp->peer_addr, daddr) &&
655  		      mctp_address_matches(tmp->local_addr, saddr)))
656  			continue;
657  
658  		spin_lock(&tmp->lock);
659  		/* key must still be valid. If we find a match, clear the
660  		 * potential tag value
661  		 */
662  		if (tmp->valid)
663  			tagbits &= ~(1 << tmp->tag);
664  		spin_unlock(&tmp->lock);
665  
666  		if (!tagbits)
667  			break;
668  	}
669  
670  	if (tagbits) {
671  		key->tag = __ffs(tagbits);
672  		mctp_reserve_tag(net, key, msk);
673  		trace_mctp_key_acquire(key);
674  
675  		key->manual_alloc = manual;
676  		*tagp = key->tag;
677  	}
678  
679  	spin_unlock_irqrestore(&mns->keys_lock, flags);
680  
681  	if (!tagbits) {
682  		mctp_key_unref(key);
683  		return ERR_PTR(-EBUSY);
684  	}
685  
686  	return key;
687  }
688  
689  static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
690  						    mctp_eid_t daddr,
691  						    u8 req_tag, u8 *tagp)
692  {
693  	struct net *net = sock_net(&msk->sk);
694  	struct netns_mctp *mns = &net->mctp;
695  	struct mctp_sk_key *key, *tmp;
696  	unsigned long flags;
697  
698  	req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
699  	key = NULL;
700  
701  	spin_lock_irqsave(&mns->keys_lock, flags);
702  
703  	hlist_for_each_entry(tmp, &mns->keys, hlist) {
704  		if (tmp->tag != req_tag)
705  			continue;
706  
707  		if (!mctp_address_matches(tmp->peer_addr, daddr))
708  			continue;
709  
710  		if (!tmp->manual_alloc)
711  			continue;
712  
713  		spin_lock(&tmp->lock);
714  		if (tmp->valid) {
715  			key = tmp;
716  			refcount_inc(&key->refs);
717  			spin_unlock(&tmp->lock);
718  			break;
719  		}
720  		spin_unlock(&tmp->lock);
721  	}
722  	spin_unlock_irqrestore(&mns->keys_lock, flags);
723  
724  	if (!key)
725  		return ERR_PTR(-ENOENT);
726  
727  	if (tagp)
728  		*tagp = key->tag;
729  
730  	return key;
731  }
732  
733  /* routing lookups */
734  static bool mctp_rt_match_eid(struct mctp_route *rt,
735  			      unsigned int net, mctp_eid_t eid)
736  {
737  	return READ_ONCE(rt->dev->net) == net &&
738  		rt->min <= eid && rt->max >= eid;
739  }
740  
741  /* compares match, used for duplicate prevention */
742  static bool mctp_rt_compare_exact(struct mctp_route *rt1,
743  				  struct mctp_route *rt2)
744  {
745  	ASSERT_RTNL();
746  	return rt1->dev->net == rt2->dev->net &&
747  		rt1->min == rt2->min &&
748  		rt1->max == rt2->max;
749  }
750  
751  struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
752  				     mctp_eid_t daddr)
753  {
754  	struct mctp_route *tmp, *rt = NULL;
755  
756  	rcu_read_lock();
757  
758  	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
759  		/* TODO: add metrics */
760  		if (mctp_rt_match_eid(tmp, dnet, daddr)) {
761  			if (refcount_inc_not_zero(&tmp->refs)) {
762  				rt = tmp;
763  				break;
764  			}
765  		}
766  	}
767  
768  	rcu_read_unlock();
769  
770  	return rt;
771  }
772  
773  static struct mctp_route *mctp_route_lookup_null(struct net *net,
774  						 struct net_device *dev)
775  {
776  	struct mctp_route *tmp, *rt = NULL;
777  
778  	rcu_read_lock();
779  
780  	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
781  		if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL &&
782  		    refcount_inc_not_zero(&tmp->refs)) {
783  			rt = tmp;
784  			break;
785  		}
786  	}
787  
788  	rcu_read_unlock();
789  
790  	return rt;
791  }
792  
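/* Fragmentation: each output packet carries a fresh copy of the 4-byte
 * header, so the usable payload per fragment is mtu - 4. For example, a
 * 200-byte message over a 68-byte MTU goes out as four fragments (64 +
 * 64 + 64 + 8 bytes of payload), with SOM on the first, EOM on the
 * last, and the 2-bit sequence counting 0..3.
 */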
793  static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
794  				  unsigned int mtu, u8 tag)
795  {
796  	const unsigned int hlen = sizeof(struct mctp_hdr);
797  	struct mctp_hdr *hdr, *hdr2;
798  	unsigned int pos, size, headroom;
799  	struct sk_buff *skb2;
800  	int rc;
801  	u8 seq;
802  
803  	hdr = mctp_hdr(skb);
804  	seq = 0;
805  	rc = 0;
806  
807  	if (mtu < hlen + 1) {
808  		kfree_skb(skb);
809  		return -EMSGSIZE;
810  	}
811  
812  	/* keep same headroom as the original skb */
813  	headroom = skb_headroom(skb);
814  
815  	/* we've got the header */
816  	skb_pull(skb, hlen);
817  
818  	for (pos = 0; pos < skb->len;) {
819  		/* size of message payload */
820  		size = min(mtu - hlen, skb->len - pos);
821  
822  		skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
823  		if (!skb2) {
824  			rc = -ENOMEM;
825  			break;
826  		}
827  
828  		/* generic skb copy */
829  		skb2->protocol = skb->protocol;
830  		skb2->priority = skb->priority;
831  		skb2->dev = skb->dev;
832  		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
833  
834  		if (skb->sk)
835  			skb_set_owner_w(skb2, skb->sk);
836  
837  		/* establish packet */
838  		skb_reserve(skb2, headroom);
839  		skb_reset_network_header(skb2);
840  		skb_put(skb2, hlen + size);
841  		skb2->transport_header = skb2->network_header + hlen;
842  
843  		/* copy header fields, calculate SOM/EOM flags & seq */
844  		hdr2 = mctp_hdr(skb2);
845  		hdr2->ver = hdr->ver;
846  		hdr2->dest = hdr->dest;
847  		hdr2->src = hdr->src;
848  		hdr2->flags_seq_tag = tag &
849  			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
850  
851  		if (pos == 0)
852  			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
853  
854  		if (pos + size == skb->len)
855  			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
856  
857  		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
858  
859  		/* copy message payload */
860  		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
861  
862  		/* we need to copy the extensions, for MCTP flow data */
863  		skb_ext_copy(skb2, skb);
864  
865  		/* do route */
866  		rc = rt->output(rt, skb2);
867  		if (rc)
868  			break;
869  
870  		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
871  		pos += size;
872  	}
873  
874  	consume_skb(skb);
875  	return rc;
876  }
877  
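/* Transmit entry point from AF_MCTP sendmsg. If req_tag has
 * MCTP_TAG_OWNER set we source a tag (preallocated or freshly
 * allocated) and send with TO set; otherwise req_tag carries the
 * peer's tag for a response. rt may be NULL for extended-addressing
 * sends, where the caller supplied an interface index and hardware
 * address via the skb cb instead.
 */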
878  int mctp_local_output(struct sock *sk, struct mctp_route *rt,
879  		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
880  {
881  	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
882  	struct mctp_skb_cb *cb = mctp_cb(skb);
883  	struct mctp_route tmp_rt = {0};
884  	struct mctp_sk_key *key;
885  	struct mctp_hdr *hdr;
886  	unsigned long flags;
887  	unsigned int mtu;
888  	mctp_eid_t saddr;
889  	bool ext_rt;
890  	int rc;
891  	u8 tag;
892  
893  	rc = -ENODEV;
894  
895  	if (rt) {
896  		ext_rt = false;
897  		if (WARN_ON(!rt->dev))
898  			goto out_release;
899  
900  	} else if (cb->ifindex) {
901  		struct net_device *dev;
902  
903  		ext_rt = true;
904  		rt = &tmp_rt;
905  
906  		rcu_read_lock();
907  		dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
908  		if (!dev) {
909  			rcu_read_unlock();
910  			goto out_free;
911  		}
912  		rt->dev = __mctp_dev_get(dev);
913  		rcu_read_unlock();
914  
915  		if (!rt->dev)
916  			goto out_release;
917  
918  		/* establish temporary route - we set up enough to keep
919  		 * mctp_route_output happy
920  		 */
921  		rt->output = mctp_route_output;
922  		rt->mtu = 0;
923  
924  	} else {
925  		rc = -EINVAL;
926  		goto out_free;
927  	}
928  
929  	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
930  	if (rt->dev->num_addrs == 0) {
931  		rc = -EHOSTUNREACH;
932  	} else {
933  		/* use the outbound interface's first address as our source */
934  		saddr = rt->dev->addrs[0];
935  		rc = 0;
936  	}
937  	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
938  
939  	if (rc)
940  		goto out_release;
941  
942  	if (req_tag & MCTP_TAG_OWNER) {
943  		if (req_tag & MCTP_TAG_PREALLOC)
944  			key = mctp_lookup_prealloc_tag(msk, daddr,
945  						       req_tag, &tag);
946  		else
947  			key = mctp_alloc_local_tag(msk, daddr, saddr,
948  						   false, &tag);
949  
950  		if (IS_ERR(key)) {
951  			rc = PTR_ERR(key);
952  			goto out_release;
953  		}
954  		mctp_skb_set_flow(skb, key);
955  		/* done with the key in this scope */
956  		mctp_key_unref(key);
957  		tag |= MCTP_HDR_FLAG_TO;
958  	} else {
959  		key = NULL;
960  		tag = req_tag & MCTP_TAG_MASK;
961  	}
962  
963  	skb->protocol = htons(ETH_P_MCTP);
964  	skb->priority = 0;
965  	skb_reset_transport_header(skb);
966  	skb_push(skb, sizeof(struct mctp_hdr));
967  	skb_reset_network_header(skb);
968  	skb->dev = rt->dev->dev;
969  
970  	/* cb->net will have been set on initial ingress */
971  	cb->src = saddr;
972  
973  	/* set up common header fields */
974  	hdr = mctp_hdr(skb);
975  	hdr->ver = 1;
976  	hdr->dest = daddr;
977  	hdr->src = saddr;
978  
979  	mtu = mctp_route_mtu(rt);
980  
981  	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
982  		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
983  			MCTP_HDR_FLAG_EOM | tag;
984  		rc = rt->output(rt, skb);
985  	} else {
986  		rc = mctp_do_fragment_route(rt, skb, mtu, tag);
987  	}
988  
989  	/* route output functions consume the skb, even on error */
990  	skb = NULL;
991  
992  out_release:
993  	if (!ext_rt)
994  		mctp_route_release(rt);
995  
996  	mctp_dev_put(tmp_rt.dev);
997  
998  out_free:
999  	kfree_skb(skb);
1000  	return rc;
1001  }
1002  
1003  /* route management */
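/* Each route covers the inclusive EID range [daddr_start, daddr_start +
 * daddr_extent]; over netlink, rtm_dst_len carries the extent as a
 * count of EIDs rather than a prefix length (see mctp_fill_rtinfo()).
 */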
1004  static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
1005  			  unsigned int daddr_extent, unsigned int mtu,
1006  			  unsigned char type)
1007  {
1008  	int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
1009  	struct net *net = dev_net(mdev->dev);
1010  	struct mctp_route *rt, *ert;
1011  
1012  	if (!mctp_address_unicast(daddr_start))
1013  		return -EINVAL;
1014  
1015  	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
1016  		return -EINVAL;
1017  
1018  	switch (type) {
1019  	case RTN_LOCAL:
1020  		rtfn = mctp_route_input;
1021  		break;
1022  	case RTN_UNICAST:
1023  		rtfn = mctp_route_output;
1024  		break;
1025  	default:
1026  		return -EINVAL;
1027  	}
1028  
1029  	rt = mctp_route_alloc();
1030  	if (!rt)
1031  		return -ENOMEM;
1032  
1033  	rt->min = daddr_start;
1034  	rt->max = daddr_start + daddr_extent;
1035  	rt->mtu = mtu;
1036  	rt->dev = mdev;
1037  	mctp_dev_hold(rt->dev);
1038  	rt->type = type;
1039  	rt->output = rtfn;
1040  
1041  	ASSERT_RTNL();
1042  	/* Prevent duplicate identical routes. */
1043  	list_for_each_entry(ert, &net->mctp.routes, list) {
1044  		if (mctp_rt_compare_exact(rt, ert)) {
1045  			mctp_route_release(rt);
1046  			return -EEXIST;
1047  		}
1048  	}
1049  
1050  	list_add_rcu(&rt->list, &net->mctp.routes);
1051  
1052  	return 0;
1053  }
1054  
1055  static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
1056  			     unsigned int daddr_extent, unsigned char type)
1057  {
1058  	struct net *net = dev_net(mdev->dev);
1059  	struct mctp_route *rt, *tmp;
1060  	mctp_eid_t daddr_end;
1061  	bool dropped;
1062  
1063  	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
1064  		return -EINVAL;
1065  
1066  	daddr_end = daddr_start + daddr_extent;
1067  	dropped = false;
1068  
1069  	ASSERT_RTNL();
1070  
1071  	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
1072  		if (rt->dev == mdev &&
1073  		    rt->min == daddr_start && rt->max == daddr_end &&
1074  		    rt->type == type) {
1075  			list_del_rcu(&rt->list);
1076  			/* TODO: immediate RTM_DELROUTE */
1077  			mctp_route_release(rt);
1078  			dropped = true;
1079  		}
1080  	}
1081  
1082  	return dropped ? 0 : -ENOENT;
1083  }
1084  
1085  int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
1086  {
1087  	return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
1088  }
1089  
1090  int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
1091  {
1092  	return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
1093  }
1094  
1095  /* removes all entries for a given device */
1096  void mctp_route_remove_dev(struct mctp_dev *mdev)
1097  {
1098  	struct net *net = dev_net(mdev->dev);
1099  	struct mctp_route *rt, *tmp;
1100  
1101  	ASSERT_RTNL();
1102  	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
1103  		if (rt->dev == mdev) {
1104  			list_del_rcu(&rt->list);
1105  			/* TODO: immediate RTM_DELROUTE */
1106  			mctp_route_release(rt);
1107  		}
1108  	}
1109  }
1110  
1111  /* Incoming packet-handling */
1112  
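/* EID classes used by the sanity checks below: 0 is the null EID, 0xff
 * is broadcast, 1-7 are reserved, and 8-254 are routable unicast (see
 * the mctp_address_*() helpers in include/net/mctp.h).
 */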
1113  static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
1114  				struct packet_type *pt,
1115  				struct net_device *orig_dev)
1116  {
1117  	struct net *net = dev_net(dev);
1118  	struct mctp_dev *mdev;
1119  	struct mctp_skb_cb *cb;
1120  	struct mctp_route *rt;
1121  	struct mctp_hdr *mh;
1122  
1123  	rcu_read_lock();
1124  	mdev = __mctp_dev_get(dev);
1125  	rcu_read_unlock();
1126  	if (!mdev) {
1127  		/* basic non-data sanity checks */
1128  		goto err_drop;
1129  	}
1130  
1131  	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
1132  		goto err_drop;
1133  
1134  	skb_reset_transport_header(skb);
1135  	skb_reset_network_header(skb);
1136  
1137  	/* We have enough for a header; decode and route */
1138  	mh = mctp_hdr(skb);
1139  	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
1140  		goto err_drop;
1141  
1142  	/* source must be valid unicast or null; drop reserved ranges and
1143  	 * broadcast
1144  	 */
1145  	if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
1146  		goto err_drop;
1147  
1148  	/* dest address: as above, but allow broadcast */
1149  	if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
1150  	      mctp_address_broadcast(mh->dest)))
1151  		goto err_drop;
1152  
1153  	/* MCTP drivers must populate halen/haddr */
1154  	if (dev->type == ARPHRD_MCTP) {
1155  		cb = mctp_cb(skb);
1156  	} else {
1157  		cb = __mctp_cb(skb);
1158  		cb->halen = 0;
1159  	}
1160  	cb->net = READ_ONCE(mdev->net);
1161  	cb->ifindex = dev->ifindex;
1162  
1163  	rt = mctp_route_lookup(net, cb->net, mh->dest);
1164  
1165  	/* NULL EID, but addressed to our physical address */
1166  	if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
1167  		rt = mctp_route_lookup_null(net, dev);
1168  
1169  	if (!rt)
1170  		goto err_drop;
1171  
1172  	rt->output(rt, skb);
1173  	mctp_route_release(rt);
1174  	mctp_dev_put(mdev);
1175  
1176  	return NET_RX_SUCCESS;
1177  
1178  err_drop:
1179  	kfree_skb(skb);
1180  	mctp_dev_put(mdev);
1181  	return NET_RX_DROP;
1182  }
1183  
1184  static struct packet_type mctp_packet_type = {
1185  	.type = cpu_to_be16(ETH_P_MCTP),
1186  	.func = mctp_pkttype_receive,
1187  };
1188  
1189  /* netlink interface */
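/* The handlers below serve RTM_NEWROUTE/RTM_DELROUTE/RTM_GETROUTE
 * rtnetlink requests for AF_MCTP, e.g. route management from userspace
 * MCTP tooling.
 */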
1190  
1191  static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
1192  	[RTA_DST]		= { .type = NLA_U8 },
1193  	[RTA_METRICS]		= { .type = NLA_NESTED },
1194  	[RTA_OIF]		= { .type = NLA_U32 },
1195  };
1196  
1197  /* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
1198   * tb must hold RTA_MAX+1 elements.
1199   */
1200  static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
1201  			      struct netlink_ext_ack *extack,
1202  			      struct nlattr **tb, struct rtmsg **rtm,
1203  			      struct mctp_dev **mdev, mctp_eid_t *daddr_start)
1204  {
1205  	struct net *net = sock_net(skb->sk);
1206  	struct net_device *dev;
1207  	unsigned int ifindex;
1208  	int rc;
1209  
1210  	rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
1211  			 rta_mctp_policy, extack);
1212  	if (rc < 0) {
1213  		NL_SET_ERR_MSG(extack, "incorrect format");
1214  		return rc;
1215  	}
1216  
1217  	if (!tb[RTA_DST]) {
1218  		NL_SET_ERR_MSG(extack, "dst EID missing");
1219  		return -EINVAL;
1220  	}
1221  	*daddr_start = nla_get_u8(tb[RTA_DST]);
1222  
1223  	if (!tb[RTA_OIF]) {
1224  		NL_SET_ERR_MSG(extack, "ifindex missing");
1225  		return -EINVAL;
1226  	}
1227  	ifindex = nla_get_u32(tb[RTA_OIF]);
1228  
1229  	*rtm = nlmsg_data(nlh);
1230  	if ((*rtm)->rtm_family != AF_MCTP) {
1231  		NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
1232  		return -EINVAL;
1233  	}
1234  
1235  	dev = __dev_get_by_index(net, ifindex);
1236  	if (!dev) {
1237  		NL_SET_ERR_MSG(extack, "bad ifindex");
1238  		return -ENODEV;
1239  	}
1240  	*mdev = mctp_dev_get_rtnl(dev);
1241  	if (!*mdev)
1242  		return -ENODEV;
1243  
1244  	if (dev->flags & IFF_LOOPBACK) {
1245  		NL_SET_ERR_MSG(extack, "no routes to loopback");
1246  		return -EINVAL;
1247  	}
1248  
1249  	return 0;
1250  }
1251  
1252  static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
1253  	[RTAX_MTU]		= { .type = NLA_U32 },
1254  };
1255  
1256  static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1257  			 struct netlink_ext_ack *extack)
1258  {
1259  	struct nlattr *tb[RTA_MAX + 1];
1260  	struct nlattr *tbx[RTAX_MAX + 1];
1261  	mctp_eid_t daddr_start;
1262  	struct mctp_dev *mdev;
1263  	struct rtmsg *rtm;
1264  	unsigned int mtu;
1265  	int rc;
1266  
1267  	rc = mctp_route_nlparse(skb, nlh, extack, tb,
1268  				&rtm, &mdev, &daddr_start);
1269  	if (rc < 0)
1270  		return rc;
1271  
1272  	if (rtm->rtm_type != RTN_UNICAST) {
1273  		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
1274  		return -EINVAL;
1275  	}
1276  
1277  	mtu = 0;
1278  	if (tb[RTA_METRICS]) {
1279  		rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
1280  				      rta_metrics_policy, NULL);
1281  		if (rc < 0)
1282  			return rc;
1283  		if (tbx[RTAX_MTU])
1284  			mtu = nla_get_u32(tbx[RTAX_MTU]);
1285  	}
1286  
1287  	rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
1288  			    rtm->rtm_type);
1289  	return rc;
1290  }
1291  
1292  static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1293  			 struct netlink_ext_ack *extack)
1294  {
1295  	struct nlattr *tb[RTA_MAX + 1];
1296  	mctp_eid_t daddr_start;
1297  	struct mctp_dev *mdev;
1298  	struct rtmsg *rtm;
1299  	int rc;
1300  
1301  	rc = mctp_route_nlparse(skb, nlh, extack, tb,
1302  				&rtm, &mdev, &daddr_start);
1303  	if (rc < 0)
1304  		return rc;
1305  
1306  	/* we only have unicast routes */
1307  	if (rtm->rtm_type != RTN_UNICAST)
1308  		return -EINVAL;
1309  
1310  	rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
1311  	return rc;
1312  }
1313  
1314  static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
1315  			    u32 portid, u32 seq, int event, unsigned int flags)
1316  {
1317  	struct nlmsghdr *nlh;
1318  	struct rtmsg *hdr;
1319  	void *metrics;
1320  
1321  	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
1322  	if (!nlh)
1323  		return -EMSGSIZE;
1324  
1325  	hdr = nlmsg_data(nlh);
1326  	hdr->rtm_family = AF_MCTP;
1327  
1328  	/* we use the _len fields as a number of EIDs, rather than
1329  	 * a number of bits in the address
1330  	 */
1331  	hdr->rtm_dst_len = rt->max - rt->min;
1332  	hdr->rtm_src_len = 0;
1333  	hdr->rtm_tos = 0;
1334  	hdr->rtm_table = RT_TABLE_DEFAULT;
1335  	hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
1336  	hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
1337  	hdr->rtm_type = rt->type;
1338  
1339  	if (nla_put_u8(skb, RTA_DST, rt->min))
1340  		goto cancel;
1341  
1342  	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
1343  	if (!metrics)
1344  		goto cancel;
1345  
1346  	if (rt->mtu) {
1347  		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
1348  			goto cancel;
1349  	}
1350  
1351  	nla_nest_end(skb, metrics);
1352  
1353  	if (rt->dev) {
1354  		if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
1355  			goto cancel;
1356  	}
1357  
1358  	/* TODO: conditional neighbour physaddr? */
1359  
1360  	nlmsg_end(skb, nlh);
1361  
1362  	return 0;
1363  
1364  cancel:
1365  	nlmsg_cancel(skb, nlh);
1366  	return -EMSGSIZE;
1367  }
1368  
1369  static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
1370  {
1371  	struct net *net = sock_net(skb->sk);
1372  	struct mctp_route *rt;
1373  	int s_idx, idx;
1374  
1375  	/* TODO: allow filtering on route data, possibly under
1376  	 * cb->strict_check
1377  	 */
1378  
1379  	/* TODO: change to struct overlay */
1380  	s_idx = cb->args[0];
1381  	idx = 0;
1382  
1383  	rcu_read_lock();
1384  	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
1385  		if (idx++ < s_idx)
1386  			continue;
1387  		if (mctp_fill_rtinfo(skb, rt,
1388  				     NETLINK_CB(cb->skb).portid,
1389  				     cb->nlh->nlmsg_seq,
1390  				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
1391  			break;
1392  	}
1393  
1394  	rcu_read_unlock();
1395  	cb->args[0] = idx;
1396  
1397  	return skb->len;
1398  }
1399  
1400  /* net namespace implementation */
1401  static int __net_init mctp_routes_net_init(struct net *net)
1402  {
1403  	struct netns_mctp *ns = &net->mctp;
1404  
1405  	INIT_LIST_HEAD(&ns->routes);
1406  	INIT_HLIST_HEAD(&ns->binds);
1407  	mutex_init(&ns->bind_lock);
1408  	INIT_HLIST_HEAD(&ns->keys);
1409  	spin_lock_init(&ns->keys_lock);
1410  	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
1411  	return 0;
1412  }
1413  
1414  static void __net_exit mctp_routes_net_exit(struct net *net)
1415  {
1416  	struct mctp_route *rt;
1417  
1418  	rcu_read_lock();
1419  	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
1420  		mctp_route_release(rt);
1421  	rcu_read_unlock();
1422  }
1423  
1424  static struct pernet_operations mctp_net_ops = {
1425  	.init = mctp_routes_net_init,
1426  	.exit = mctp_routes_net_exit,
1427  };
1428  
1429  static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
1430  	{THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
1431  	{THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
1432  	{THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
1433  };
1434  
1435  int __init mctp_routes_init(void)
1436  {
1437  	int err;
1438  
1439  	dev_add_pack(&mctp_packet_type);
1440  
1441  	err = register_pernet_subsys(&mctp_net_ops);
1442  	if (err)
1443  		goto err_pernet;
1444  
1445  	err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
1446  	if (err)
1447  		goto err_rtnl;
1448  
1449  	return 0;
1450  
1451  err_rtnl:
1452  	unregister_pernet_subsys(&mctp_net_ops);
1453  err_pernet:
1454  	dev_remove_pack(&mctp_packet_type);
1455  	return err;
1456  }
1457  
1458  void mctp_routes_exit(void)
1459  {
1460  	rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
1461  	unregister_pernet_subsys(&mctp_net_ops);
1462  	dev_remove_pack(&mctp_packet_type);
1463  }
1464  
1465  #if IS_ENABLED(CONFIG_MCTP_TEST)
1466  #include "test/route-test.c"
1467  #endif
1468