xref: /openbmc/linux/net/core/fib_rules.c (revision 96de0e252cedffad61b3cb5e05662c591898e69a)
/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License as
 *	published by the Free Software Foundation, version 2.
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>

static LIST_HEAD(rules_ops);
static DEFINE_SPINLOCK(rules_mod_lock);

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid);

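/* Find the rule operations registered for an address family and take a
 * reference on the owning module.  Returns NULL if no handler is
 * registered or the module is going away; callers drop the reference
 * with rules_ops_put().
 */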
static struct fib_rules_ops *lookup_rules_ops(int family)
{
	struct fib_rules_ops *ops;

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &rules_ops, list) {
		if (ops->family == family) {
			if (!try_module_get(ops->owner))
				ops = NULL;
			rcu_read_unlock();
			return ops;
		}
	}
	rcu_read_unlock();

	return NULL;
}

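/* Release the module reference taken by lookup_rules_ops(). */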
static void rules_ops_put(struct fib_rules_ops *ops)
{
	if (ops)
		module_put(ops->owner);
}

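/* Give the family a chance to flush its routing cache after a rule
 * change, if it provides a flush_cache() hook.
 */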
static void flush_route_cache(struct fib_rules_ops *ops)
{
	if (ops->flush_cache)
		ops->flush_cache();
}

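/* Register the rule operations for an address family.  All mandatory
 * callbacks must be present and rule_size must cover at least the
 * generic struct fib_rule.  Fails with -EEXIST if the family already
 * has a handler registered.
 */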
int fib_rules_register(struct fib_rules_ops *ops)
{
	int err = -EEXIST;
	struct fib_rules_ops *o;

	if (ops->rule_size < sizeof(struct fib_rule))
		return -EINVAL;

	if (ops->match == NULL || ops->configure == NULL ||
	    ops->compare == NULL || ops->fill == NULL ||
	    ops->action == NULL)
		return -EINVAL;

	spin_lock(&rules_mod_lock);
	list_for_each_entry(o, &rules_ops, list)
		if (ops->family == o->family)
			goto errout;

	list_add_tail_rcu(&ops->list, &rules_ops);
	err = 0;
errout:
	spin_unlock(&rules_mod_lock);

	return err;
}

EXPORT_SYMBOL_GPL(fib_rules_register);

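/* Unlink and release all rules still attached to an ops structure. */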
static void cleanup_ops(struct fib_rules_ops *ops)
{
	struct fib_rule *rule, *tmp;

	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
		list_del_rcu(&rule->list);
		fib_rule_put(rule);
	}
}

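/* Remove an ops structure from the global list, drop all of its rules
 * and wait for an RCU grace period so concurrent readers are done
 * before the caller frees it.
 */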
int fib_rules_unregister(struct fib_rules_ops *ops)
{
	int err = 0;
	struct fib_rules_ops *o;

	spin_lock(&rules_mod_lock);
	list_for_each_entry(o, &rules_ops, list) {
		if (o == ops) {
			list_del_rcu(&o->list);
			cleanup_ops(ops);
			goto out;
		}
	}

	err = -ENOENT;
out:
	spin_unlock(&rules_mod_lock);

	synchronize_rcu();

	return err;
}

EXPORT_SYMBOL_GPL(fib_rules_unregister);

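/* Match a flow against the generic selectors (input interface and
 * firewall mark) before handing off to the family specific match().
 * FIB_RULE_INVERT negates the result.
 */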
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
			  struct flowi *fl, int flags)
{
	int ret = 0;

	if (rule->ifindex && (rule->ifindex != fl->iif))
		goto out;

	if ((rule->mark ^ fl->mark) & rule->mark_mask)
		goto out;

	ret = ops->match(rule, fl, flags);
out:
	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

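/* Walk the rule list in order of preference and run the action of the
 * first matching rule.  Goto rules jump to their resolved target, NOP
 * rules are skipped, and an action returning -EAGAIN continues the
 * walk.  On success a reference to the matched rule is stored in
 * arg->rule; -ESRCH is returned if no rule matched.
 */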
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		if (!fib_rule_match(rule, ops, fl, flags))
			continue;

		if (rule->action == FR_ACT_GOTO) {
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				continue;
			} else {
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			continue;
		else
			err = ops->action(rule, fl, flags, arg);

		if (err != -EAGAIN) {
			fib_rule_get(rule);
			arg->rule = rule;
			goto out;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}

EXPORT_SYMBOL_GPL(fib_rules_lookup);

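/* Sanity check the source and destination selectors: a non-zero prefix
 * length requires the matching attribute to be present, the length to
 * fit the family's address size and the address payload to be exactly
 * that size.
 */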
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
			    struct fib_rules_ops *ops)
{
	int err = -EINVAL;

	if (frh->src_len)
		if (tb[FRA_SRC] == NULL ||
		    frh->src_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_SRC]) != ops->addr_size)
			goto errout;

	if (frh->dst_len)
		if (tb[FRA_DST] == NULL ||
		    frh->dst_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_DST]) != ops->addr_size)
			goto errout;

	err = 0;
errout:
	return err;
}

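/* RTM_NEWRULE handler: validate the netlink request, allocate and
 * configure a new rule, resolve goto targets in both directions and
 * insert the rule into the list ordered by preference.
 */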
static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = skb->sk->sk_net;
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *r, *last = NULL;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL, unresolved = 0;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

	ops = lookup_rules_ops(frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

	rule = kzalloc(ops->rule_size, GFP_KERNEL);
	if (rule == NULL) {
		err = -ENOMEM;
		goto errout;
	}

	if (tb[FRA_PRIORITY])
		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);

	if (tb[FRA_IFNAME]) {
		struct net_device *dev;

		rule->ifindex = -1;
		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, rule->ifname);
		if (dev)
			rule->ifindex = dev->ifindex;
	}

	if (tb[FRA_FWMARK]) {
		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
		if (rule->mark)
			/* compatibility: if the mark value is non-zero all bits
			 * are compared unless a mask is explicitly specified.
			 */
			rule->mark_mask = 0xFFFFFFFF;
	}

	if (tb[FRA_FWMASK])
		rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

	rule->action = frh->action;
	rule->flags = frh->flags;
	rule->table = frh_get_table(frh, tb);

	if (!rule->pref && ops->default_pref)
		rule->pref = ops->default_pref();

	err = -EINVAL;
	if (tb[FRA_GOTO]) {
		if (rule->action != FR_ACT_GOTO)
			goto errout_free;

		rule->target = nla_get_u32(tb[FRA_GOTO]);
		/* Backward jumps are prohibited to avoid endless loops */
		if (rule->target <= rule->pref)
			goto errout_free;

		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->pref == rule->target) {
				rule->ctarget = r;
				break;
			}
		}

		if (rule->ctarget == NULL)
			unresolved = 1;
	} else if (rule->action == FR_ACT_GOTO)
		goto errout_free;

	err = ops->configure(rule, skb, nlh, frh, tb);
	if (err < 0)
		goto errout_free;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref > rule->pref)
			break;
		last = r;
	}

	fib_rule_get(rule);

	if (ops->unresolved_rules) {
		/*
		 * There are unresolved goto rules in the list, check if
		 * any of them are pointing to this new rule.
		 */
		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->action == FR_ACT_GOTO &&
			    r->target == rule->pref) {
				BUG_ON(r->ctarget != NULL);
				rcu_assign_pointer(r->ctarget, rule);
				if (--ops->unresolved_rules == 0)
					break;
			}
		}
	}

	if (rule->action == FR_ACT_GOTO)
		ops->nr_goto_rules++;

	if (unresolved)
		ops->unresolved_rules++;

	if (last)
		list_add_rcu(&rule->list, &last->list);
	else
		list_add_rcu(&rule->list, &ops->rules_list);

	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
	flush_route_cache(ops);
	rules_ops_put(ops);
	return 0;

errout_free:
	kfree(rule);
errout:
	rules_ops_put(ops);
	return err;
}

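/* RTM_DELRULE handler: find the first rule matching all attributes
 * given in the request, unlink it, invalidate any goto rules pointing
 * at it and release it after an RCU grace period.
 */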
static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *tmp;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

	ops = lookup_rules_ops(frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

	list_for_each_entry(rule, &ops->rules_list, list) {
		if (frh->action && (frh->action != rule->action))
			continue;

		if (frh->table && (frh_get_table(frh, tb) != rule->table))
			continue;

		if (tb[FRA_PRIORITY] &&
		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
			continue;

		if (tb[FRA_IFNAME] &&
		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
			continue;

		if (tb[FRA_FWMARK] &&
		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
			continue;

		if (tb[FRA_FWMASK] &&
		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
			continue;

		if (!ops->compare(rule, frh, tb))
			continue;

		if (rule->flags & FIB_RULE_PERMANENT) {
			err = -EPERM;
			goto errout;
		}

		list_del_rcu(&rule->list);

		if (rule->action == FR_ACT_GOTO)
			ops->nr_goto_rules--;

		/*
		 * Check if this rule is a target of any goto rules.  If
		 * so, invalidate their cached target.  As this operation
		 * is potentially very expensive, it is only performed if
		 * goto rules have actually been added.
		 */
		if (ops->nr_goto_rules > 0) {
			list_for_each_entry(tmp, &ops->rules_list, list) {
				if (tmp->ctarget == rule) {
					rcu_assign_pointer(tmp->ctarget, NULL);
					ops->unresolved_rules++;
				}
			}
		}

		synchronize_rcu();
		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
				   NETLINK_CB(skb).pid);
		fib_rule_put(rule);
		flush_route_cache(ops);
		rules_ops_put(ops);
		return 0;
	}

	err = -ENOENT;
errout:
	rules_ops_put(ops);
	return err;
}

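/* Upper bound on the notification message size for a rule, including
 * any family specific payload.
 */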
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
					 struct fib_rule *rule)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
			 + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
			 + nla_total_size(4) /* FRA_PRIORITY */
			 + nla_total_size(4) /* FRA_TABLE */
			 + nla_total_size(4) /* FRA_FWMARK */
			 + nla_total_size(4); /* FRA_FWMASK */

	if (ops->nlmsg_payload)
		payload += ops->nlmsg_payload(rule);

	return payload;
}

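/* Fill a netlink message with the generic rule attributes and let the
 * family append its specific ones via the fill() hook.
 */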
static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
			    u32 pid, u32 seq, int type, int flags,
			    struct fib_rules_ops *ops)
{
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	frh = nlmsg_data(nlh);
	frh->table = rule->table;
	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
	frh->res1 = 0;
	frh->res2 = 0;
	frh->action = rule->action;
	frh->flags = rule->flags;

	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
		frh->flags |= FIB_RULE_UNRESOLVED;

	if (rule->ifname[0]) {
		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);

		if (rule->ifindex == -1)
			frh->flags |= FIB_RULE_DEV_DETACHED;
	}

	if (rule->pref)
		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);

	if (rule->mark)
		NLA_PUT_U32(skb, FRA_FWMARK, rule->mark);

	if (rule->mark_mask || rule->mark)
		NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);

	if (rule->target)
		NLA_PUT_U32(skb, FRA_GOTO, rule->target);

	if (ops->fill(rule, skb, nlh, frh) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

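/* Dump all rules of one family, resuming at the position stored in
 * cb->args[1] by a previous partial dump.
 */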
static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
		      struct fib_rules_ops *ops)
{
	int idx = 0;
	struct fib_rule *rule;

	list_for_each_entry(rule, &ops->rules_list, list) {
		if (idx < cb->args[1])
			goto skip;

		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
				     NLM_F_MULTI, ops) < 0)
			break;
skip:
		idx++;
	}
	cb->args[1] = idx;
	rules_ops_put(ops);

	return skb->len;
}

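/* RTM_GETRULE dump handler: dump a single family if one was requested,
 * otherwise iterate over all registered families using cb->args[0] as
 * the resume position.
 */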
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct fib_rules_ops *ops;
	int idx = 0, family;

	family = rtnl_msg_family(cb->nlh);
	if (family != AF_UNSPEC) {
		/* Protocol specific dump request */
		ops = lookup_rules_ops(family);
		if (ops == NULL)
			return -EAFNOSUPPORT;

		return dump_rules(skb, cb, ops);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &rules_ops, list) {
		if (idx < cb->args[0] || !try_module_get(ops->owner))
			goto skip;

		if (dump_rules(skb, cb, ops) < 0)
			break;

		cb->args[1] = 0;
	skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

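/* Send an RTM_NEWRULE/RTM_DELRULE notification to the family's netlink
 * group, reporting failures as a socket error on that group.
 */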
static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
errout:
	if (err < 0)
		rtnl_set_sk_err(ops->nlgroup, err);
}

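/* Bind rules that refer to a newly registered device by name to its
 * ifindex.
 */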
static void attach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->ifindex == -1 &&
		    strcmp(dev->name, rule->ifname) == 0)
			rule->ifindex = dev->ifindex;
	}
}

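/* Mark rules bound to a device as detached when it is unregistered. */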
static void detach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list)
		if (rule->ifindex == dev->ifindex)
			rule->ifindex = -1;
}

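/* Track device registration and unregistration to keep the interface
 * bindings of all rules up to date.
 */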
static int fib_rules_event(struct notifier_block *this, unsigned long event,
			    void *ptr)
{
	struct net_device *dev = ptr;
	struct fib_rules_ops *ops;

	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	ASSERT_RTNL();
	rcu_read_lock();

	switch (event) {
	case NETDEV_REGISTER:
		list_for_each_entry(ops, &rules_ops, list)
			attach_rules(&ops->rules_list, dev);
		break;

	case NETDEV_UNREGISTER:
		list_for_each_entry(ops, &rules_ops, list)
			detach_rules(&ops->rules_list, dev);
		break;
	}

	rcu_read_unlock();

	return NOTIFY_DONE;
}

static struct notifier_block fib_rules_notifier = {
	.notifier_call = fib_rules_event,
};

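/* Hook up the rtnetlink handlers and the device notifier. */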
static int __init fib_rules_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);

	return register_netdevice_notifier(&fib_rules_notifier);
}

subsys_initcall(fib_rules_init);