xref: /openbmc/linux/net/core/fib_rules.c (revision 070ed82e)
1 /*
2  * net/core/fib_rules.c		Generic Routing Rules
3  *
4  *	This program is free software; you can redistribute it and/or
5  *	modify it under the terms of the GNU General Public License as
6  *	published by the Free Software Foundation, version 2.
7  *
8  * Authors:	Thomas Graf <tgraf@suug.ch>
9  */
10 
11 #include <linux/types.h>
12 #include <linux/kernel.h>
13 #include <linux/slab.h>
14 #include <linux/list.h>
15 #include <linux/module.h>
16 #include <net/net_namespace.h>
17 #include <net/sock.h>
18 #include <net/fib_rules.h>
19 #include <net/ip_tunnels.h>
20 
21 static const struct fib_kuid_range fib_kuid_range_unset = {
22 	KUIDT_INIT(0),
23 	KUIDT_INIT(~0),
24 };
25 
26 bool fib_rule_matchall(const struct fib_rule *rule)
27 {
28 	if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
29 	    rule->flags)
30 		return false;
31 	if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
32 		return false;
33 	if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
34 	    !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
35 		return false;
36 	return true;
37 }
38 EXPORT_SYMBOL_GPL(fib_rule_matchall);
39 
40 int fib_default_rule_add(struct fib_rules_ops *ops,
41 			 u32 pref, u32 table, u32 flags)
42 {
43 	struct fib_rule *r;
44 
45 	r = kzalloc(ops->rule_size, GFP_KERNEL);
46 	if (r == NULL)
47 		return -ENOMEM;
48 
49 	refcount_set(&r->refcnt, 1);
50 	r->action = FR_ACT_TO_TBL;
51 	r->pref = pref;
52 	r->table = table;
53 	r->flags = flags;
54 	r->fr_net = ops->fro_net;
55 	r->uid_range = fib_kuid_range_unset;
56 
57 	r->suppress_prefixlen = -1;
58 	r->suppress_ifgroup = -1;
59 
60 	/* The lock is not required here, the list in unreacheable
61 	 * at the moment this function is called */
62 	list_add_tail(&r->list, &ops->rules_list);
63 	return 0;
64 }
65 EXPORT_SYMBOL(fib_default_rule_add);
66 
67 static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
68 {
69 	struct list_head *pos;
70 	struct fib_rule *rule;
71 
72 	if (!list_empty(&ops->rules_list)) {
73 		pos = ops->rules_list.next;
74 		if (pos->next != &ops->rules_list) {
75 			rule = list_entry(pos->next, struct fib_rule, list);
76 			if (rule->pref)
77 				return rule->pref - 1;
78 		}
79 	}
80 
81 	return 0;
82 }
83 
84 static void notify_rule_change(int event, struct fib_rule *rule,
85 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
86 			       u32 pid);
87 
88 static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
89 {
90 	struct fib_rules_ops *ops;
91 
92 	rcu_read_lock();
93 	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
94 		if (ops->family == family) {
95 			if (!try_module_get(ops->owner))
96 				ops = NULL;
97 			rcu_read_unlock();
98 			return ops;
99 		}
100 	}
101 	rcu_read_unlock();
102 
103 	return NULL;
104 }
105 
106 static void rules_ops_put(struct fib_rules_ops *ops)
107 {
108 	if (ops)
109 		module_put(ops->owner);
110 }
111 
112 static void flush_route_cache(struct fib_rules_ops *ops)
113 {
114 	if (ops->flush_cache)
115 		ops->flush_cache(ops);
116 }
117 
118 static int __fib_rules_register(struct fib_rules_ops *ops)
119 {
120 	int err = -EEXIST;
121 	struct fib_rules_ops *o;
122 	struct net *net;
123 
124 	net = ops->fro_net;
125 
126 	if (ops->rule_size < sizeof(struct fib_rule))
127 		return -EINVAL;
128 
129 	if (ops->match == NULL || ops->configure == NULL ||
130 	    ops->compare == NULL || ops->fill == NULL ||
131 	    ops->action == NULL)
132 		return -EINVAL;
133 
134 	spin_lock(&net->rules_mod_lock);
135 	list_for_each_entry(o, &net->rules_ops, list)
136 		if (ops->family == o->family)
137 			goto errout;
138 
139 	list_add_tail_rcu(&ops->list, &net->rules_ops);
140 	err = 0;
141 errout:
142 	spin_unlock(&net->rules_mod_lock);
143 
144 	return err;
145 }
146 
147 struct fib_rules_ops *
148 fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
149 {
150 	struct fib_rules_ops *ops;
151 	int err;
152 
153 	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
154 	if (ops == NULL)
155 		return ERR_PTR(-ENOMEM);
156 
157 	INIT_LIST_HEAD(&ops->rules_list);
158 	ops->fro_net = net;
159 
160 	err = __fib_rules_register(ops);
161 	if (err) {
162 		kfree(ops);
163 		ops = ERR_PTR(err);
164 	}
165 
166 	return ops;
167 }
168 EXPORT_SYMBOL_GPL(fib_rules_register);
169 
170 static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
171 {
172 	struct fib_rule *rule, *tmp;
173 
174 	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
175 		list_del_rcu(&rule->list);
176 		if (ops->delete)
177 			ops->delete(rule);
178 		fib_rule_put(rule);
179 	}
180 }
181 
182 void fib_rules_unregister(struct fib_rules_ops *ops)
183 {
184 	struct net *net = ops->fro_net;
185 
186 	spin_lock(&net->rules_mod_lock);
187 	list_del_rcu(&ops->list);
188 	spin_unlock(&net->rules_mod_lock);
189 
190 	fib_rules_cleanup_ops(ops);
191 	kfree_rcu(ops, rcu);
192 }
193 EXPORT_SYMBOL_GPL(fib_rules_unregister);
194 
195 static int uid_range_set(struct fib_kuid_range *range)
196 {
197 	return uid_valid(range->start) && uid_valid(range->end);
198 }
199 
200 static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
201 {
202 	struct fib_rule_uid_range *in;
203 	struct fib_kuid_range out;
204 
205 	in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);
206 
207 	out.start = make_kuid(current_user_ns(), in->start);
208 	out.end = make_kuid(current_user_ns(), in->end);
209 
210 	return out;
211 }
212 
213 static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
214 {
215 	struct fib_rule_uid_range out = {
216 		from_kuid_munged(current_user_ns(), range->start),
217 		from_kuid_munged(current_user_ns(), range->end)
218 	};
219 
220 	return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
221 }
222 
223 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
224 			  struct flowi *fl, int flags,
225 			  struct fib_lookup_arg *arg)
226 {
227 	int ret = 0;
228 
229 	if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
230 		goto out;
231 
232 	if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
233 		goto out;
234 
235 	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
236 		goto out;
237 
238 	if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
239 		goto out;
240 
241 	if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
242 		goto out;
243 
244 	if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
245 	    uid_gt(fl->flowi_uid, rule->uid_range.end))
246 		goto out;
247 
248 	ret = ops->match(rule, fl, flags);
249 out:
250 	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
251 }
252 
253 int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
254 		     int flags, struct fib_lookup_arg *arg)
255 {
256 	struct fib_rule *rule;
257 	int err;
258 
259 	rcu_read_lock();
260 
261 	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
262 jumped:
263 		if (!fib_rule_match(rule, ops, fl, flags, arg))
264 			continue;
265 
266 		if (rule->action == FR_ACT_GOTO) {
267 			struct fib_rule *target;
268 
269 			target = rcu_dereference(rule->ctarget);
270 			if (target == NULL) {
271 				continue;
272 			} else {
273 				rule = target;
274 				goto jumped;
275 			}
276 		} else if (rule->action == FR_ACT_NOP)
277 			continue;
278 		else
279 			err = ops->action(rule, fl, flags, arg);
280 
281 		if (!err && ops->suppress && ops->suppress(rule, arg))
282 			continue;
283 
284 		if (err != -EAGAIN) {
285 			if ((arg->flags & FIB_LOOKUP_NOREF) ||
286 			    likely(refcount_inc_not_zero(&rule->refcnt))) {
287 				arg->rule = rule;
288 				goto out;
289 			}
290 			break;
291 		}
292 	}
293 
294 	err = -ESRCH;
295 out:
296 	rcu_read_unlock();
297 
298 	return err;
299 }
300 EXPORT_SYMBOL_GPL(fib_rules_lookup);
301 
302 static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
303 			    struct fib_rules_ops *ops)
304 {
305 	int err = -EINVAL;
306 
307 	if (frh->src_len)
308 		if (tb[FRA_SRC] == NULL ||
309 		    frh->src_len > (ops->addr_size * 8) ||
310 		    nla_len(tb[FRA_SRC]) != ops->addr_size)
311 			goto errout;
312 
313 	if (frh->dst_len)
314 		if (tb[FRA_DST] == NULL ||
315 		    frh->dst_len > (ops->addr_size * 8) ||
316 		    nla_len(tb[FRA_DST]) != ops->addr_size)
317 			goto errout;
318 
319 	err = 0;
320 errout:
321 	return err;
322 }
323 
324 static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
325 		       struct nlattr **tb, struct fib_rule *rule)
326 {
327 	struct fib_rule *r;
328 
329 	list_for_each_entry(r, &ops->rules_list, list) {
330 		if (r->action != rule->action)
331 			continue;
332 
333 		if (r->table != rule->table)
334 			continue;
335 
336 		if (r->pref != rule->pref)
337 			continue;
338 
339 		if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
340 			continue;
341 
342 		if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
343 			continue;
344 
345 		if (r->mark != rule->mark)
346 			continue;
347 
348 		if (r->mark_mask != rule->mark_mask)
349 			continue;
350 
351 		if (r->tun_id != rule->tun_id)
352 			continue;
353 
354 		if (r->fr_net != rule->fr_net)
355 			continue;
356 
357 		if (r->l3mdev != rule->l3mdev)
358 			continue;
359 
360 		if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
361 		    !uid_eq(r->uid_range.end, rule->uid_range.end))
362 			continue;
363 
364 		if (!ops->compare(r, frh, tb))
365 			continue;
366 		return 1;
367 	}
368 	return 0;
369 }
370 
371 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
372 		   struct netlink_ext_ack *extack)
373 {
374 	struct net *net = sock_net(skb->sk);
375 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
376 	struct fib_rules_ops *ops = NULL;
377 	struct fib_rule *rule, *r, *last = NULL;
378 	struct nlattr *tb[FRA_MAX+1];
379 	int err = -EINVAL, unresolved = 0;
380 
381 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
382 		goto errout;
383 
384 	ops = lookup_rules_ops(net, frh->family);
385 	if (ops == NULL) {
386 		err = -EAFNOSUPPORT;
387 		goto errout;
388 	}
389 
390 	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
391 	if (err < 0)
392 		goto errout;
393 
394 	err = validate_rulemsg(frh, tb, ops);
395 	if (err < 0)
396 		goto errout;
397 
398 	rule = kzalloc(ops->rule_size, GFP_KERNEL);
399 	if (rule == NULL) {
400 		err = -ENOMEM;
401 		goto errout;
402 	}
403 	refcount_set(&rule->refcnt, 1);
404 	rule->fr_net = net;
405 
406 	rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
407 	                              : fib_default_rule_pref(ops);
408 
409 	if (tb[FRA_IIFNAME]) {
410 		struct net_device *dev;
411 
412 		rule->iifindex = -1;
413 		nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
414 		dev = __dev_get_by_name(net, rule->iifname);
415 		if (dev)
416 			rule->iifindex = dev->ifindex;
417 	}
418 
419 	if (tb[FRA_OIFNAME]) {
420 		struct net_device *dev;
421 
422 		rule->oifindex = -1;
423 		nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
424 		dev = __dev_get_by_name(net, rule->oifname);
425 		if (dev)
426 			rule->oifindex = dev->ifindex;
427 	}
428 
429 	if (tb[FRA_FWMARK]) {
430 		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
431 		if (rule->mark)
432 			/* compatibility: if the mark value is non-zero all bits
433 			 * are compared unless a mask is explicitly specified.
434 			 */
435 			rule->mark_mask = 0xFFFFFFFF;
436 	}
437 
438 	if (tb[FRA_FWMASK])
439 		rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
440 
441 	if (tb[FRA_TUN_ID])
442 		rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
443 
444 	err = -EINVAL;
445 	if (tb[FRA_L3MDEV]) {
446 #ifdef CONFIG_NET_L3_MASTER_DEV
447 		rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
448 		if (rule->l3mdev != 1)
449 #endif
450 			goto errout_free;
451 	}
452 
453 	rule->action = frh->action;
454 	rule->flags = frh->flags;
455 	rule->table = frh_get_table(frh, tb);
456 	if (tb[FRA_SUPPRESS_PREFIXLEN])
457 		rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
458 	else
459 		rule->suppress_prefixlen = -1;
460 
461 	if (tb[FRA_SUPPRESS_IFGROUP])
462 		rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
463 	else
464 		rule->suppress_ifgroup = -1;
465 
466 	if (tb[FRA_GOTO]) {
467 		if (rule->action != FR_ACT_GOTO)
468 			goto errout_free;
469 
470 		rule->target = nla_get_u32(tb[FRA_GOTO]);
471 		/* Backward jumps are prohibited to avoid endless loops */
472 		if (rule->target <= rule->pref)
473 			goto errout_free;
474 
475 		list_for_each_entry(r, &ops->rules_list, list) {
476 			if (r->pref == rule->target) {
477 				RCU_INIT_POINTER(rule->ctarget, r);
478 				break;
479 			}
480 		}
481 
482 		if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
483 			unresolved = 1;
484 	} else if (rule->action == FR_ACT_GOTO)
485 		goto errout_free;
486 
487 	if (rule->l3mdev && rule->table)
488 		goto errout_free;
489 
490 	if (tb[FRA_UID_RANGE]) {
491 		if (current_user_ns() != net->user_ns) {
492 			err = -EPERM;
493 			goto errout_free;
494 		}
495 
496 		rule->uid_range = nla_get_kuid_range(tb);
497 
498 		if (!uid_range_set(&rule->uid_range) ||
499 		    !uid_lte(rule->uid_range.start, rule->uid_range.end))
500 			goto errout_free;
501 	} else {
502 		rule->uid_range = fib_kuid_range_unset;
503 	}
504 
505 	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
506 	    rule_exists(ops, frh, tb, rule)) {
507 		err = -EEXIST;
508 		goto errout_free;
509 	}
510 
511 	err = ops->configure(rule, skb, frh, tb);
512 	if (err < 0)
513 		goto errout_free;
514 
515 	list_for_each_entry(r, &ops->rules_list, list) {
516 		if (r->pref > rule->pref)
517 			break;
518 		last = r;
519 	}
520 
521 	if (last)
522 		list_add_rcu(&rule->list, &last->list);
523 	else
524 		list_add_rcu(&rule->list, &ops->rules_list);
525 
526 	if (ops->unresolved_rules) {
527 		/*
528 		 * There are unresolved goto rules in the list, check if
529 		 * any of them are pointing to this new rule.
530 		 */
531 		list_for_each_entry(r, &ops->rules_list, list) {
532 			if (r->action == FR_ACT_GOTO &&
533 			    r->target == rule->pref &&
534 			    rtnl_dereference(r->ctarget) == NULL) {
535 				rcu_assign_pointer(r->ctarget, rule);
536 				if (--ops->unresolved_rules == 0)
537 					break;
538 			}
539 		}
540 	}
541 
542 	if (rule->action == FR_ACT_GOTO)
543 		ops->nr_goto_rules++;
544 
545 	if (unresolved)
546 		ops->unresolved_rules++;
547 
548 	if (rule->tun_id)
549 		ip_tunnel_need_metadata();
550 
551 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
552 	flush_route_cache(ops);
553 	rules_ops_put(ops);
554 	return 0;
555 
556 errout_free:
557 	kfree(rule);
558 errout:
559 	rules_ops_put(ops);
560 	return err;
561 }
562 EXPORT_SYMBOL_GPL(fib_nl_newrule);
563 
564 int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
565 		   struct netlink_ext_ack *extack)
566 {
567 	struct net *net = sock_net(skb->sk);
568 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
569 	struct fib_rules_ops *ops = NULL;
570 	struct fib_rule *rule, *r;
571 	struct nlattr *tb[FRA_MAX+1];
572 	struct fib_kuid_range range;
573 	int err = -EINVAL;
574 
575 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
576 		goto errout;
577 
578 	ops = lookup_rules_ops(net, frh->family);
579 	if (ops == NULL) {
580 		err = -EAFNOSUPPORT;
581 		goto errout;
582 	}
583 
584 	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
585 	if (err < 0)
586 		goto errout;
587 
588 	err = validate_rulemsg(frh, tb, ops);
589 	if (err < 0)
590 		goto errout;
591 
592 	if (tb[FRA_UID_RANGE]) {
593 		range = nla_get_kuid_range(tb);
594 		if (!uid_range_set(&range)) {
595 			err = -EINVAL;
596 			goto errout;
597 		}
598 	} else {
599 		range = fib_kuid_range_unset;
600 	}
601 
602 	list_for_each_entry(rule, &ops->rules_list, list) {
603 		if (frh->action && (frh->action != rule->action))
604 			continue;
605 
606 		if (frh_get_table(frh, tb) &&
607 		    (frh_get_table(frh, tb) != rule->table))
608 			continue;
609 
610 		if (tb[FRA_PRIORITY] &&
611 		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
612 			continue;
613 
614 		if (tb[FRA_IIFNAME] &&
615 		    nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
616 			continue;
617 
618 		if (tb[FRA_OIFNAME] &&
619 		    nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
620 			continue;
621 
622 		if (tb[FRA_FWMARK] &&
623 		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
624 			continue;
625 
626 		if (tb[FRA_FWMASK] &&
627 		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
628 			continue;
629 
630 		if (tb[FRA_TUN_ID] &&
631 		    (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
632 			continue;
633 
634 		if (tb[FRA_L3MDEV] &&
635 		    (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
636 			continue;
637 
638 		if (uid_range_set(&range) &&
639 		    (!uid_eq(rule->uid_range.start, range.start) ||
640 		     !uid_eq(rule->uid_range.end, range.end)))
641 			continue;
642 
643 		if (!ops->compare(rule, frh, tb))
644 			continue;
645 
646 		if (rule->flags & FIB_RULE_PERMANENT) {
647 			err = -EPERM;
648 			goto errout;
649 		}
650 
651 		if (ops->delete) {
652 			err = ops->delete(rule);
653 			if (err)
654 				goto errout;
655 		}
656 
657 		if (rule->tun_id)
658 			ip_tunnel_unneed_metadata();
659 
660 		list_del_rcu(&rule->list);
661 
662 		if (rule->action == FR_ACT_GOTO) {
663 			ops->nr_goto_rules--;
664 			if (rtnl_dereference(rule->ctarget) == NULL)
665 				ops->unresolved_rules--;
666 		}
667 
668 		/*
669 		 * Check if this rule is a target to any of them. If so,
670 		 * adjust to the next one with the same preference or
671 		 * disable them. As this operation is eventually very
672 		 * expensive, it is only performed if goto rules, except
673 		 * current if it is goto rule, have actually been added.
674 		 */
675 		if (ops->nr_goto_rules > 0) {
676 			struct fib_rule *n;
677 
678 			n = list_next_entry(rule, list);
679 			if (&n->list == &ops->rules_list || n->pref != rule->pref)
680 				n = NULL;
681 			list_for_each_entry(r, &ops->rules_list, list) {
682 				if (rtnl_dereference(r->ctarget) != rule)
683 					continue;
684 				rcu_assign_pointer(r->ctarget, n);
685 				if (!n)
686 					ops->unresolved_rules++;
687 			}
688 		}
689 
690 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
691 				   NETLINK_CB(skb).portid);
692 		fib_rule_put(rule);
693 		flush_route_cache(ops);
694 		rules_ops_put(ops);
695 		return 0;
696 	}
697 
698 	err = -ENOENT;
699 errout:
700 	rules_ops_put(ops);
701 	return err;
702 }
703 EXPORT_SYMBOL_GPL(fib_nl_delrule);
704 
705 static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
706 					 struct fib_rule *rule)
707 {
708 	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
709 			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
710 			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
711 			 + nla_total_size(4) /* FRA_PRIORITY */
712 			 + nla_total_size(4) /* FRA_TABLE */
713 			 + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
714 			 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
715 			 + nla_total_size(4) /* FRA_FWMARK */
716 			 + nla_total_size(4) /* FRA_FWMASK */
717 			 + nla_total_size_64bit(8) /* FRA_TUN_ID */
718 			 + nla_total_size(sizeof(struct fib_kuid_range));
719 
720 	if (ops->nlmsg_payload)
721 		payload += ops->nlmsg_payload(rule);
722 
723 	return payload;
724 }
725 
726 static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
727 			    u32 pid, u32 seq, int type, int flags,
728 			    struct fib_rules_ops *ops)
729 {
730 	struct nlmsghdr *nlh;
731 	struct fib_rule_hdr *frh;
732 
733 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
734 	if (nlh == NULL)
735 		return -EMSGSIZE;
736 
737 	frh = nlmsg_data(nlh);
738 	frh->family = ops->family;
739 	frh->table = rule->table;
740 	if (nla_put_u32(skb, FRA_TABLE, rule->table))
741 		goto nla_put_failure;
742 	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
743 		goto nla_put_failure;
744 	frh->res1 = 0;
745 	frh->res2 = 0;
746 	frh->action = rule->action;
747 	frh->flags = rule->flags;
748 
749 	if (rule->action == FR_ACT_GOTO &&
750 	    rcu_access_pointer(rule->ctarget) == NULL)
751 		frh->flags |= FIB_RULE_UNRESOLVED;
752 
753 	if (rule->iifname[0]) {
754 		if (nla_put_string(skb, FRA_IIFNAME, rule->iifname))
755 			goto nla_put_failure;
756 		if (rule->iifindex == -1)
757 			frh->flags |= FIB_RULE_IIF_DETACHED;
758 	}
759 
760 	if (rule->oifname[0]) {
761 		if (nla_put_string(skb, FRA_OIFNAME, rule->oifname))
762 			goto nla_put_failure;
763 		if (rule->oifindex == -1)
764 			frh->flags |= FIB_RULE_OIF_DETACHED;
765 	}
766 
767 	if ((rule->pref &&
768 	     nla_put_u32(skb, FRA_PRIORITY, rule->pref)) ||
769 	    (rule->mark &&
770 	     nla_put_u32(skb, FRA_FWMARK, rule->mark)) ||
771 	    ((rule->mark_mask || rule->mark) &&
772 	     nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
773 	    (rule->target &&
774 	     nla_put_u32(skb, FRA_GOTO, rule->target)) ||
775 	    (rule->tun_id &&
776 	     nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
777 	    (rule->l3mdev &&
778 	     nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
779 	    (uid_range_set(&rule->uid_range) &&
780 	     nla_put_uid_range(skb, &rule->uid_range)))
781 		goto nla_put_failure;
782 
783 	if (rule->suppress_ifgroup != -1) {
784 		if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
785 			goto nla_put_failure;
786 	}
787 
788 	if (ops->fill(rule, skb, frh) < 0)
789 		goto nla_put_failure;
790 
791 	nlmsg_end(skb, nlh);
792 	return 0;
793 
794 nla_put_failure:
795 	nlmsg_cancel(skb, nlh);
796 	return -EMSGSIZE;
797 }
798 
799 static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
800 		      struct fib_rules_ops *ops)
801 {
802 	int idx = 0;
803 	struct fib_rule *rule;
804 	int err = 0;
805 
806 	rcu_read_lock();
807 	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
808 		if (idx < cb->args[1])
809 			goto skip;
810 
811 		err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
812 				       cb->nlh->nlmsg_seq, RTM_NEWRULE,
813 				       NLM_F_MULTI, ops);
814 		if (err)
815 			break;
816 skip:
817 		idx++;
818 	}
819 	rcu_read_unlock();
820 	cb->args[1] = idx;
821 	rules_ops_put(ops);
822 
823 	return err;
824 }
825 
826 static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
827 {
828 	struct net *net = sock_net(skb->sk);
829 	struct fib_rules_ops *ops;
830 	int idx = 0, family;
831 
832 	family = rtnl_msg_family(cb->nlh);
833 	if (family != AF_UNSPEC) {
834 		/* Protocol specific dump request */
835 		ops = lookup_rules_ops(net, family);
836 		if (ops == NULL)
837 			return -EAFNOSUPPORT;
838 
839 		dump_rules(skb, cb, ops);
840 
841 		return skb->len;
842 	}
843 
844 	rcu_read_lock();
845 	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
846 		if (idx < cb->args[0] || !try_module_get(ops->owner))
847 			goto skip;
848 
849 		if (dump_rules(skb, cb, ops) < 0)
850 			break;
851 
852 		cb->args[1] = 0;
853 skip:
854 		idx++;
855 	}
856 	rcu_read_unlock();
857 	cb->args[0] = idx;
858 
859 	return skb->len;
860 }
861 
862 static void notify_rule_change(int event, struct fib_rule *rule,
863 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
864 			       u32 pid)
865 {
866 	struct net *net;
867 	struct sk_buff *skb;
868 	int err = -ENOBUFS;
869 
870 	net = ops->fro_net;
871 	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
872 	if (skb == NULL)
873 		goto errout;
874 
875 	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
876 	if (err < 0) {
877 		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
878 		WARN_ON(err == -EMSGSIZE);
879 		kfree_skb(skb);
880 		goto errout;
881 	}
882 
883 	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
884 	return;
885 errout:
886 	if (err < 0)
887 		rtnl_set_sk_err(net, ops->nlgroup, err);
888 }
889 
890 static void attach_rules(struct list_head *rules, struct net_device *dev)
891 {
892 	struct fib_rule *rule;
893 
894 	list_for_each_entry(rule, rules, list) {
895 		if (rule->iifindex == -1 &&
896 		    strcmp(dev->name, rule->iifname) == 0)
897 			rule->iifindex = dev->ifindex;
898 		if (rule->oifindex == -1 &&
899 		    strcmp(dev->name, rule->oifname) == 0)
900 			rule->oifindex = dev->ifindex;
901 	}
902 }
903 
904 static void detach_rules(struct list_head *rules, struct net_device *dev)
905 {
906 	struct fib_rule *rule;
907 
908 	list_for_each_entry(rule, rules, list) {
909 		if (rule->iifindex == dev->ifindex)
910 			rule->iifindex = -1;
911 		if (rule->oifindex == dev->ifindex)
912 			rule->oifindex = -1;
913 	}
914 }
915 
916 
917 static int fib_rules_event(struct notifier_block *this, unsigned long event,
918 			   void *ptr)
919 {
920 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
921 	struct net *net = dev_net(dev);
922 	struct fib_rules_ops *ops;
923 
924 	ASSERT_RTNL();
925 
926 	switch (event) {
927 	case NETDEV_REGISTER:
928 		list_for_each_entry(ops, &net->rules_ops, list)
929 			attach_rules(&ops->rules_list, dev);
930 		break;
931 
932 	case NETDEV_CHANGENAME:
933 		list_for_each_entry(ops, &net->rules_ops, list) {
934 			detach_rules(&ops->rules_list, dev);
935 			attach_rules(&ops->rules_list, dev);
936 		}
937 		break;
938 
939 	case NETDEV_UNREGISTER:
940 		list_for_each_entry(ops, &net->rules_ops, list)
941 			detach_rules(&ops->rules_list, dev);
942 		break;
943 	}
944 
945 	return NOTIFY_DONE;
946 }
947 
948 static struct notifier_block fib_rules_notifier = {
949 	.notifier_call = fib_rules_event,
950 };
951 
952 static int __net_init fib_rules_net_init(struct net *net)
953 {
954 	INIT_LIST_HEAD(&net->rules_ops);
955 	spin_lock_init(&net->rules_mod_lock);
956 	return 0;
957 }
958 
959 static struct pernet_operations fib_rules_net_ops = {
960 	.init = fib_rules_net_init,
961 };
962 
963 static int __init fib_rules_init(void)
964 {
965 	int err;
966 	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
967 	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
968 	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
969 
970 	err = register_pernet_subsys(&fib_rules_net_ops);
971 	if (err < 0)
972 		goto fail;
973 
974 	err = register_netdevice_notifier(&fib_rules_notifier);
975 	if (err < 0)
976 		goto fail_unregister;
977 
978 	return 0;
979 
980 fail_unregister:
981 	unregister_pernet_subsys(&fib_rules_net_ops);
982 fail:
983 	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
984 	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
985 	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
986 	return err;
987 }
988 
989 subsys_initcall(fib_rules_init);
990