/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN 100

static struct dst_entry *xfrm_policy_sk_bundles;

static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(unsigned long arg);

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
}

static inline bool
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
}

bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return false;
}

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= NPROTO))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}
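/* Note: on success xfrm_policy_get_afinfo() returns with rcu_read_lock()
 * held; xfrm_policy_put_afinfo() is the matching unlock.  Helpers such as
 * __xfrm_dst_lookup() below rely on that pairing, so nothing may sleep
 * between the get and the put.
 */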
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(net, tos, saddr, daddr);

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr, sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr, sizeof(*prev_daddr));
	}

	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}
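/* Policy lifetime timer.  Hard add/use expiries delete the policy and
 * notify key managers with hard == 1; soft expiries only warn via
 * km_policy_expired() and re-arm the timer XFRM_KM_TIMEOUT seconds out,
 * giving the key manager a chance to act before the hard limit hits.
 */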
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (unlikely(xp->walk.dead))
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}

static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(pol->walk.dead))
		flo = NULL;
	else
		xfrm_pol_hold(pol);

	return flo;
}

static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	return !pol->walk.dead;
}

static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
}

static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};

/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		skb_queue_head_init(&policy->polq.hold_queue);
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
			    (unsigned long)policy);
		policy->flo.ops = &xfrm_policy_fc_ops;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must already have been released. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->walk.dead);

	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
		BUG();

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

static void xfrm_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL)
		kfree_skb(skb);
}
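/* The timers take a policy reference when armed, so every successful
 * del_timer() below must drop that reference; the final xfrm_pol_put()
 * drops the caller's reference and may free the policy through
 * xfrm_policy_destroy().
 */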
/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must already be unlinked from lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	policy->walk.dead = 1;

	atomic_inc(&policy->genid);

	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
	xfrm_queue_purge(&policy->polq.hold_queue);

	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
}

static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

	return net->xfrm.policy_bydst[dir].table + hash;
}

static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			hlist_del(&pol->bydst);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del(&pol->bydst);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = &pol->bydst;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(struct net *net, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
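/* Each resize doubles the table, e.g. hmask 15 (16 buckets) becomes
 * nhashmask 31 (32 buckets).  The rehash runs under the per-netns policy
 * write lock, so lookups are simply excluded while entries move, and
 * xfrm_dst_hash_transfer() preserves relative chain order so the
 * priority-sorted insertion invariant stays intact.
 */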
static void xfrm_byidx_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = net->xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
{
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
	}
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);

	mutex_unlock(&hash_resize_mutex);
}
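/* Policy indices encode the direction: the generator advances in steps of
 * 8 and ORs in dir, so the low three bits of ->index always hold the
 * direction and xfrm_policy_id2dir() can recover it without a lookup.
 */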
/* Generate a new index... KAME seems to generate indices ordered by cost
 * at the price of completely unpredictable rule ordering; that will not
 * do here.
 */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		if (!index) {
			idx = (idx_generator | dir);
			idx_generator += 8;
		} else {
			idx = index;
			index = 0;
		}

		if (idx == 0)
			idx = 8;
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
		found = 0;
		hlist_for_each_entry(p, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
	spin_unlock_bh(&pq->hold_queue.lock);

	if (skb_queue_empty(&list))
		return;

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
	spin_unlock_bh(&pq->hold_queue.lock);
}

static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	u32 mark = policy->mark.v & policy->mark.m;

	if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
		return true;

	if ((mark & pol->mark.m) == pol->mark.v &&
	    policy->priority == pol->priority)
		return true;

	return false;
}
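/* Insertion below keeps every hash chain sorted by ascending priority.  A
 * policy with the same type, selector, mark and security context replaces
 * the existing entry (queued packets move over via xfrm_policy_requeue())
 * unless the caller asked for an exclusive insert, in which case -EEXIST
 * is returned.
 */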
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct net *net = xp_net(policy);
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *newpos;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_policy_mark_match(policy, pol) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	net->xfrm.policy_count[dir]++;
	atomic_inc(&flow_cache_genid);

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
		__xfrm_policy_unlink(delpol, dir);
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	list_add(&policy->walk.all, &net->xfrm.policy_all);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == type &&
		    (mark & pol->mark.m) == pol->mark.v &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    (mark & pol->mark.m) == pol->mark.v) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
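/* Flushing is two-pass: with CONFIG_SECURITY_NETWORK_XFRM the check below
 * first asks the LSM to approve deletion of every matching policy before
 * anything is unlinked, so a flush either proceeds in full or fails
 * before it has touched the SPD.
 */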
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->sessionid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif

int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

	again1:
		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			__xfrm_policy_unlink(pol, dir);
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->sessionid,
						 audit_info->secid);

			xfrm_policy_kill(pol);

			write_lock_bh(&net->xfrm.xfrm_policy_lock);
			goto again1;
		}

		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				xfrm_policy_kill(pol);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
				goto again2;
			}
		}

	}
	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
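/* Walks are resumable: the walker threads its own walk entry (marked dead
 * so it is skipped as a policy) into net->xfrm.policy_all.  When func()
 * returns nonzero the entry is parked at the current position, and the
 * next xfrm_policy_walk() call with the same walk continues from there.
 */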
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	INIT_LIST_HEAD(&walk->walk.all);
	walk->walk.dead = 1;
	walk->type = type;
	walk->seq = 0;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
{
	if (list_empty(&walk->walk.all))
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	list_del(&walk->walk.all);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	const struct xfrm_selector *sel = &pol->selector;
	int ret = -ESRCH;
	bool match;

	if (pol->family != family ||
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
						  dir);

	return ret;
}

static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
						     const struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	const xfrm_address_t *daddr, *saddr;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return ret;
}

static struct xfrm_policy *
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}

static int flow_to_policy_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	switch (dir) {
	default:
	case FLOW_DIR_IN:
		return XFRM_POLICY_IN;
	case FLOW_DIR_OUT:
		return XFRM_POLICY_OUT;
	case FLOW_DIR_FWD:
		return XFRM_POLICY_FWD;
	}
}
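/* flow_to_policy_dir() above and policy_to_flow_dir() below collapse to
 * the identity mapping whenever the two enums line up; the leading
 * constant comparison lets the compiler fold the switch away in that
 * case.
 */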
static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
{
	struct xfrm_policy *pol;

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));

	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
	if (IS_ERR_OR_NULL(pol))
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
						 const struct flowi *fl)
{
	struct xfrm_policy *pol;
	struct net *net = sock_net(sk);

	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
		int err = 0;

		if (match) {
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
			err = security_xfrm_policy_lookup(pol->security,
						      fl->flowi_secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
out:
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct net *net = xp_net(pol);
	struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
						     pol->family, dir);

	list_add(&pol->walk.all, &net->xfrm.policy_all);
	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
	net->xfrm.policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	struct net *net = xp_net(pol);

	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del_init(&pol->bydst);
	hlist_del(&pol->byidx);
	list_del(&pol->walk.all);
	net->xfrm.policy_count[dir]--;

	return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	struct net *net = xp_net(pol);

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	if (pol) {
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);
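/* Per-socket policies live in the same per-netns structures as global
 * ones but are linked under direction XFRM_POLICY_MAX + dir, which is why
 * xfrm_spd_getinfo() reports them separately as the "*scnt" counters.
 */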
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct net *net = xp_net(pol);
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

		/* Unlinking always succeeds here: this is the only function
		 * allowed to delete or replace a socket policy.
		 */
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
	struct net *net = xp_net(old);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;	/* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->mark = old->mark;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&net->xfrm.xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}

static int
xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(net, local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
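/* Template resolution walks the policy's xfrm_vec in order.  Each
 * template must resolve to a VALID xfrm_state unless it is marked
 * optional, in which case it may simply be skipped; -ESRCH from
 * xfrm_state_find() is mapped to -EAGAIN so callers know to wait for the
 * key manager rather than fail the flow outright.
 */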
/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
{
	struct net *net = xp_net(policy);
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			if (xfrm_addr_any(local, tmpl->encap_family)) {
				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		} else if (error == -ESRCH) {
			error = -EAGAIN;
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx >= 0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx >= 0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static inline int xfrm_get_tos(const struct flowi *fl, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;

	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}
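/* Bundles live in the flow cache as flow_cache_object's.  A dummy bundle
 * (xdst->route == NULL) is parked there while SAs are still being
 * negotiated; returning NULL from ->get forces the resolver to run again
 * instead of reusing the stale entry.
 */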
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (xdst->route == NULL) {
		/* Dummy bundle - if it has xfrms we were not
		 * able to build the bundle because template resolution
		 * failed.  That means we need to retry resolving. */
		if (xdst->num_xfrms > 0)
			return NULL;
	} else if (dst->flags & DST_XFRM_QUEUE) {
		return NULL;
	} else {
		/* Real bundle */
		if (stale_bundle(dst))
			return NULL;
	}

	dst_hold(dst);
	return flo;
}

static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (!xdst->route)
		return 0;
	if (stale_bundle(dst))
		return 0;

	return 1;
}

static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	dst_free(dst);
}

static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};

static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_ops *dst_ops;
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);

	if (likely(xdst)) {
		struct dst_entry *dst = &xdst->u.dst;

		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
		xdst->flo.ops = &xfrm_bundle_fc_ops;
		if (afinfo->init_dst)
			afinfo->init_dst(net, xdst);
	} else
		xdst = ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
				const struct flowi *fl)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->fill_dst(xdst, dev, fl);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}
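/* The bundle built below is a chain of xfrm_dst's linked through ->child,
 * outermost transform first.  Each hop's ->route points at the route used
 * to reach that hop, and dst0->path points at the final non-xfrm route,
 * so stale_bundle() can later revalidate every level of the chain.
 */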
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
					    const struct flowi *fl,
					    struct dst_entry *dst)
{
	struct net *net = xp_net(policy);
	unsigned long now = jiffies;
	struct net_device *dev;
	struct xfrm_mode *inner_mode;
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
	int nfheader_len = 0;
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

		if (xfrm[i]->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(xfrm[i],
							xfrm_af2proto(family));
			if (!inner_mode) {
				err = -EAFNOSUPPORT;
				dst_release(dst);
				goto put_states;
			}
		} else
			inner_mode = xfrm[i]->inner_mode;

		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
		dst_copy_metrics(dst1, dst);

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
					      family);
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
		xdst->xfrm_genid = xfrm[i]->genid;

		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
		dst1->output = inner_mode->afinfo->output;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

		err = xfrm_fill_dst(xdst, dev, fl);
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}

#ifdef CONFIG_XFRM_SUB_POLICY
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}

	memcpy(*target, src, size);
	return 0;
}
#endif
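/* With CONFIG_XFRM_SUB_POLICY the bundle remembers the sub-policy
 * selector (xdst->partner) or the original flow (xdst->origin) so the
 * bundle can be revalidated against them later; without it both helpers
 * below compile to no-ops.
 */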
static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

static int xfrm_expand_policies(const struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int i;

	if (*num_pols == 0 || !pols[0]) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(pols[0]))
		return PTR_ERR(pols[0]);

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				xfrm_pols_put(pols, *num_pols);
				return PTR_ERR(pols[1]);
			}
			(*num_pols)++;
			(*num_xfrms) += pols[1]->xfrm_nr;
		}
	}
#endif
	for (i = 0; i < *num_pols; i++) {
		if (pols[i]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;

}

static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst;
	struct xfrm_dst *xdst;
	int err;

	/* Try to instantiate a bundle */
	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
	if (err <= 0) {
		if (err != 0 && err != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		return ERR_PTR(err);
	}

	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(dst);
	}

	xdst = (struct xfrm_dst *)dst;
	xdst->num_xfrms = err;
	if (num_pols > 1)
		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
	else
		err = xfrm_dst_update_origin(dst, fl);
	if (unlikely(err)) {
		dst_free(dst);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}
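/* Packets that match a policy before its SAs exist are parked on
 * pol->polq.hold_queue.  The hold timer below retries the full lookup;
 * while the route still carries DST_XFRM_QUEUE the timeout doubles on
 * each pass, bounded by XFRM_QUEUE_TMO_MIN/MAX, and once a real bundle
 * appears the queued packets are re-routed and transmitted.
 */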
static void xfrm_policy_queue_process(unsigned long arg)
{
	int err = 0;
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
	if (!skb) {
		spin_unlock(&pq->hold_queue.lock);
		goto out;
	}
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	dst_hold(dst->path);
	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
			  sk, 0);
	if (IS_ERR(dst))
		goto purge_queue;

	if (dst->flags & DST_XFRM_QUEUE) {
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
			xfrm_pol_hold(pol);
		goto out;
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
				  &fl, skb->sk, 0);
		if (IS_ERR(dst)) {
			kfree_skb(skb);
			continue;
		}

		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

		err = dst_output(skb);
	}

out:
	xfrm_pol_put(pol);
	return;

purge_queue:
	pq->timeout = 0;
	xfrm_queue_purge(&pq->hold_queue);
	xfrm_pol_put(pol);
}

static int xdst_queue_output(struct sk_buff *skb)
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
	struct xfrm_policy *pol = xdst->pols[0];
	struct xfrm_policy_queue *pq = &pol->polq;
	const struct sk_buff *fclone = skb + 1;

	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
		     fclone->fclone == SKB_FCLONE_CLONE)) {
		kfree_skb(skb);
		return 0;
	}

	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);

	spin_lock_bh(&pq->hold_queue.lock);

	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
		xfrm_pol_put(pol);
	}

	__skb_queue_tail(&pq->hold_queue, skb);
	if (!mod_timer(&pq->hold_timer, sched_next))
		xfrm_pol_hold(pol);

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}
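/* A dummy bundle carries no transforms; its output hook is
 * xdst_queue_output() above, so packets are queued until real states show
 * up.  When sysctl_larval_drop is set (or there is nothing to resolve)
 * the bare xdst is returned early instead, and xfrm_lookup() will
 * blackhole the flow rather than queue it.
 */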
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct dst_entry *dst,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	int err;
	struct net_device *dev;
	struct dst_entry *dst1;
	struct xfrm_dst *xdst;

	xdst = xfrm_alloc_dst(net, family);
	if (IS_ERR(xdst))
		return xdst;

	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0)
		return xdst;

	dst1 = &xdst->u.dst;
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dst1, dst);

	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
	dst1->lastuse = jiffies;

	dst1->input = dst_discard;
	dst1->output = xdst_queue_output;

	dst_hold(dst);
	dst1->child = dst;
	dst1->path = dst;

	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto free_dst;

out:
	return xdst;

free_dst:
	dst_release(dst1);
	xdst = ERR_PTR(err);
	goto out;
}

static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
		   struct flow_cache_object *oldflo, void *ctx)
{
	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		if (pol_dead) {
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
		pols[0] = __xfrm_policy_lookup(net, fl, family,
					       flow_to_policy_dir(dir));
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
	} else if (new_xdst == NULL) {
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen for newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* Flow cache does not have reference, it dst_free()'s,
	 * but we do need to return one reference for original caller */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there are no bundles to instantiate:
	 * either because the policy blocks, has no transformations or
	 * we could not build a template (no xfrm_states). */
	xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}

static struct dst_entry *make_blackhole(struct net *net, u16 family,
					struct dst_entry *dst_orig)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_entry *ret;

	if (!afinfo) {
		dst_release(dst_orig);
		return ERR_PTR(-EINVAL);
	} else {
		ret = afinfo->blackhole_route(net, dst_orig);
	}
	xfrm_policy_put_afinfo(afinfo);

	return ret;
}
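/* Reference semantics of xfrm_lookup(): dst_orig is consumed on error and
 * whenever the flow is transformed; if the flow passes untransformed the
 * original route is handed straight back.  With sysctl_larval_drop set, a
 * flow stuck waiting for SAs gets a blackhole route instead of -EAGAIN.
 */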
/* Main function: finds/creates a bundle for a given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      struct sock *sk, int flags)
{
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
	struct dst_entry *dst, *route;
	u16 family = dst_orig->ops->family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;

	dst = NULL;
	xdst = NULL;
	route = NULL;

	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto dropdst;

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
			}

			dst_hold(&xdst->u.dst);

			spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
			xdst->u.dst.next = xfrm_policy_sk_bundles;
			xfrm_policy_sk_bundles = &xdst->u.dst;
			spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);

			route = xdst->route;
		}
	}

	if (xdst == NULL) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
			goto nopol;

		flo = flow_cache_lookup(net, fl, family, dir,
					xfrm_bundle_lookup, dst_orig);
		if (flo == NULL)
			goto nopol;
		if (IS_ERR(flo)) {
			err = PTR_ERR(flo);
			goto dropdst;
		}
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with a null route is when the template could
		 * not be resolved. It means the policies are there, but
		 * the bundle could not be created, since we don't yet
		 * have the xfrm_states. We need to wait for the KM to
		 * negotiate new SAs or bail out with an error. */
		if (net->xfrm.sysctl_larval_drop) {
			dst_release(dst);
			xfrm_pols_put(pols, drop_pols);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);

			return make_blackhole(net, family, dst_orig);
		}

		err = -EAGAIN;

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
	}

no_transform:
	if (num_pols == 0)
		goto nopol;

	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
		goto error;
	}

	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();

	if (num_xfrms < 0) {
		/* Prohibit the flow */
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
		dst = dst_orig;
	}
ok:
	xfrm_pols_put(pols, drop_pols);
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
	return dst;

nopol:
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
		goto ok;
	}
	err = -ENOENT;
error:
	dst_release(dst);
dropdst:
	dst_release(dst_orig);
	xfrm_pols_put(pols, drop_pols);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup);

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}
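/* A hedged reading of the matching rules implemented by xfrm_state_ok()
 * below, with a concrete (hypothetical) template:
 *
 *	struct xfrm_tmpl tmpl = {
 *		.id.proto	= IPPROTO_ESP,
 *		.id.spi		= 0,	// wildcard: "|| !tmpl->id.spi"
 *		.reqid		= 0,	// wildcard: "|| !tmpl->reqid"
 *		.mode		= XFRM_MODE_TUNNEL,
 *	};
 *
 * matches any ESP state in tunnel mode regardless of SPI or reqid,
 * provided the addresses compare equal for non-transport modes (the
 * final xfrm_state_addr_cmp() clause).
 */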
/* When the skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have the policy cached at them.
 */

static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * 0 or more than 0 is returned when validation succeeds (either a bypass
 * because of an optional transport-mode template, or the next index of
 * the secpath state matched with the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}
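/* A worked example of the return encoding above (illustrative values):
 * if the template fails to match and the walk stops at a non-transport
 * secpath entry at idx == 3, xfrm_policy_ok() returns -2 - 3 == -5.
 * __xfrm_policy_check() below decodes this with
 *
 *	if (k < -1)
 *		xerr_idx = -(2+k);	// -(2 + -5) == 3
 *
 * recovering the errored index so that state's ->reject() hook can be
 * invoked via xfrm_secpath_reject().
 */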
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl, reverse);
	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(__xfrm_decode_session);

static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}
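/* A hedged note on the "dir" argument of __xfrm_policy_check() below:
 * callers may OR a reverse-decode flag above XFRM_POLICY_MASK into it,
 * which the function splits apart again:
 *
 *	reverse = dir & ~XFRM_POLICY_MASK;
 *	dir &= XFRM_POLICY_MASK;
 *
 * so a plain inbound check is simply
 * __xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family), normally
 * reached through the xfrm_policy_check() wrapper in net/xfrm.h.
 */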
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check the used SAs against their selectors. */
	if (skb->sp) {
		int i;

		for (i = skb->sp->len-1; i >= 0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find the corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between every two transformations.
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
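/* __xfrm_route_forward() below is the forwarding-path counterpart of
 * the checks above: it re-runs xfrm_lookup() on the skb's current route
 * so that forwarded packets pick up any outbound bundle. A hedged
 * sketch of the effect:
 *
 *	skb_dst_force(skb);
 *	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
 *	// on success the skb's dst is replaced (possibly by itself)
 *
 * On failure the skb is left with a NULL dst and the caller drops it.
 */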
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct flowi fl;
	struct dst_entry *dst;
	int res = 1;

	if (xfrm_decode_session(skb, &fl, family) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
		return 0;
	}

	skb_dst_force(skb);

	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
	if (IS_ERR(dst)) {
		res = 0;
		dst = NULL;
	}
	skb_dst_set(skb, dst);
	return res;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use. We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted we do not go looking around for all of the parent
	 * XFRM dsts referencing it so that we can invalidate them. It
	 * is just too much work. Instead we make the checks here on
	 * every use. For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example). If X is marked obsolete, "A" will not
	 * notice. That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to
	 * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like
	 * this, we want to force a new route lookup.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok((struct xfrm_dst *)dst);
}
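/* xfrm_dst_ifdown() below handles device-teardown for bundles: each
 * xfrm child dst still pointing at the dying device is re-pointed at
 * the per-namespace loopback device, taking a reference on loopback
 * and dropping the one held on the old device, so the device can
 * actually go away while the bundle lingers until it is pruned.
 */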
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = dev_net(dev)->loopback_dev;
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before it reaches the
	 * point of failure. */
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst) {
		if (dst->obsolete) {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void __xfrm_garbage_collect(struct net *net)
{
	struct dst_entry *head, *next;

	spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
	head = xfrm_policy_sk_bundles;
	xfrm_policy_sk_bundles = NULL;
	spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);

	while (head) {
		next = head->next;
		dst_free(head);
		head = next;
	}
}

void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush();
	__xfrm_garbage_collect(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);

static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred();
	__xfrm_garbage_collect(net);
}

static void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst_metric_set(dst, RTAX_MTU, pmtu);
	} while ((dst = dst->next));
}
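/* An illustrative (hypothetical numbers) walk through the PMTU logic
 * above for a single tunnel-mode entry: with a child MTU of 1500,
 * xfrm_state_mtu() subtracts the state's header/trailer overhead (for
 * ESP typically a few dozen bytes, depending on the algorithms),
 * giving e.g. ~1440; if the route underneath caches a smaller MTU,
 * say 1400, the bundle's RTAX_MTU becomes 1400. xfrm_bundle_ok()
 * below re-runs the same min() whenever a cached MTU changes.
 */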
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static int xfrm_bundle_ok(struct xfrm_dst *first)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst_metric_set(dst, RTAX_MTU, mtu);

		if (last == first)
			break;

		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	return dst_metric_advmss(dst->path);
}

static unsigned int xfrm_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst_mtu(dst->path);
}

static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
}
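/* A hedged sketch of how an address family plugs into the hooks below.
 * The field names follow this file's usage of struct xfrm_policy_afinfo;
 * the initializer itself is illustrative (the real IPv4 one lives in
 * net/ipv4/xfrm4_policy.c):
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family		 = AF_INET,
 *		.dst_ops	 = &xfrm4_dst_ops,
 *		.dst_lookup	 = xfrm4_dst_lookup,
 *		.decode_session	 = _decode_session4,
 *		.blackhole_route = ipv4_blackhole_route,
 *	};
 *
 *	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 *
 * Unset dst_ops hooks (check, mtu, neigh_lookup, ...) are filled in
 * with the xfrm_* defaults above by xfrm_policy_register_afinfo().
 */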
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	struct net *net;
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);

	rtnl_lock();
	for_each_net(net) {
		struct dst_ops *xfrm_dst_ops;

		switch (afinfo->family) {
		case AF_INET:
			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
			break;
#if IS_ENABLED(CONFIG_IPV6)
		case AF_INET6:
			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
			break;
#endif
		default:
			BUG();
		}
		*xfrm_dst_ops = *afinfo->dst_ops;
	}
	rtnl_unlock();

	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
					 NULL);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);
	if (!err) {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		synchronize_rcu();

		dst_ops->kmem_cachep = NULL;
		dst_ops->check = NULL;
		dst_ops->negative_advice = NULL;
		dst_ops->link_failure = NULL;
		afinfo->garbage_collect = NULL;
	}
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static void __net_init xfrm_dst_ops_init(struct net *net)
{
	struct xfrm_policy_afinfo *afinfo;

	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
	if (afinfo)
		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
#if IS_ENABLED(CONFIG_IPV6)
	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
	if (afinfo)
		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
#endif
	rcu_read_unlock();
}

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_DOWN:
		xfrm_garbage_collect(dev_net(dev));
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
	.notifier_call	= xfrm_dev_event,
};

#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;

	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
			  sizeof(struct linux_xfrm_mib),
			  __alignof__(struct linux_xfrm_mib)) < 0)
		return -ENOMEM;
	rv = xfrm_proc_init(net);
	if (rv < 0)
		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
	return rv;
}

static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
}
#else
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

static void xfrm_statistics_fini(struct net *net)
{
}
#endif
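/* A small worked example of the initial hash sizing in
 * xfrm_policy_init() below: hmask = 8 - 1, so each per-direction
 * "bydst" table and the shared "byidx" table start with hmask + 1 == 8
 * buckets, i.e. sz == 8 * sizeof(struct hlist_head) (64 bytes on a
 * 64-bit build). The tables are grown later by the policy_hash_work
 * worker (xfrm_hash_resize), not here.
 */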
static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		goto out_byidx;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		net->xfrm.policy_count[dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
	}

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	for (dir--; dir >= 0; dir--) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
	struct xfrm_audit audit_info;
	unsigned int sz;
	int dir;

	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
	audit_info.loginuid = INVALID_UID;
	audit_info.sessionid = (unsigned int)-1;
	audit_info.secid = 0;
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
#endif
	audit_info.loginuid = INVALID_UID;
	audit_info.sessionid = (unsigned int)-1;
	audit_info.secid = 0;
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);

	WARN_ON(!list_empty(&net->xfrm.policy_all));

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));

		htab = &net->xfrm.policy_bydst[dir];
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
	}

	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
}

static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	xfrm_dst_ops_init(net);
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;

	/* Initialize the per-net locks here */
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	rwlock_init(&net->xfrm.xfrm_policy_lock);
	spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock);
	mutex_init(&net->xfrm.xfrm_cfg_mutex);

	return 0;

out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

static void __net_exit xfrm_net_exit(struct net *net)
{
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}

static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
	xfrm_input_init();
}

#ifdef CONFIG_AUDITSYSCALL
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}

void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
			   kuid_t auid, unsigned int sessionid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-add");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
			      kuid_t auid, unsigned int sessionid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-delete");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif
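/* The section below (CONFIG_XFRM_MIGRATE) implements SADB/SPD endpoint
 * migration as used by Mobile IPv6 / MOBIKE-style key managers: a KM
 * hands in an array of struct xfrm_migrate entries mapping old tunnel
 * endpoints to new ones, and xfrm_migrate() applies them in five stages
 * (validate, find the policy, clone/update states, rewrite the policy's
 * templates, then announce via km_migrate()). The "MOBIKE-style"
 * characterization is editorial; the staging itself is spelled out in
 * xfrm_migrate() below.
 */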
#ifdef CONFIG_XFRM_MIGRATE
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
				    sel_cmp->family) &&
		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
				    sel_cmp->family) &&
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
			return true;
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
			return true;
		}
	}
	return false;
}

static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_head *chain;
	u32 priority = ~0U;

	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
	chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type &&
		    pol->priority < priority) {
			ret = pol;
			break;
		}
	}

	if (ret)
		xfrm_pol_hold(ret);

	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return ret;
}

static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* In transport mode the template does not store
			   any IP addresses, hence we just compare mode
			   and protocol. */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
	if (unlikely(pol->walk.dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
			atomic_inc(&pol->genid);
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check for duplicated entries */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}

int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k, struct net *net)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	/* Stage 1 - find policy */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp, net))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
#endif