1 /* 2 * xfrm_policy.c 3 * 4 * Changes: 5 * Mitsuru KANDA @USAGI 6 * Kazunori MIYAZAWA @USAGI 7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com> 8 * IPv6 support 9 * Kazunori MIYAZAWA @USAGI 10 * YOSHIFUJI Hideaki 11 * Split up af-specific portion 12 * Derek Atkins <derek@ihtfp.com> Add the post_input processor 13 * 14 */ 15 16 #include <linux/err.h> 17 #include <linux/slab.h> 18 #include <linux/kmod.h> 19 #include <linux/list.h> 20 #include <linux/spinlock.h> 21 #include <linux/workqueue.h> 22 #include <linux/notifier.h> 23 #include <linux/netdevice.h> 24 #include <linux/netfilter.h> 25 #include <linux/module.h> 26 #include <linux/cache.h> 27 #include <linux/audit.h> 28 #include <net/dst.h> 29 #include <net/flow.h> 30 #include <net/xfrm.h> 31 #include <net/ip.h> 32 #ifdef CONFIG_XFRM_STATISTICS 33 #include <net/snmp.h> 34 #endif 35 36 #include "xfrm_hash.h" 37 38 #define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) 39 #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) 40 #define XFRM_MAX_QUEUE_LEN 100 41 42 DEFINE_MUTEX(xfrm_cfg_mutex); 43 EXPORT_SYMBOL(xfrm_cfg_mutex); 44 45 static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); 46 static struct dst_entry *xfrm_policy_sk_bundles; 47 static DEFINE_RWLOCK(xfrm_policy_lock); 48 49 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); 50 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] 51 __read_mostly; 52 53 static struct kmem_cache *xfrm_dst_cache __read_mostly; 54 55 static void xfrm_init_pmtu(struct dst_entry *dst); 56 static int stale_bundle(struct dst_entry *dst); 57 static int xfrm_bundle_ok(struct xfrm_dst *xdst); 58 static void xfrm_policy_queue_process(unsigned long arg); 59 60 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 61 int dir); 62 63 static inline bool 64 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) 65 { 66 const struct flowi4 *fl4 = &fl->u.ip4; 67 68 return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && 69 addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && 70 !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && 71 !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && 72 (fl4->flowi4_proto == sel->proto || !sel->proto) && 73 (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); 74 } 75 76 static inline bool 77 __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) 78 { 79 const struct flowi6 *fl6 = &fl->u.ip6; 80 81 return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && 82 addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && 83 !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && 84 !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && 85 (fl6->flowi6_proto == sel->proto || !sel->proto) && 86 (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); 87 } 88 89 bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, 90 unsigned short family) 91 { 92 switch (family) { 93 case AF_INET: 94 return __xfrm4_selector_match(sel, fl); 95 case AF_INET6: 96 return __xfrm6_selector_match(sel, fl); 97 } 98 return false; 99 } 100 101 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) 102 { 103 struct xfrm_policy_afinfo *afinfo; 104 105 if (unlikely(family >= NPROTO)) 106 return NULL; 107 rcu_read_lock(); 108 afinfo = rcu_dereference(xfrm_policy_afinfo[family]); 109 if (unlikely(!afinfo)) 110 rcu_read_unlock(); 111 return afinfo; 112 } 113 114 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) 115 { 116 rcu_read_unlock(); 117 } 118 119 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, 120 const xfrm_address_t *saddr, 121 const xfrm_address_t *daddr, 122 int family) 123 { 124 struct xfrm_policy_afinfo *afinfo; 125 struct dst_entry *dst; 126 127 afinfo = xfrm_policy_get_afinfo(family); 128 if (unlikely(afinfo == NULL)) 129 return ERR_PTR(-EAFNOSUPPORT); 130 131 dst = afinfo->dst_lookup(net, tos, saddr, daddr); 132 133 xfrm_policy_put_afinfo(afinfo); 134 135 return dst; 136 } 137 138 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, 139 xfrm_address_t *prev_saddr, 140 xfrm_address_t *prev_daddr, 141 int family) 142 { 143 struct net *net = xs_net(x); 144 xfrm_address_t *saddr = &x->props.saddr; 145 xfrm_address_t *daddr = &x->id.daddr; 146 struct dst_entry *dst; 147 148 if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { 149 saddr = x->coaddr; 150 daddr = prev_daddr; 151 } 152 if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { 153 saddr = prev_saddr; 154 daddr = x->coaddr; 155 } 156 157 dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); 158 159 if (!IS_ERR(dst)) { 160 if (prev_saddr != saddr) 161 memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); 162 if (prev_daddr != daddr) 163 memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); 164 } 165 166 return dst; 167 } 168 169 static inline unsigned long make_jiffies(long secs) 170 { 171 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) 172 return MAX_SCHEDULE_TIMEOUT-1; 173 else 174 return secs*HZ; 175 } 176 177 static void xfrm_policy_timer(unsigned long data) 178 { 179 struct xfrm_policy *xp = (struct xfrm_policy*)data; 180 unsigned long now = get_seconds(); 181 long next = LONG_MAX; 182 int warn = 0; 183 int dir; 184 185 read_lock(&xp->lock); 186 187 if (unlikely(xp->walk.dead)) 188 goto out; 189 190 dir = xfrm_policy_id2dir(xp->index); 191 192 if (xp->lft.hard_add_expires_seconds) { 193 long tmo = xp->lft.hard_add_expires_seconds + 194 xp->curlft.add_time - now; 195 if (tmo <= 0) 196 goto expired; 197 if (tmo < next) 198 next = tmo; 199 } 200 if (xp->lft.hard_use_expires_seconds) { 201 long tmo = xp->lft.hard_use_expires_seconds + 202 (xp->curlft.use_time ? : xp->curlft.add_time) - now; 203 if (tmo <= 0) 204 goto expired; 205 if (tmo < next) 206 next = tmo; 207 } 208 if (xp->lft.soft_add_expires_seconds) { 209 long tmo = xp->lft.soft_add_expires_seconds + 210 xp->curlft.add_time - now; 211 if (tmo <= 0) { 212 warn = 1; 213 tmo = XFRM_KM_TIMEOUT; 214 } 215 if (tmo < next) 216 next = tmo; 217 } 218 if (xp->lft.soft_use_expires_seconds) { 219 long tmo = xp->lft.soft_use_expires_seconds + 220 (xp->curlft.use_time ? : xp->curlft.add_time) - now; 221 if (tmo <= 0) { 222 warn = 1; 223 tmo = XFRM_KM_TIMEOUT; 224 } 225 if (tmo < next) 226 next = tmo; 227 } 228 229 if (warn) 230 km_policy_expired(xp, dir, 0, 0); 231 if (next != LONG_MAX && 232 !mod_timer(&xp->timer, jiffies + make_jiffies(next))) 233 xfrm_pol_hold(xp); 234 235 out: 236 read_unlock(&xp->lock); 237 xfrm_pol_put(xp); 238 return; 239 240 expired: 241 read_unlock(&xp->lock); 242 if (!xfrm_policy_delete(xp, dir)) 243 km_policy_expired(xp, dir, 1, 0); 244 xfrm_pol_put(xp); 245 } 246 247 static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) 248 { 249 struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); 250 251 if (unlikely(pol->walk.dead)) 252 flo = NULL; 253 else 254 xfrm_pol_hold(pol); 255 256 return flo; 257 } 258 259 static int xfrm_policy_flo_check(struct flow_cache_object *flo) 260 { 261 struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); 262 263 return !pol->walk.dead; 264 } 265 266 static void xfrm_policy_flo_delete(struct flow_cache_object *flo) 267 { 268 xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); 269 } 270 271 static const struct flow_cache_ops xfrm_policy_fc_ops = { 272 .get = xfrm_policy_flo_get, 273 .check = xfrm_policy_flo_check, 274 .delete = xfrm_policy_flo_delete, 275 }; 276 277 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 278 * SPD calls. 279 */ 280 281 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) 282 { 283 struct xfrm_policy *policy; 284 285 policy = kzalloc(sizeof(struct xfrm_policy), gfp); 286 287 if (policy) { 288 write_pnet(&policy->xp_net, net); 289 INIT_LIST_HEAD(&policy->walk.all); 290 INIT_HLIST_NODE(&policy->bydst); 291 INIT_HLIST_NODE(&policy->byidx); 292 rwlock_init(&policy->lock); 293 atomic_set(&policy->refcnt, 1); 294 skb_queue_head_init(&policy->polq.hold_queue); 295 setup_timer(&policy->timer, xfrm_policy_timer, 296 (unsigned long)policy); 297 setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, 298 (unsigned long)policy); 299 policy->flo.ops = &xfrm_policy_fc_ops; 300 } 301 return policy; 302 } 303 EXPORT_SYMBOL(xfrm_policy_alloc); 304 305 /* Destroy xfrm_policy: descendant resources must be released to this moment. */ 306 307 void xfrm_policy_destroy(struct xfrm_policy *policy) 308 { 309 BUG_ON(!policy->walk.dead); 310 311 if (del_timer(&policy->timer)) 312 BUG(); 313 314 security_xfrm_policy_free(policy->security); 315 kfree(policy); 316 } 317 EXPORT_SYMBOL(xfrm_policy_destroy); 318 319 static void xfrm_queue_purge(struct sk_buff_head *list) 320 { 321 struct sk_buff *skb; 322 323 while ((skb = skb_dequeue(list)) != NULL) { 324 dev_put(skb->dev); 325 kfree_skb(skb); 326 } 327 } 328 329 /* Rule must be locked. Release descentant resources, announce 330 * entry dead. The rule must be unlinked from lists to the moment. 331 */ 332 333 static void xfrm_policy_kill(struct xfrm_policy *policy) 334 { 335 policy->walk.dead = 1; 336 337 atomic_inc(&policy->genid); 338 339 del_timer(&policy->polq.hold_timer); 340 xfrm_queue_purge(&policy->polq.hold_queue); 341 342 if (del_timer(&policy->timer)) 343 xfrm_pol_put(policy); 344 345 xfrm_pol_put(policy); 346 } 347 348 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; 349 350 static inline unsigned int idx_hash(struct net *net, u32 index) 351 { 352 return __idx_hash(index, net->xfrm.policy_idx_hmask); 353 } 354 355 static struct hlist_head *policy_hash_bysel(struct net *net, 356 const struct xfrm_selector *sel, 357 unsigned short family, int dir) 358 { 359 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 360 unsigned int hash = __sel_hash(sel, family, hmask); 361 362 return (hash == hmask + 1 ? 363 &net->xfrm.policy_inexact[dir] : 364 net->xfrm.policy_bydst[dir].table + hash); 365 } 366 367 static struct hlist_head *policy_hash_direct(struct net *net, 368 const xfrm_address_t *daddr, 369 const xfrm_address_t *saddr, 370 unsigned short family, int dir) 371 { 372 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 373 unsigned int hash = __addr_hash(daddr, saddr, family, hmask); 374 375 return net->xfrm.policy_bydst[dir].table + hash; 376 } 377 378 static void xfrm_dst_hash_transfer(struct hlist_head *list, 379 struct hlist_head *ndsttable, 380 unsigned int nhashmask) 381 { 382 struct hlist_node *tmp, *entry0 = NULL; 383 struct xfrm_policy *pol; 384 unsigned int h0 = 0; 385 386 redo: 387 hlist_for_each_entry_safe(pol, tmp, list, bydst) { 388 unsigned int h; 389 390 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, 391 pol->family, nhashmask); 392 if (!entry0) { 393 hlist_del(&pol->bydst); 394 hlist_add_head(&pol->bydst, ndsttable+h); 395 h0 = h; 396 } else { 397 if (h != h0) 398 continue; 399 hlist_del(&pol->bydst); 400 hlist_add_after(entry0, &pol->bydst); 401 } 402 entry0 = &pol->bydst; 403 } 404 if (!hlist_empty(list)) { 405 entry0 = NULL; 406 goto redo; 407 } 408 } 409 410 static void xfrm_idx_hash_transfer(struct hlist_head *list, 411 struct hlist_head *nidxtable, 412 unsigned int nhashmask) 413 { 414 struct hlist_node *tmp; 415 struct xfrm_policy *pol; 416 417 hlist_for_each_entry_safe(pol, tmp, list, byidx) { 418 unsigned int h; 419 420 h = __idx_hash(pol->index, nhashmask); 421 hlist_add_head(&pol->byidx, nidxtable+h); 422 } 423 } 424 425 static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) 426 { 427 return ((old_hmask + 1) << 1) - 1; 428 } 429 430 static void xfrm_bydst_resize(struct net *net, int dir) 431 { 432 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 433 unsigned int nhashmask = xfrm_new_hash_mask(hmask); 434 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); 435 struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; 436 struct hlist_head *ndst = xfrm_hash_alloc(nsize); 437 int i; 438 439 if (!ndst) 440 return; 441 442 write_lock_bh(&xfrm_policy_lock); 443 444 for (i = hmask; i >= 0; i--) 445 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); 446 447 net->xfrm.policy_bydst[dir].table = ndst; 448 net->xfrm.policy_bydst[dir].hmask = nhashmask; 449 450 write_unlock_bh(&xfrm_policy_lock); 451 452 xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); 453 } 454 455 static void xfrm_byidx_resize(struct net *net, int total) 456 { 457 unsigned int hmask = net->xfrm.policy_idx_hmask; 458 unsigned int nhashmask = xfrm_new_hash_mask(hmask); 459 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); 460 struct hlist_head *oidx = net->xfrm.policy_byidx; 461 struct hlist_head *nidx = xfrm_hash_alloc(nsize); 462 int i; 463 464 if (!nidx) 465 return; 466 467 write_lock_bh(&xfrm_policy_lock); 468 469 for (i = hmask; i >= 0; i--) 470 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); 471 472 net->xfrm.policy_byidx = nidx; 473 net->xfrm.policy_idx_hmask = nhashmask; 474 475 write_unlock_bh(&xfrm_policy_lock); 476 477 xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); 478 } 479 480 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) 481 { 482 unsigned int cnt = net->xfrm.policy_count[dir]; 483 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 484 485 if (total) 486 *total += cnt; 487 488 if ((hmask + 1) < xfrm_policy_hashmax && 489 cnt > hmask) 490 return 1; 491 492 return 0; 493 } 494 495 static inline int xfrm_byidx_should_resize(struct net *net, int total) 496 { 497 unsigned int hmask = net->xfrm.policy_idx_hmask; 498 499 if ((hmask + 1) < xfrm_policy_hashmax && 500 total > hmask) 501 return 1; 502 503 return 0; 504 } 505 506 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) 507 { 508 read_lock_bh(&xfrm_policy_lock); 509 si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; 510 si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; 511 si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; 512 si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; 513 si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; 514 si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; 515 si->spdhcnt = net->xfrm.policy_idx_hmask; 516 si->spdhmcnt = xfrm_policy_hashmax; 517 read_unlock_bh(&xfrm_policy_lock); 518 } 519 EXPORT_SYMBOL(xfrm_spd_getinfo); 520 521 static DEFINE_MUTEX(hash_resize_mutex); 522 static void xfrm_hash_resize(struct work_struct *work) 523 { 524 struct net *net = container_of(work, struct net, xfrm.policy_hash_work); 525 int dir, total; 526 527 mutex_lock(&hash_resize_mutex); 528 529 total = 0; 530 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 531 if (xfrm_bydst_should_resize(net, dir, &total)) 532 xfrm_bydst_resize(net, dir); 533 } 534 if (xfrm_byidx_should_resize(net, total)) 535 xfrm_byidx_resize(net, total); 536 537 mutex_unlock(&hash_resize_mutex); 538 } 539 540 /* Generate new index... KAME seems to generate them ordered by cost 541 * of an absolute inpredictability of ordering of rules. This will not pass. */ 542 static u32 xfrm_gen_index(struct net *net, int dir) 543 { 544 static u32 idx_generator; 545 546 for (;;) { 547 struct hlist_head *list; 548 struct xfrm_policy *p; 549 u32 idx; 550 int found; 551 552 idx = (idx_generator | dir); 553 idx_generator += 8; 554 if (idx == 0) 555 idx = 8; 556 list = net->xfrm.policy_byidx + idx_hash(net, idx); 557 found = 0; 558 hlist_for_each_entry(p, list, byidx) { 559 if (p->index == idx) { 560 found = 1; 561 break; 562 } 563 } 564 if (!found) 565 return idx; 566 } 567 } 568 569 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) 570 { 571 u32 *p1 = (u32 *) s1; 572 u32 *p2 = (u32 *) s2; 573 int len = sizeof(struct xfrm_selector) / sizeof(u32); 574 int i; 575 576 for (i = 0; i < len; i++) { 577 if (p1[i] != p2[i]) 578 return 1; 579 } 580 581 return 0; 582 } 583 584 static void xfrm_policy_requeue(struct xfrm_policy *old, 585 struct xfrm_policy *new) 586 { 587 struct xfrm_policy_queue *pq = &old->polq; 588 struct sk_buff_head list; 589 590 __skb_queue_head_init(&list); 591 592 spin_lock_bh(&pq->hold_queue.lock); 593 skb_queue_splice_init(&pq->hold_queue, &list); 594 del_timer(&pq->hold_timer); 595 spin_unlock_bh(&pq->hold_queue.lock); 596 597 if (skb_queue_empty(&list)) 598 return; 599 600 pq = &new->polq; 601 602 spin_lock_bh(&pq->hold_queue.lock); 603 skb_queue_splice(&list, &pq->hold_queue); 604 pq->timeout = XFRM_QUEUE_TMO_MIN; 605 mod_timer(&pq->hold_timer, jiffies); 606 spin_unlock_bh(&pq->hold_queue.lock); 607 } 608 609 static bool xfrm_policy_mark_match(struct xfrm_policy *policy, 610 struct xfrm_policy *pol) 611 { 612 u32 mark = policy->mark.v & policy->mark.m; 613 614 if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) 615 return true; 616 617 if ((mark & pol->mark.m) == pol->mark.v && 618 policy->priority == pol->priority) 619 return true; 620 621 return false; 622 } 623 624 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) 625 { 626 struct net *net = xp_net(policy); 627 struct xfrm_policy *pol; 628 struct xfrm_policy *delpol; 629 struct hlist_head *chain; 630 struct hlist_node *newpos; 631 632 write_lock_bh(&xfrm_policy_lock); 633 chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); 634 delpol = NULL; 635 newpos = NULL; 636 hlist_for_each_entry(pol, chain, bydst) { 637 if (pol->type == policy->type && 638 !selector_cmp(&pol->selector, &policy->selector) && 639 xfrm_policy_mark_match(policy, pol) && 640 xfrm_sec_ctx_match(pol->security, policy->security) && 641 !WARN_ON(delpol)) { 642 if (excl) { 643 write_unlock_bh(&xfrm_policy_lock); 644 return -EEXIST; 645 } 646 delpol = pol; 647 if (policy->priority > pol->priority) 648 continue; 649 } else if (policy->priority >= pol->priority) { 650 newpos = &pol->bydst; 651 continue; 652 } 653 if (delpol) 654 break; 655 } 656 if (newpos) 657 hlist_add_after(newpos, &policy->bydst); 658 else 659 hlist_add_head(&policy->bydst, chain); 660 xfrm_pol_hold(policy); 661 net->xfrm.policy_count[dir]++; 662 atomic_inc(&flow_cache_genid); 663 rt_genid_bump(net); 664 if (delpol) { 665 xfrm_policy_requeue(delpol, policy); 666 __xfrm_policy_unlink(delpol, dir); 667 } 668 policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); 669 hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); 670 policy->curlft.add_time = get_seconds(); 671 policy->curlft.use_time = 0; 672 if (!mod_timer(&policy->timer, jiffies + HZ)) 673 xfrm_pol_hold(policy); 674 list_add(&policy->walk.all, &net->xfrm.policy_all); 675 write_unlock_bh(&xfrm_policy_lock); 676 677 if (delpol) 678 xfrm_policy_kill(delpol); 679 else if (xfrm_bydst_should_resize(net, dir, NULL)) 680 schedule_work(&net->xfrm.policy_hash_work); 681 682 return 0; 683 } 684 EXPORT_SYMBOL(xfrm_policy_insert); 685 686 struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, 687 int dir, struct xfrm_selector *sel, 688 struct xfrm_sec_ctx *ctx, int delete, 689 int *err) 690 { 691 struct xfrm_policy *pol, *ret; 692 struct hlist_head *chain; 693 694 *err = 0; 695 write_lock_bh(&xfrm_policy_lock); 696 chain = policy_hash_bysel(net, sel, sel->family, dir); 697 ret = NULL; 698 hlist_for_each_entry(pol, chain, bydst) { 699 if (pol->type == type && 700 (mark & pol->mark.m) == pol->mark.v && 701 !selector_cmp(sel, &pol->selector) && 702 xfrm_sec_ctx_match(ctx, pol->security)) { 703 xfrm_pol_hold(pol); 704 if (delete) { 705 *err = security_xfrm_policy_delete( 706 pol->security); 707 if (*err) { 708 write_unlock_bh(&xfrm_policy_lock); 709 return pol; 710 } 711 __xfrm_policy_unlink(pol, dir); 712 } 713 ret = pol; 714 break; 715 } 716 } 717 write_unlock_bh(&xfrm_policy_lock); 718 719 if (ret && delete) 720 xfrm_policy_kill(ret); 721 return ret; 722 } 723 EXPORT_SYMBOL(xfrm_policy_bysel_ctx); 724 725 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, 726 int dir, u32 id, int delete, int *err) 727 { 728 struct xfrm_policy *pol, *ret; 729 struct hlist_head *chain; 730 731 *err = -ENOENT; 732 if (xfrm_policy_id2dir(id) != dir) 733 return NULL; 734 735 *err = 0; 736 write_lock_bh(&xfrm_policy_lock); 737 chain = net->xfrm.policy_byidx + idx_hash(net, id); 738 ret = NULL; 739 hlist_for_each_entry(pol, chain, byidx) { 740 if (pol->type == type && pol->index == id && 741 (mark & pol->mark.m) == pol->mark.v) { 742 xfrm_pol_hold(pol); 743 if (delete) { 744 *err = security_xfrm_policy_delete( 745 pol->security); 746 if (*err) { 747 write_unlock_bh(&xfrm_policy_lock); 748 return pol; 749 } 750 __xfrm_policy_unlink(pol, dir); 751 } 752 ret = pol; 753 break; 754 } 755 } 756 write_unlock_bh(&xfrm_policy_lock); 757 758 if (ret && delete) 759 xfrm_policy_kill(ret); 760 return ret; 761 } 762 EXPORT_SYMBOL(xfrm_policy_byid); 763 764 #ifdef CONFIG_SECURITY_NETWORK_XFRM 765 static inline int 766 xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) 767 { 768 int dir, err = 0; 769 770 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 771 struct xfrm_policy *pol; 772 int i; 773 774 hlist_for_each_entry(pol, 775 &net->xfrm.policy_inexact[dir], bydst) { 776 if (pol->type != type) 777 continue; 778 err = security_xfrm_policy_delete(pol->security); 779 if (err) { 780 xfrm_audit_policy_delete(pol, 0, 781 audit_info->loginuid, 782 audit_info->sessionid, 783 audit_info->secid); 784 return err; 785 } 786 } 787 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { 788 hlist_for_each_entry(pol, 789 net->xfrm.policy_bydst[dir].table + i, 790 bydst) { 791 if (pol->type != type) 792 continue; 793 err = security_xfrm_policy_delete( 794 pol->security); 795 if (err) { 796 xfrm_audit_policy_delete(pol, 0, 797 audit_info->loginuid, 798 audit_info->sessionid, 799 audit_info->secid); 800 return err; 801 } 802 } 803 } 804 } 805 return err; 806 } 807 #else 808 static inline int 809 xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) 810 { 811 return 0; 812 } 813 #endif 814 815 int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) 816 { 817 int dir, err = 0, cnt = 0; 818 819 write_lock_bh(&xfrm_policy_lock); 820 821 err = xfrm_policy_flush_secctx_check(net, type, audit_info); 822 if (err) 823 goto out; 824 825 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 826 struct xfrm_policy *pol; 827 int i; 828 829 again1: 830 hlist_for_each_entry(pol, 831 &net->xfrm.policy_inexact[dir], bydst) { 832 if (pol->type != type) 833 continue; 834 __xfrm_policy_unlink(pol, dir); 835 write_unlock_bh(&xfrm_policy_lock); 836 cnt++; 837 838 xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, 839 audit_info->sessionid, 840 audit_info->secid); 841 842 xfrm_policy_kill(pol); 843 844 write_lock_bh(&xfrm_policy_lock); 845 goto again1; 846 } 847 848 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { 849 again2: 850 hlist_for_each_entry(pol, 851 net->xfrm.policy_bydst[dir].table + i, 852 bydst) { 853 if (pol->type != type) 854 continue; 855 __xfrm_policy_unlink(pol, dir); 856 write_unlock_bh(&xfrm_policy_lock); 857 cnt++; 858 859 xfrm_audit_policy_delete(pol, 1, 860 audit_info->loginuid, 861 audit_info->sessionid, 862 audit_info->secid); 863 xfrm_policy_kill(pol); 864 865 write_lock_bh(&xfrm_policy_lock); 866 goto again2; 867 } 868 } 869 870 } 871 if (!cnt) 872 err = -ESRCH; 873 out: 874 write_unlock_bh(&xfrm_policy_lock); 875 return err; 876 } 877 EXPORT_SYMBOL(xfrm_policy_flush); 878 879 int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, 880 int (*func)(struct xfrm_policy *, int, int, void*), 881 void *data) 882 { 883 struct xfrm_policy *pol; 884 struct xfrm_policy_walk_entry *x; 885 int error = 0; 886 887 if (walk->type >= XFRM_POLICY_TYPE_MAX && 888 walk->type != XFRM_POLICY_TYPE_ANY) 889 return -EINVAL; 890 891 if (list_empty(&walk->walk.all) && walk->seq != 0) 892 return 0; 893 894 write_lock_bh(&xfrm_policy_lock); 895 if (list_empty(&walk->walk.all)) 896 x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); 897 else 898 x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); 899 list_for_each_entry_from(x, &net->xfrm.policy_all, all) { 900 if (x->dead) 901 continue; 902 pol = container_of(x, struct xfrm_policy, walk); 903 if (walk->type != XFRM_POLICY_TYPE_ANY && 904 walk->type != pol->type) 905 continue; 906 error = func(pol, xfrm_policy_id2dir(pol->index), 907 walk->seq, data); 908 if (error) { 909 list_move_tail(&walk->walk.all, &x->all); 910 goto out; 911 } 912 walk->seq++; 913 } 914 if (walk->seq == 0) { 915 error = -ENOENT; 916 goto out; 917 } 918 list_del_init(&walk->walk.all); 919 out: 920 write_unlock_bh(&xfrm_policy_lock); 921 return error; 922 } 923 EXPORT_SYMBOL(xfrm_policy_walk); 924 925 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) 926 { 927 INIT_LIST_HEAD(&walk->walk.all); 928 walk->walk.dead = 1; 929 walk->type = type; 930 walk->seq = 0; 931 } 932 EXPORT_SYMBOL(xfrm_policy_walk_init); 933 934 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) 935 { 936 if (list_empty(&walk->walk.all)) 937 return; 938 939 write_lock_bh(&xfrm_policy_lock); 940 list_del(&walk->walk.all); 941 write_unlock_bh(&xfrm_policy_lock); 942 } 943 EXPORT_SYMBOL(xfrm_policy_walk_done); 944 945 /* 946 * Find policy to apply to this flow. 947 * 948 * Returns 0 if policy found, else an -errno. 949 */ 950 static int xfrm_policy_match(const struct xfrm_policy *pol, 951 const struct flowi *fl, 952 u8 type, u16 family, int dir) 953 { 954 const struct xfrm_selector *sel = &pol->selector; 955 int ret = -ESRCH; 956 bool match; 957 958 if (pol->family != family || 959 (fl->flowi_mark & pol->mark.m) != pol->mark.v || 960 pol->type != type) 961 return ret; 962 963 match = xfrm_selector_match(sel, fl, family); 964 if (match) 965 ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, 966 dir); 967 968 return ret; 969 } 970 971 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, 972 const struct flowi *fl, 973 u16 family, u8 dir) 974 { 975 int err; 976 struct xfrm_policy *pol, *ret; 977 const xfrm_address_t *daddr, *saddr; 978 struct hlist_head *chain; 979 u32 priority = ~0U; 980 981 daddr = xfrm_flowi_daddr(fl, family); 982 saddr = xfrm_flowi_saddr(fl, family); 983 if (unlikely(!daddr || !saddr)) 984 return NULL; 985 986 read_lock_bh(&xfrm_policy_lock); 987 chain = policy_hash_direct(net, daddr, saddr, family, dir); 988 ret = NULL; 989 hlist_for_each_entry(pol, chain, bydst) { 990 err = xfrm_policy_match(pol, fl, type, family, dir); 991 if (err) { 992 if (err == -ESRCH) 993 continue; 994 else { 995 ret = ERR_PTR(err); 996 goto fail; 997 } 998 } else { 999 ret = pol; 1000 priority = ret->priority; 1001 break; 1002 } 1003 } 1004 chain = &net->xfrm.policy_inexact[dir]; 1005 hlist_for_each_entry(pol, chain, bydst) { 1006 err = xfrm_policy_match(pol, fl, type, family, dir); 1007 if (err) { 1008 if (err == -ESRCH) 1009 continue; 1010 else { 1011 ret = ERR_PTR(err); 1012 goto fail; 1013 } 1014 } else if (pol->priority < priority) { 1015 ret = pol; 1016 break; 1017 } 1018 } 1019 if (ret) 1020 xfrm_pol_hold(ret); 1021 fail: 1022 read_unlock_bh(&xfrm_policy_lock); 1023 1024 return ret; 1025 } 1026 1027 static struct xfrm_policy * 1028 __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) 1029 { 1030 #ifdef CONFIG_XFRM_SUB_POLICY 1031 struct xfrm_policy *pol; 1032 1033 pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); 1034 if (pol != NULL) 1035 return pol; 1036 #endif 1037 return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); 1038 } 1039 1040 static int flow_to_policy_dir(int dir) 1041 { 1042 if (XFRM_POLICY_IN == FLOW_DIR_IN && 1043 XFRM_POLICY_OUT == FLOW_DIR_OUT && 1044 XFRM_POLICY_FWD == FLOW_DIR_FWD) 1045 return dir; 1046 1047 switch (dir) { 1048 default: 1049 case FLOW_DIR_IN: 1050 return XFRM_POLICY_IN; 1051 case FLOW_DIR_OUT: 1052 return XFRM_POLICY_OUT; 1053 case FLOW_DIR_FWD: 1054 return XFRM_POLICY_FWD; 1055 } 1056 } 1057 1058 static struct flow_cache_object * 1059 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, 1060 u8 dir, struct flow_cache_object *old_obj, void *ctx) 1061 { 1062 struct xfrm_policy *pol; 1063 1064 if (old_obj) 1065 xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); 1066 1067 pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); 1068 if (IS_ERR_OR_NULL(pol)) 1069 return ERR_CAST(pol); 1070 1071 /* Resolver returns two references: 1072 * one for cache and one for caller of flow_cache_lookup() */ 1073 xfrm_pol_hold(pol); 1074 1075 return &pol->flo; 1076 } 1077 1078 static inline int policy_to_flow_dir(int dir) 1079 { 1080 if (XFRM_POLICY_IN == FLOW_DIR_IN && 1081 XFRM_POLICY_OUT == FLOW_DIR_OUT && 1082 XFRM_POLICY_FWD == FLOW_DIR_FWD) 1083 return dir; 1084 switch (dir) { 1085 default: 1086 case XFRM_POLICY_IN: 1087 return FLOW_DIR_IN; 1088 case XFRM_POLICY_OUT: 1089 return FLOW_DIR_OUT; 1090 case XFRM_POLICY_FWD: 1091 return FLOW_DIR_FWD; 1092 } 1093 } 1094 1095 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, 1096 const struct flowi *fl) 1097 { 1098 struct xfrm_policy *pol; 1099 1100 read_lock_bh(&xfrm_policy_lock); 1101 if ((pol = sk->sk_policy[dir]) != NULL) { 1102 bool match = xfrm_selector_match(&pol->selector, fl, 1103 sk->sk_family); 1104 int err = 0; 1105 1106 if (match) { 1107 if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { 1108 pol = NULL; 1109 goto out; 1110 } 1111 err = security_xfrm_policy_lookup(pol->security, 1112 fl->flowi_secid, 1113 policy_to_flow_dir(dir)); 1114 if (!err) 1115 xfrm_pol_hold(pol); 1116 else if (err == -ESRCH) 1117 pol = NULL; 1118 else 1119 pol = ERR_PTR(err); 1120 } else 1121 pol = NULL; 1122 } 1123 out: 1124 read_unlock_bh(&xfrm_policy_lock); 1125 return pol; 1126 } 1127 1128 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) 1129 { 1130 struct net *net = xp_net(pol); 1131 struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, 1132 pol->family, dir); 1133 1134 list_add(&pol->walk.all, &net->xfrm.policy_all); 1135 hlist_add_head(&pol->bydst, chain); 1136 hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); 1137 net->xfrm.policy_count[dir]++; 1138 xfrm_pol_hold(pol); 1139 1140 if (xfrm_bydst_should_resize(net, dir, NULL)) 1141 schedule_work(&net->xfrm.policy_hash_work); 1142 } 1143 1144 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 1145 int dir) 1146 { 1147 struct net *net = xp_net(pol); 1148 1149 if (hlist_unhashed(&pol->bydst)) 1150 return NULL; 1151 1152 hlist_del(&pol->bydst); 1153 hlist_del(&pol->byidx); 1154 list_del(&pol->walk.all); 1155 net->xfrm.policy_count[dir]--; 1156 1157 return pol; 1158 } 1159 1160 int xfrm_policy_delete(struct xfrm_policy *pol, int dir) 1161 { 1162 write_lock_bh(&xfrm_policy_lock); 1163 pol = __xfrm_policy_unlink(pol, dir); 1164 write_unlock_bh(&xfrm_policy_lock); 1165 if (pol) { 1166 xfrm_policy_kill(pol); 1167 return 0; 1168 } 1169 return -ENOENT; 1170 } 1171 EXPORT_SYMBOL(xfrm_policy_delete); 1172 1173 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) 1174 { 1175 struct net *net = xp_net(pol); 1176 struct xfrm_policy *old_pol; 1177 1178 #ifdef CONFIG_XFRM_SUB_POLICY 1179 if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) 1180 return -EINVAL; 1181 #endif 1182 1183 write_lock_bh(&xfrm_policy_lock); 1184 old_pol = sk->sk_policy[dir]; 1185 sk->sk_policy[dir] = pol; 1186 if (pol) { 1187 pol->curlft.add_time = get_seconds(); 1188 pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); 1189 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); 1190 } 1191 if (old_pol) { 1192 if (pol) 1193 xfrm_policy_requeue(old_pol, pol); 1194 1195 /* Unlinking succeeds always. This is the only function 1196 * allowed to delete or replace socket policy. 1197 */ 1198 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); 1199 } 1200 write_unlock_bh(&xfrm_policy_lock); 1201 1202 if (old_pol) { 1203 xfrm_policy_kill(old_pol); 1204 } 1205 return 0; 1206 } 1207 1208 static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) 1209 { 1210 struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); 1211 1212 if (newp) { 1213 newp->selector = old->selector; 1214 if (security_xfrm_policy_clone(old->security, 1215 &newp->security)) { 1216 kfree(newp); 1217 return NULL; /* ENOMEM */ 1218 } 1219 newp->lft = old->lft; 1220 newp->curlft = old->curlft; 1221 newp->mark = old->mark; 1222 newp->action = old->action; 1223 newp->flags = old->flags; 1224 newp->xfrm_nr = old->xfrm_nr; 1225 newp->index = old->index; 1226 newp->type = old->type; 1227 memcpy(newp->xfrm_vec, old->xfrm_vec, 1228 newp->xfrm_nr*sizeof(struct xfrm_tmpl)); 1229 write_lock_bh(&xfrm_policy_lock); 1230 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); 1231 write_unlock_bh(&xfrm_policy_lock); 1232 xfrm_pol_put(newp); 1233 } 1234 return newp; 1235 } 1236 1237 int __xfrm_sk_clone_policy(struct sock *sk) 1238 { 1239 struct xfrm_policy *p0 = sk->sk_policy[0], 1240 *p1 = sk->sk_policy[1]; 1241 1242 sk->sk_policy[0] = sk->sk_policy[1] = NULL; 1243 if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL) 1244 return -ENOMEM; 1245 if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL) 1246 return -ENOMEM; 1247 return 0; 1248 } 1249 1250 static int 1251 xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, 1252 unsigned short family) 1253 { 1254 int err; 1255 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1256 1257 if (unlikely(afinfo == NULL)) 1258 return -EINVAL; 1259 err = afinfo->get_saddr(net, local, remote); 1260 xfrm_policy_put_afinfo(afinfo); 1261 return err; 1262 } 1263 1264 /* Resolve list of templates for the flow, given policy. */ 1265 1266 static int 1267 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, 1268 struct xfrm_state **xfrm, unsigned short family) 1269 { 1270 struct net *net = xp_net(policy); 1271 int nx; 1272 int i, error; 1273 xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); 1274 xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); 1275 xfrm_address_t tmp; 1276 1277 for (nx=0, i = 0; i < policy->xfrm_nr; i++) { 1278 struct xfrm_state *x; 1279 xfrm_address_t *remote = daddr; 1280 xfrm_address_t *local = saddr; 1281 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; 1282 1283 if (tmpl->mode == XFRM_MODE_TUNNEL || 1284 tmpl->mode == XFRM_MODE_BEET) { 1285 remote = &tmpl->id.daddr; 1286 local = &tmpl->saddr; 1287 if (xfrm_addr_any(local, tmpl->encap_family)) { 1288 error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); 1289 if (error) 1290 goto fail; 1291 local = &tmp; 1292 } 1293 } 1294 1295 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); 1296 1297 if (x && x->km.state == XFRM_STATE_VALID) { 1298 xfrm[nx++] = x; 1299 daddr = remote; 1300 saddr = local; 1301 continue; 1302 } 1303 if (x) { 1304 error = (x->km.state == XFRM_STATE_ERROR ? 1305 -EINVAL : -EAGAIN); 1306 xfrm_state_put(x); 1307 } 1308 else if (error == -ESRCH) 1309 error = -EAGAIN; 1310 1311 if (!tmpl->optional) 1312 goto fail; 1313 } 1314 return nx; 1315 1316 fail: 1317 for (nx--; nx>=0; nx--) 1318 xfrm_state_put(xfrm[nx]); 1319 return error; 1320 } 1321 1322 static int 1323 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, 1324 struct xfrm_state **xfrm, unsigned short family) 1325 { 1326 struct xfrm_state *tp[XFRM_MAX_DEPTH]; 1327 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; 1328 int cnx = 0; 1329 int error; 1330 int ret; 1331 int i; 1332 1333 for (i = 0; i < npols; i++) { 1334 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { 1335 error = -ENOBUFS; 1336 goto fail; 1337 } 1338 1339 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); 1340 if (ret < 0) { 1341 error = ret; 1342 goto fail; 1343 } else 1344 cnx += ret; 1345 } 1346 1347 /* found states are sorted for outbound processing */ 1348 if (npols > 1) 1349 xfrm_state_sort(xfrm, tpp, cnx, family); 1350 1351 return cnx; 1352 1353 fail: 1354 for (cnx--; cnx>=0; cnx--) 1355 xfrm_state_put(tpp[cnx]); 1356 return error; 1357 1358 } 1359 1360 /* Check that the bundle accepts the flow and its components are 1361 * still valid. 1362 */ 1363 1364 static inline int xfrm_get_tos(const struct flowi *fl, int family) 1365 { 1366 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1367 int tos; 1368 1369 if (!afinfo) 1370 return -EINVAL; 1371 1372 tos = afinfo->get_tos(fl); 1373 1374 xfrm_policy_put_afinfo(afinfo); 1375 1376 return tos; 1377 } 1378 1379 static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) 1380 { 1381 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1382 struct dst_entry *dst = &xdst->u.dst; 1383 1384 if (xdst->route == NULL) { 1385 /* Dummy bundle - if it has xfrms we were not 1386 * able to build bundle as template resolution failed. 1387 * It means we need to try again resolving. */ 1388 if (xdst->num_xfrms > 0) 1389 return NULL; 1390 } else if (dst->flags & DST_XFRM_QUEUE) { 1391 return NULL; 1392 } else { 1393 /* Real bundle */ 1394 if (stale_bundle(dst)) 1395 return NULL; 1396 } 1397 1398 dst_hold(dst); 1399 return flo; 1400 } 1401 1402 static int xfrm_bundle_flo_check(struct flow_cache_object *flo) 1403 { 1404 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1405 struct dst_entry *dst = &xdst->u.dst; 1406 1407 if (!xdst->route) 1408 return 0; 1409 if (stale_bundle(dst)) 1410 return 0; 1411 1412 return 1; 1413 } 1414 1415 static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) 1416 { 1417 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1418 struct dst_entry *dst = &xdst->u.dst; 1419 1420 dst_free(dst); 1421 } 1422 1423 static const struct flow_cache_ops xfrm_bundle_fc_ops = { 1424 .get = xfrm_bundle_flo_get, 1425 .check = xfrm_bundle_flo_check, 1426 .delete = xfrm_bundle_flo_delete, 1427 }; 1428 1429 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) 1430 { 1431 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1432 struct dst_ops *dst_ops; 1433 struct xfrm_dst *xdst; 1434 1435 if (!afinfo) 1436 return ERR_PTR(-EINVAL); 1437 1438 switch (family) { 1439 case AF_INET: 1440 dst_ops = &net->xfrm.xfrm4_dst_ops; 1441 break; 1442 #if IS_ENABLED(CONFIG_IPV6) 1443 case AF_INET6: 1444 dst_ops = &net->xfrm.xfrm6_dst_ops; 1445 break; 1446 #endif 1447 default: 1448 BUG(); 1449 } 1450 xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); 1451 1452 if (likely(xdst)) { 1453 struct dst_entry *dst = &xdst->u.dst; 1454 1455 memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); 1456 xdst->flo.ops = &xfrm_bundle_fc_ops; 1457 if (afinfo->init_dst) 1458 afinfo->init_dst(net, xdst); 1459 } else 1460 xdst = ERR_PTR(-ENOBUFS); 1461 1462 xfrm_policy_put_afinfo(afinfo); 1463 1464 return xdst; 1465 } 1466 1467 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, 1468 int nfheader_len) 1469 { 1470 struct xfrm_policy_afinfo *afinfo = 1471 xfrm_policy_get_afinfo(dst->ops->family); 1472 int err; 1473 1474 if (!afinfo) 1475 return -EINVAL; 1476 1477 err = afinfo->init_path(path, dst, nfheader_len); 1478 1479 xfrm_policy_put_afinfo(afinfo); 1480 1481 return err; 1482 } 1483 1484 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 1485 const struct flowi *fl) 1486 { 1487 struct xfrm_policy_afinfo *afinfo = 1488 xfrm_policy_get_afinfo(xdst->u.dst.ops->family); 1489 int err; 1490 1491 if (!afinfo) 1492 return -EINVAL; 1493 1494 err = afinfo->fill_dst(xdst, dev, fl); 1495 1496 xfrm_policy_put_afinfo(afinfo); 1497 1498 return err; 1499 } 1500 1501 1502 /* Allocate chain of dst_entry's, attach known xfrm's, calculate 1503 * all the metrics... Shortly, bundle a bundle. 1504 */ 1505 1506 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, 1507 struct xfrm_state **xfrm, int nx, 1508 const struct flowi *fl, 1509 struct dst_entry *dst) 1510 { 1511 struct net *net = xp_net(policy); 1512 unsigned long now = jiffies; 1513 struct net_device *dev; 1514 struct xfrm_mode *inner_mode; 1515 struct dst_entry *dst_prev = NULL; 1516 struct dst_entry *dst0 = NULL; 1517 int i = 0; 1518 int err; 1519 int header_len = 0; 1520 int nfheader_len = 0; 1521 int trailer_len = 0; 1522 int tos; 1523 int family = policy->selector.family; 1524 xfrm_address_t saddr, daddr; 1525 1526 xfrm_flowi_addr_get(fl, &saddr, &daddr, family); 1527 1528 tos = xfrm_get_tos(fl, family); 1529 err = tos; 1530 if (tos < 0) 1531 goto put_states; 1532 1533 dst_hold(dst); 1534 1535 for (; i < nx; i++) { 1536 struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); 1537 struct dst_entry *dst1 = &xdst->u.dst; 1538 1539 err = PTR_ERR(xdst); 1540 if (IS_ERR(xdst)) { 1541 dst_release(dst); 1542 goto put_states; 1543 } 1544 1545 if (xfrm[i]->sel.family == AF_UNSPEC) { 1546 inner_mode = xfrm_ip2inner_mode(xfrm[i], 1547 xfrm_af2proto(family)); 1548 if (!inner_mode) { 1549 err = -EAFNOSUPPORT; 1550 dst_release(dst); 1551 goto put_states; 1552 } 1553 } else 1554 inner_mode = xfrm[i]->inner_mode; 1555 1556 if (!dst_prev) 1557 dst0 = dst1; 1558 else { 1559 dst_prev->child = dst_clone(dst1); 1560 dst1->flags |= DST_NOHASH; 1561 } 1562 1563 xdst->route = dst; 1564 dst_copy_metrics(dst1, dst); 1565 1566 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { 1567 family = xfrm[i]->props.family; 1568 dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, 1569 family); 1570 err = PTR_ERR(dst); 1571 if (IS_ERR(dst)) 1572 goto put_states; 1573 } else 1574 dst_hold(dst); 1575 1576 dst1->xfrm = xfrm[i]; 1577 xdst->xfrm_genid = xfrm[i]->genid; 1578 1579 dst1->obsolete = DST_OBSOLETE_FORCE_CHK; 1580 dst1->flags |= DST_HOST; 1581 dst1->lastuse = now; 1582 1583 dst1->input = dst_discard; 1584 dst1->output = inner_mode->afinfo->output; 1585 1586 dst1->next = dst_prev; 1587 dst_prev = dst1; 1588 1589 header_len += xfrm[i]->props.header_len; 1590 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) 1591 nfheader_len += xfrm[i]->props.header_len; 1592 trailer_len += xfrm[i]->props.trailer_len; 1593 } 1594 1595 dst_prev->child = dst; 1596 dst0->path = dst; 1597 1598 err = -ENODEV; 1599 dev = dst->dev; 1600 if (!dev) 1601 goto free_dst; 1602 1603 xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); 1604 xfrm_init_pmtu(dst_prev); 1605 1606 for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { 1607 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; 1608 1609 err = xfrm_fill_dst(xdst, dev, fl); 1610 if (err) 1611 goto free_dst; 1612 1613 dst_prev->header_len = header_len; 1614 dst_prev->trailer_len = trailer_len; 1615 header_len -= xdst->u.dst.xfrm->props.header_len; 1616 trailer_len -= xdst->u.dst.xfrm->props.trailer_len; 1617 } 1618 1619 out: 1620 return dst0; 1621 1622 put_states: 1623 for (; i < nx; i++) 1624 xfrm_state_put(xfrm[i]); 1625 free_dst: 1626 if (dst0) 1627 dst_free(dst0); 1628 dst0 = ERR_PTR(err); 1629 goto out; 1630 } 1631 1632 static int inline 1633 xfrm_dst_alloc_copy(void **target, const void *src, int size) 1634 { 1635 if (!*target) { 1636 *target = kmalloc(size, GFP_ATOMIC); 1637 if (!*target) 1638 return -ENOMEM; 1639 } 1640 memcpy(*target, src, size); 1641 return 0; 1642 } 1643 1644 static int inline 1645 xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) 1646 { 1647 #ifdef CONFIG_XFRM_SUB_POLICY 1648 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1649 return xfrm_dst_alloc_copy((void **)&(xdst->partner), 1650 sel, sizeof(*sel)); 1651 #else 1652 return 0; 1653 #endif 1654 } 1655 1656 static int inline 1657 xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl) 1658 { 1659 #ifdef CONFIG_XFRM_SUB_POLICY 1660 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1661 return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); 1662 #else 1663 return 0; 1664 #endif 1665 } 1666 1667 static int xfrm_expand_policies(const struct flowi *fl, u16 family, 1668 struct xfrm_policy **pols, 1669 int *num_pols, int *num_xfrms) 1670 { 1671 int i; 1672 1673 if (*num_pols == 0 || !pols[0]) { 1674 *num_pols = 0; 1675 *num_xfrms = 0; 1676 return 0; 1677 } 1678 if (IS_ERR(pols[0])) 1679 return PTR_ERR(pols[0]); 1680 1681 *num_xfrms = pols[0]->xfrm_nr; 1682 1683 #ifdef CONFIG_XFRM_SUB_POLICY 1684 if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && 1685 pols[0]->type != XFRM_POLICY_TYPE_MAIN) { 1686 pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), 1687 XFRM_POLICY_TYPE_MAIN, 1688 fl, family, 1689 XFRM_POLICY_OUT); 1690 if (pols[1]) { 1691 if (IS_ERR(pols[1])) { 1692 xfrm_pols_put(pols, *num_pols); 1693 return PTR_ERR(pols[1]); 1694 } 1695 (*num_pols) ++; 1696 (*num_xfrms) += pols[1]->xfrm_nr; 1697 } 1698 } 1699 #endif 1700 for (i = 0; i < *num_pols; i++) { 1701 if (pols[i]->action != XFRM_POLICY_ALLOW) { 1702 *num_xfrms = -1; 1703 break; 1704 } 1705 } 1706 1707 return 0; 1708 1709 } 1710 1711 static struct xfrm_dst * 1712 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, 1713 const struct flowi *fl, u16 family, 1714 struct dst_entry *dst_orig) 1715 { 1716 struct net *net = xp_net(pols[0]); 1717 struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; 1718 struct dst_entry *dst; 1719 struct xfrm_dst *xdst; 1720 int err; 1721 1722 /* Try to instantiate a bundle */ 1723 err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); 1724 if (err <= 0) { 1725 if (err != 0 && err != -EAGAIN) 1726 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 1727 return ERR_PTR(err); 1728 } 1729 1730 dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); 1731 if (IS_ERR(dst)) { 1732 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); 1733 return ERR_CAST(dst); 1734 } 1735 1736 xdst = (struct xfrm_dst *)dst; 1737 xdst->num_xfrms = err; 1738 if (num_pols > 1) 1739 err = xfrm_dst_update_parent(dst, &pols[1]->selector); 1740 else 1741 err = xfrm_dst_update_origin(dst, fl); 1742 if (unlikely(err)) { 1743 dst_free(dst); 1744 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); 1745 return ERR_PTR(err); 1746 } 1747 1748 xdst->num_pols = num_pols; 1749 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); 1750 xdst->policy_genid = atomic_read(&pols[0]->genid); 1751 1752 return xdst; 1753 } 1754 1755 static void xfrm_policy_queue_process(unsigned long arg) 1756 { 1757 int err = 0; 1758 struct sk_buff *skb; 1759 struct sock *sk; 1760 struct dst_entry *dst; 1761 struct net_device *dev; 1762 struct xfrm_policy *pol = (struct xfrm_policy *)arg; 1763 struct xfrm_policy_queue *pq = &pol->polq; 1764 struct flowi fl; 1765 struct sk_buff_head list; 1766 1767 spin_lock(&pq->hold_queue.lock); 1768 skb = skb_peek(&pq->hold_queue); 1769 dst = skb_dst(skb); 1770 sk = skb->sk; 1771 xfrm_decode_session(skb, &fl, dst->ops->family); 1772 spin_unlock(&pq->hold_queue.lock); 1773 1774 dst_hold(dst->path); 1775 dst = xfrm_lookup(xp_net(pol), dst->path, &fl, 1776 sk, 0); 1777 if (IS_ERR(dst)) 1778 goto purge_queue; 1779 1780 if (dst->flags & DST_XFRM_QUEUE) { 1781 dst_release(dst); 1782 1783 if (pq->timeout >= XFRM_QUEUE_TMO_MAX) 1784 goto purge_queue; 1785 1786 pq->timeout = pq->timeout << 1; 1787 mod_timer(&pq->hold_timer, jiffies + pq->timeout); 1788 return; 1789 } 1790 1791 dst_release(dst); 1792 1793 __skb_queue_head_init(&list); 1794 1795 spin_lock(&pq->hold_queue.lock); 1796 pq->timeout = 0; 1797 skb_queue_splice_init(&pq->hold_queue, &list); 1798 spin_unlock(&pq->hold_queue.lock); 1799 1800 while (!skb_queue_empty(&list)) { 1801 skb = __skb_dequeue(&list); 1802 1803 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); 1804 dst_hold(skb_dst(skb)->path); 1805 dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, 1806 &fl, skb->sk, 0); 1807 if (IS_ERR(dst)) { 1808 dev_put(skb->dev); 1809 kfree_skb(skb); 1810 continue; 1811 } 1812 1813 nf_reset(skb); 1814 skb_dst_drop(skb); 1815 skb_dst_set(skb, dst); 1816 1817 dev = skb->dev; 1818 err = dst_output(skb); 1819 dev_put(dev); 1820 } 1821 1822 return; 1823 1824 purge_queue: 1825 pq->timeout = 0; 1826 xfrm_queue_purge(&pq->hold_queue); 1827 } 1828 1829 static int xdst_queue_output(struct sk_buff *skb) 1830 { 1831 unsigned long sched_next; 1832 struct dst_entry *dst = skb_dst(skb); 1833 struct xfrm_dst *xdst = (struct xfrm_dst *) dst; 1834 struct xfrm_policy_queue *pq = &xdst->pols[0]->polq; 1835 1836 if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { 1837 kfree_skb(skb); 1838 return -EAGAIN; 1839 } 1840 1841 skb_dst_force(skb); 1842 dev_hold(skb->dev); 1843 1844 spin_lock_bh(&pq->hold_queue.lock); 1845 1846 if (!pq->timeout) 1847 pq->timeout = XFRM_QUEUE_TMO_MIN; 1848 1849 sched_next = jiffies + pq->timeout; 1850 1851 if (del_timer(&pq->hold_timer)) { 1852 if (time_before(pq->hold_timer.expires, sched_next)) 1853 sched_next = pq->hold_timer.expires; 1854 } 1855 1856 __skb_queue_tail(&pq->hold_queue, skb); 1857 mod_timer(&pq->hold_timer, sched_next); 1858 1859 spin_unlock_bh(&pq->hold_queue.lock); 1860 1861 return 0; 1862 } 1863 1864 static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, 1865 struct dst_entry *dst, 1866 const struct flowi *fl, 1867 int num_xfrms, 1868 u16 family) 1869 { 1870 int err; 1871 struct net_device *dev; 1872 struct dst_entry *dst1; 1873 struct xfrm_dst *xdst; 1874 1875 xdst = xfrm_alloc_dst(net, family); 1876 if (IS_ERR(xdst)) 1877 return xdst; 1878 1879 if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || 1880 (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) 1881 return xdst; 1882 1883 dst1 = &xdst->u.dst; 1884 dst_hold(dst); 1885 xdst->route = dst; 1886 1887 dst_copy_metrics(dst1, dst); 1888 1889 dst1->obsolete = DST_OBSOLETE_FORCE_CHK; 1890 dst1->flags |= DST_HOST | DST_XFRM_QUEUE; 1891 dst1->lastuse = jiffies; 1892 1893 dst1->input = dst_discard; 1894 dst1->output = xdst_queue_output; 1895 1896 dst_hold(dst); 1897 dst1->child = dst; 1898 dst1->path = dst; 1899 1900 xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); 1901 1902 err = -ENODEV; 1903 dev = dst->dev; 1904 if (!dev) 1905 goto free_dst; 1906 1907 err = xfrm_fill_dst(xdst, dev, fl); 1908 if (err) 1909 goto free_dst; 1910 1911 out: 1912 return xdst; 1913 1914 free_dst: 1915 dst_release(dst1); 1916 xdst = ERR_PTR(err); 1917 goto out; 1918 } 1919 1920 static struct flow_cache_object * 1921 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, 1922 struct flow_cache_object *oldflo, void *ctx) 1923 { 1924 struct dst_entry *dst_orig = (struct dst_entry *)ctx; 1925 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 1926 struct xfrm_dst *xdst, *new_xdst; 1927 int num_pols = 0, num_xfrms = 0, i, err, pol_dead; 1928 1929 /* Check if the policies from old bundle are usable */ 1930 xdst = NULL; 1931 if (oldflo) { 1932 xdst = container_of(oldflo, struct xfrm_dst, flo); 1933 num_pols = xdst->num_pols; 1934 num_xfrms = xdst->num_xfrms; 1935 pol_dead = 0; 1936 for (i = 0; i < num_pols; i++) { 1937 pols[i] = xdst->pols[i]; 1938 pol_dead |= pols[i]->walk.dead; 1939 } 1940 if (pol_dead) { 1941 dst_free(&xdst->u.dst); 1942 xdst = NULL; 1943 num_pols = 0; 1944 num_xfrms = 0; 1945 oldflo = NULL; 1946 } 1947 } 1948 1949 /* Resolve policies to use if we couldn't get them from 1950 * previous cache entry */ 1951 if (xdst == NULL) { 1952 num_pols = 1; 1953 pols[0] = __xfrm_policy_lookup(net, fl, family, 1954 flow_to_policy_dir(dir)); 1955 err = xfrm_expand_policies(fl, family, pols, 1956 &num_pols, &num_xfrms); 1957 if (err < 0) 1958 goto inc_error; 1959 if (num_pols == 0) 1960 return NULL; 1961 if (num_xfrms <= 0) 1962 goto make_dummy_bundle; 1963 } 1964 1965 new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); 1966 if (IS_ERR(new_xdst)) { 1967 err = PTR_ERR(new_xdst); 1968 if (err != -EAGAIN) 1969 goto error; 1970 if (oldflo == NULL) 1971 goto make_dummy_bundle; 1972 dst_hold(&xdst->u.dst); 1973 return oldflo; 1974 } else if (new_xdst == NULL) { 1975 num_xfrms = 0; 1976 if (oldflo == NULL) 1977 goto make_dummy_bundle; 1978 xdst->num_xfrms = 0; 1979 dst_hold(&xdst->u.dst); 1980 return oldflo; 1981 } 1982 1983 /* Kill the previous bundle */ 1984 if (xdst) { 1985 /* The policies were stolen for newly generated bundle */ 1986 xdst->num_pols = 0; 1987 dst_free(&xdst->u.dst); 1988 } 1989 1990 /* Flow cache does not have reference, it dst_free()'s, 1991 * but we do need to return one reference for original caller */ 1992 dst_hold(&new_xdst->u.dst); 1993 return &new_xdst->flo; 1994 1995 make_dummy_bundle: 1996 /* We found policies, but there's no bundles to instantiate: 1997 * either because the policy blocks, has no transformations or 1998 * we could not build template (no xfrm_states).*/ 1999 xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); 2000 if (IS_ERR(xdst)) { 2001 xfrm_pols_put(pols, num_pols); 2002 return ERR_CAST(xdst); 2003 } 2004 xdst->num_pols = num_pols; 2005 xdst->num_xfrms = num_xfrms; 2006 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); 2007 2008 dst_hold(&xdst->u.dst); 2009 return &xdst->flo; 2010 2011 inc_error: 2012 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 2013 error: 2014 if (xdst != NULL) 2015 dst_free(&xdst->u.dst); 2016 else 2017 xfrm_pols_put(pols, num_pols); 2018 return ERR_PTR(err); 2019 } 2020 2021 static struct dst_entry *make_blackhole(struct net *net, u16 family, 2022 struct dst_entry *dst_orig) 2023 { 2024 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 2025 struct dst_entry *ret; 2026 2027 if (!afinfo) { 2028 dst_release(dst_orig); 2029 return ERR_PTR(-EINVAL); 2030 } else { 2031 ret = afinfo->blackhole_route(net, dst_orig); 2032 } 2033 xfrm_policy_put_afinfo(afinfo); 2034 2035 return ret; 2036 } 2037 2038 /* Main function: finds/creates a bundle for given flow. 2039 * 2040 * At the moment we eat a raw IP route. Mostly to speed up lookups 2041 * on interfaces with disabled IPsec. 2042 */ 2043 struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, 2044 const struct flowi *fl, 2045 struct sock *sk, int flags) 2046 { 2047 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 2048 struct flow_cache_object *flo; 2049 struct xfrm_dst *xdst; 2050 struct dst_entry *dst, *route; 2051 u16 family = dst_orig->ops->family; 2052 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); 2053 int i, err, num_pols, num_xfrms = 0, drop_pols = 0; 2054 2055 restart: 2056 dst = NULL; 2057 xdst = NULL; 2058 route = NULL; 2059 2060 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { 2061 num_pols = 1; 2062 pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); 2063 err = xfrm_expand_policies(fl, family, pols, 2064 &num_pols, &num_xfrms); 2065 if (err < 0) 2066 goto dropdst; 2067 2068 if (num_pols) { 2069 if (num_xfrms <= 0) { 2070 drop_pols = num_pols; 2071 goto no_transform; 2072 } 2073 2074 xdst = xfrm_resolve_and_create_bundle( 2075 pols, num_pols, fl, 2076 family, dst_orig); 2077 if (IS_ERR(xdst)) { 2078 xfrm_pols_put(pols, num_pols); 2079 err = PTR_ERR(xdst); 2080 goto dropdst; 2081 } else if (xdst == NULL) { 2082 num_xfrms = 0; 2083 drop_pols = num_pols; 2084 goto no_transform; 2085 } 2086 2087 dst_hold(&xdst->u.dst); 2088 2089 spin_lock_bh(&xfrm_policy_sk_bundle_lock); 2090 xdst->u.dst.next = xfrm_policy_sk_bundles; 2091 xfrm_policy_sk_bundles = &xdst->u.dst; 2092 spin_unlock_bh(&xfrm_policy_sk_bundle_lock); 2093 2094 route = xdst->route; 2095 } 2096 } 2097 2098 if (xdst == NULL) { 2099 /* To accelerate a bit... */ 2100 if ((dst_orig->flags & DST_NOXFRM) || 2101 !net->xfrm.policy_count[XFRM_POLICY_OUT]) 2102 goto nopol; 2103 2104 flo = flow_cache_lookup(net, fl, family, dir, 2105 xfrm_bundle_lookup, dst_orig); 2106 if (flo == NULL) 2107 goto nopol; 2108 if (IS_ERR(flo)) { 2109 err = PTR_ERR(flo); 2110 goto dropdst; 2111 } 2112 xdst = container_of(flo, struct xfrm_dst, flo); 2113 2114 num_pols = xdst->num_pols; 2115 num_xfrms = xdst->num_xfrms; 2116 memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); 2117 route = xdst->route; 2118 } 2119 2120 dst = &xdst->u.dst; 2121 if (route == NULL && num_xfrms > 0) { 2122 /* The only case when xfrm_bundle_lookup() returns a 2123 * bundle with null route, is when the template could 2124 * not be resolved. It means policies are there, but 2125 * bundle could not be created, since we don't yet 2126 * have the xfrm_state's. We need to wait for KM to 2127 * negotiate new SA's or bail out with error.*/ 2128 if (net->xfrm.sysctl_larval_drop) { 2129 /* EREMOTE tells the caller to generate 2130 * a one-shot blackhole route. */ 2131 dst_release(dst); 2132 xfrm_pols_put(pols, drop_pols); 2133 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); 2134 2135 return make_blackhole(net, family, dst_orig); 2136 } 2137 if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { 2138 DECLARE_WAITQUEUE(wait, current); 2139 2140 add_wait_queue(&net->xfrm.km_waitq, &wait); 2141 set_current_state(TASK_INTERRUPTIBLE); 2142 schedule(); 2143 set_current_state(TASK_RUNNING); 2144 remove_wait_queue(&net->xfrm.km_waitq, &wait); 2145 2146 if (!signal_pending(current)) { 2147 dst_release(dst); 2148 goto restart; 2149 } 2150 2151 err = -ERESTART; 2152 } else 2153 err = -EAGAIN; 2154 2155 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); 2156 goto error; 2157 } 2158 2159 no_transform: 2160 if (num_pols == 0) 2161 goto nopol; 2162 2163 if ((flags & XFRM_LOOKUP_ICMP) && 2164 !(pols[0]->flags & XFRM_POLICY_ICMP)) { 2165 err = -ENOENT; 2166 goto error; 2167 } 2168 2169 for (i = 0; i < num_pols; i++) 2170 pols[i]->curlft.use_time = get_seconds(); 2171 2172 if (num_xfrms < 0) { 2173 /* Prohibit the flow */ 2174 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); 2175 err = -EPERM; 2176 goto error; 2177 } else if (num_xfrms > 0) { 2178 /* Flow transformed */ 2179 dst_release(dst_orig); 2180 } else { 2181 /* Flow passes untransformed */ 2182 dst_release(dst); 2183 dst = dst_orig; 2184 } 2185 ok: 2186 xfrm_pols_put(pols, drop_pols); 2187 if (dst && dst->xfrm && 2188 dst->xfrm->props.mode == XFRM_MODE_TUNNEL) 2189 dst->flags |= DST_XFRM_TUNNEL; 2190 return dst; 2191 2192 nopol: 2193 if (!(flags & XFRM_LOOKUP_ICMP)) { 2194 dst = dst_orig; 2195 goto ok; 2196 } 2197 err = -ENOENT; 2198 error: 2199 dst_release(dst); 2200 dropdst: 2201 dst_release(dst_orig); 2202 xfrm_pols_put(pols, drop_pols); 2203 return ERR_PTR(err); 2204 } 2205 EXPORT_SYMBOL(xfrm_lookup); 2206 2207 static inline int 2208 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) 2209 { 2210 struct xfrm_state *x; 2211 2212 if (!skb->sp || idx < 0 || idx >= skb->sp->len) 2213 return 0; 2214 x = skb->sp->xvec[idx]; 2215 if (!x->type->reject) 2216 return 0; 2217 return x->type->reject(x, skb, fl); 2218 } 2219 2220 /* When skb is transformed back to its "native" form, we have to 2221 * check policy restrictions. At the moment we make this in maximally 2222 * stupid way. Shame on me. :-) Of course, connected sockets must 2223 * have policy cached at them. 2224 */ 2225 2226 static inline int 2227 xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, 2228 unsigned short family) 2229 { 2230 if (xfrm_state_kern(x)) 2231 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family); 2232 return x->id.proto == tmpl->id.proto && 2233 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && 2234 (x->props.reqid == tmpl->reqid || !tmpl->reqid) && 2235 x->props.mode == tmpl->mode && 2236 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) || 2237 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && 2238 !(x->props.mode != XFRM_MODE_TRANSPORT && 2239 xfrm_state_addr_cmp(tmpl, x, family)); 2240 } 2241 2242 /* 2243 * 0 or more than 0 is returned when validation is succeeded (either bypass 2244 * because of optional transport mode, or next index of the mathced secpath 2245 * state with the template. 2246 * -1 is returned when no matching template is found. 2247 * Otherwise "-2 - errored_index" is returned. 2248 */ 2249 static inline int 2250 xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, 2251 unsigned short family) 2252 { 2253 int idx = start; 2254 2255 if (tmpl->optional) { 2256 if (tmpl->mode == XFRM_MODE_TRANSPORT) 2257 return start; 2258 } else 2259 start = -1; 2260 for (; idx < sp->len; idx++) { 2261 if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) 2262 return ++idx; 2263 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { 2264 if (start == -1) 2265 start = -2-idx; 2266 break; 2267 } 2268 } 2269 return start; 2270 } 2271 2272 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, 2273 unsigned int family, int reverse) 2274 { 2275 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 2276 int err; 2277 2278 if (unlikely(afinfo == NULL)) 2279 return -EAFNOSUPPORT; 2280 2281 afinfo->decode_session(skb, fl, reverse); 2282 err = security_xfrm_decode_session(skb, &fl->flowi_secid); 2283 xfrm_policy_put_afinfo(afinfo); 2284 return err; 2285 } 2286 EXPORT_SYMBOL(__xfrm_decode_session); 2287 2288 static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) 2289 { 2290 for (; k < sp->len; k++) { 2291 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { 2292 *idxp = k; 2293 return 1; 2294 } 2295 } 2296 2297 return 0; 2298 } 2299 2300 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 2301 unsigned short family) 2302 { 2303 struct net *net = dev_net(skb->dev); 2304 struct xfrm_policy *pol; 2305 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 2306 int npols = 0; 2307 int xfrm_nr; 2308 int pi; 2309 int reverse; 2310 struct flowi fl; 2311 u8 fl_dir; 2312 int xerr_idx = -1; 2313 2314 reverse = dir & ~XFRM_POLICY_MASK; 2315 dir &= XFRM_POLICY_MASK; 2316 fl_dir = policy_to_flow_dir(dir); 2317 2318 if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { 2319 XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); 2320 return 0; 2321 } 2322 2323 nf_nat_decode_session(skb, &fl, family); 2324 2325 /* First, check used SA against their selectors. */ 2326 if (skb->sp) { 2327 int i; 2328 2329 for (i=skb->sp->len-1; i>=0; i--) { 2330 struct xfrm_state *x = skb->sp->xvec[i]; 2331 if (!xfrm_selector_match(&x->sel, &fl, family)) { 2332 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); 2333 return 0; 2334 } 2335 } 2336 } 2337 2338 pol = NULL; 2339 if (sk && sk->sk_policy[dir]) { 2340 pol = xfrm_sk_policy_lookup(sk, dir, &fl); 2341 if (IS_ERR(pol)) { 2342 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2343 return 0; 2344 } 2345 } 2346 2347 if (!pol) { 2348 struct flow_cache_object *flo; 2349 2350 flo = flow_cache_lookup(net, &fl, family, fl_dir, 2351 xfrm_policy_lookup, NULL); 2352 if (IS_ERR_OR_NULL(flo)) 2353 pol = ERR_CAST(flo); 2354 else 2355 pol = container_of(flo, struct xfrm_policy, flo); 2356 } 2357 2358 if (IS_ERR(pol)) { 2359 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2360 return 0; 2361 } 2362 2363 if (!pol) { 2364 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { 2365 xfrm_secpath_reject(xerr_idx, skb, &fl); 2366 XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); 2367 return 0; 2368 } 2369 return 1; 2370 } 2371 2372 pol->curlft.use_time = get_seconds(); 2373 2374 pols[0] = pol; 2375 npols ++; 2376 #ifdef CONFIG_XFRM_SUB_POLICY 2377 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { 2378 pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, 2379 &fl, family, 2380 XFRM_POLICY_IN); 2381 if (pols[1]) { 2382 if (IS_ERR(pols[1])) { 2383 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2384 return 0; 2385 } 2386 pols[1]->curlft.use_time = get_seconds(); 2387 npols ++; 2388 } 2389 } 2390 #endif 2391 2392 if (pol->action == XFRM_POLICY_ALLOW) { 2393 struct sec_path *sp; 2394 static struct sec_path dummy; 2395 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; 2396 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; 2397 struct xfrm_tmpl **tpp = tp; 2398 int ti = 0; 2399 int i, k; 2400 2401 if ((sp = skb->sp) == NULL) 2402 sp = &dummy; 2403 2404 for (pi = 0; pi < npols; pi++) { 2405 if (pols[pi] != pol && 2406 pols[pi]->action != XFRM_POLICY_ALLOW) { 2407 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); 2408 goto reject; 2409 } 2410 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { 2411 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); 2412 goto reject_error; 2413 } 2414 for (i = 0; i < pols[pi]->xfrm_nr; i++) 2415 tpp[ti++] = &pols[pi]->xfrm_vec[i]; 2416 } 2417 xfrm_nr = ti; 2418 if (npols > 1) { 2419 xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); 2420 tpp = stp; 2421 } 2422 2423 /* For each tunnel xfrm, find the first matching tmpl. 2424 * For each tmpl before that, find corresponding xfrm. 2425 * Order is _important_. Later we will implement 2426 * some barriers, but at the moment barriers 2427 * are implied between each two transformations. 2428 */ 2429 for (i = xfrm_nr-1, k = 0; i >= 0; i--) { 2430 k = xfrm_policy_ok(tpp[i], sp, k, family); 2431 if (k < 0) { 2432 if (k < -1) 2433 /* "-2 - errored_index" returned */ 2434 xerr_idx = -(2+k); 2435 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); 2436 goto reject; 2437 } 2438 } 2439 2440 if (secpath_has_nontransport(sp, k, &xerr_idx)) { 2441 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); 2442 goto reject; 2443 } 2444 2445 xfrm_pols_put(pols, npols); 2446 return 1; 2447 } 2448 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); 2449 2450 reject: 2451 xfrm_secpath_reject(xerr_idx, skb, &fl); 2452 reject_error: 2453 xfrm_pols_put(pols, npols); 2454 return 0; 2455 } 2456 EXPORT_SYMBOL(__xfrm_policy_check); 2457 2458 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) 2459 { 2460 struct net *net = dev_net(skb->dev); 2461 struct flowi fl; 2462 struct dst_entry *dst; 2463 int res = 1; 2464 2465 if (xfrm_decode_session(skb, &fl, family) < 0) { 2466 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); 2467 return 0; 2468 } 2469 2470 skb_dst_force(skb); 2471 2472 dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); 2473 if (IS_ERR(dst)) { 2474 res = 0; 2475 dst = NULL; 2476 } 2477 skb_dst_set(skb, dst); 2478 return res; 2479 } 2480 EXPORT_SYMBOL(__xfrm_route_forward); 2481 2482 /* Optimize later using cookies and generation ids. */ 2483 2484 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) 2485 { 2486 /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete 2487 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to 2488 * get validated by dst_ops->check on every use. We do this 2489 * because when a normal route referenced by an XFRM dst is 2490 * obsoleted we do not go looking around for all parent 2491 * referencing XFRM dsts so that we can invalidate them. It 2492 * is just too much work. Instead we make the checks here on 2493 * every use. For example: 2494 * 2495 * XFRM dst A --> IPv4 dst X 2496 * 2497 * X is the "xdst->route" of A (X is also the "dst->path" of A 2498 * in this example). If X is marked obsolete, "A" will not 2499 * notice. That's what we are validating here via the 2500 * stale_bundle() check. 2501 * 2502 * When a policy's bundle is pruned, we dst_free() the XFRM 2503 * dst which causes it's ->obsolete field to be set to 2504 * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like 2505 * this, we want to force a new route lookup. 2506 */ 2507 if (dst->obsolete < 0 && !stale_bundle(dst)) 2508 return dst; 2509 2510 return NULL; 2511 } 2512 2513 static int stale_bundle(struct dst_entry *dst) 2514 { 2515 return !xfrm_bundle_ok((struct xfrm_dst *)dst); 2516 } 2517 2518 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) 2519 { 2520 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { 2521 dst->dev = dev_net(dev)->loopback_dev; 2522 dev_hold(dst->dev); 2523 dev_put(dev); 2524 } 2525 } 2526 EXPORT_SYMBOL(xfrm_dst_ifdown); 2527 2528 static void xfrm_link_failure(struct sk_buff *skb) 2529 { 2530 /* Impossible. Such dst must be popped before reaches point of failure. */ 2531 } 2532 2533 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) 2534 { 2535 if (dst) { 2536 if (dst->obsolete) { 2537 dst_release(dst); 2538 dst = NULL; 2539 } 2540 } 2541 return dst; 2542 } 2543 2544 static void __xfrm_garbage_collect(struct net *net) 2545 { 2546 struct dst_entry *head, *next; 2547 2548 spin_lock_bh(&xfrm_policy_sk_bundle_lock); 2549 head = xfrm_policy_sk_bundles; 2550 xfrm_policy_sk_bundles = NULL; 2551 spin_unlock_bh(&xfrm_policy_sk_bundle_lock); 2552 2553 while (head) { 2554 next = head->next; 2555 dst_free(head); 2556 head = next; 2557 } 2558 } 2559 2560 void xfrm_garbage_collect(struct net *net) 2561 { 2562 flow_cache_flush(); 2563 __xfrm_garbage_collect(net); 2564 } 2565 EXPORT_SYMBOL(xfrm_garbage_collect); 2566 2567 static void xfrm_garbage_collect_deferred(struct net *net) 2568 { 2569 flow_cache_flush_deferred(); 2570 __xfrm_garbage_collect(net); 2571 } 2572 2573 static void xfrm_init_pmtu(struct dst_entry *dst) 2574 { 2575 do { 2576 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2577 u32 pmtu, route_mtu_cached; 2578 2579 pmtu = dst_mtu(dst->child); 2580 xdst->child_mtu_cached = pmtu; 2581 2582 pmtu = xfrm_state_mtu(dst->xfrm, pmtu); 2583 2584 route_mtu_cached = dst_mtu(xdst->route); 2585 xdst->route_mtu_cached = route_mtu_cached; 2586 2587 if (pmtu > route_mtu_cached) 2588 pmtu = route_mtu_cached; 2589 2590 dst_metric_set(dst, RTAX_MTU, pmtu); 2591 } while ((dst = dst->next)); 2592 } 2593 2594 /* Check that the bundle accepts the flow and its components are 2595 * still valid. 2596 */ 2597 2598 static int xfrm_bundle_ok(struct xfrm_dst *first) 2599 { 2600 struct dst_entry *dst = &first->u.dst; 2601 struct xfrm_dst *last; 2602 u32 mtu; 2603 2604 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || 2605 (dst->dev && !netif_running(dst->dev))) 2606 return 0; 2607 2608 if (dst->flags & DST_XFRM_QUEUE) 2609 return 1; 2610 2611 last = NULL; 2612 2613 do { 2614 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2615 2616 if (dst->xfrm->km.state != XFRM_STATE_VALID) 2617 return 0; 2618 if (xdst->xfrm_genid != dst->xfrm->genid) 2619 return 0; 2620 if (xdst->num_pols > 0 && 2621 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) 2622 return 0; 2623 2624 mtu = dst_mtu(dst->child); 2625 if (xdst->child_mtu_cached != mtu) { 2626 last = xdst; 2627 xdst->child_mtu_cached = mtu; 2628 } 2629 2630 if (!dst_check(xdst->route, xdst->route_cookie)) 2631 return 0; 2632 mtu = dst_mtu(xdst->route); 2633 if (xdst->route_mtu_cached != mtu) { 2634 last = xdst; 2635 xdst->route_mtu_cached = mtu; 2636 } 2637 2638 dst = dst->child; 2639 } while (dst->xfrm); 2640 2641 if (likely(!last)) 2642 return 1; 2643 2644 mtu = last->child_mtu_cached; 2645 for (;;) { 2646 dst = &last->u.dst; 2647 2648 mtu = xfrm_state_mtu(dst->xfrm, mtu); 2649 if (mtu > last->route_mtu_cached) 2650 mtu = last->route_mtu_cached; 2651 dst_metric_set(dst, RTAX_MTU, mtu); 2652 2653 if (last == first) 2654 break; 2655 2656 last = (struct xfrm_dst *)last->u.dst.next; 2657 last->child_mtu_cached = mtu; 2658 } 2659 2660 return 1; 2661 } 2662 2663 static unsigned int xfrm_default_advmss(const struct dst_entry *dst) 2664 { 2665 return dst_metric_advmss(dst->path); 2666 } 2667 2668 static unsigned int xfrm_mtu(const struct dst_entry *dst) 2669 { 2670 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 2671 2672 return mtu ? : dst_mtu(dst->path); 2673 } 2674 2675 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, 2676 struct sk_buff *skb, 2677 const void *daddr) 2678 { 2679 return dst->path->ops->neigh_lookup(dst, skb, daddr); 2680 } 2681 2682 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) 2683 { 2684 struct net *net; 2685 int err = 0; 2686 if (unlikely(afinfo == NULL)) 2687 return -EINVAL; 2688 if (unlikely(afinfo->family >= NPROTO)) 2689 return -EAFNOSUPPORT; 2690 spin_lock(&xfrm_policy_afinfo_lock); 2691 if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) 2692 err = -ENOBUFS; 2693 else { 2694 struct dst_ops *dst_ops = afinfo->dst_ops; 2695 if (likely(dst_ops->kmem_cachep == NULL)) 2696 dst_ops->kmem_cachep = xfrm_dst_cache; 2697 if (likely(dst_ops->check == NULL)) 2698 dst_ops->check = xfrm_dst_check; 2699 if (likely(dst_ops->default_advmss == NULL)) 2700 dst_ops->default_advmss = xfrm_default_advmss; 2701 if (likely(dst_ops->mtu == NULL)) 2702 dst_ops->mtu = xfrm_mtu; 2703 if (likely(dst_ops->negative_advice == NULL)) 2704 dst_ops->negative_advice = xfrm_negative_advice; 2705 if (likely(dst_ops->link_failure == NULL)) 2706 dst_ops->link_failure = xfrm_link_failure; 2707 if (likely(dst_ops->neigh_lookup == NULL)) 2708 dst_ops->neigh_lookup = xfrm_neigh_lookup; 2709 if (likely(afinfo->garbage_collect == NULL)) 2710 afinfo->garbage_collect = xfrm_garbage_collect_deferred; 2711 rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); 2712 } 2713 spin_unlock(&xfrm_policy_afinfo_lock); 2714 2715 rtnl_lock(); 2716 for_each_net(net) { 2717 struct dst_ops *xfrm_dst_ops; 2718 2719 switch (afinfo->family) { 2720 case AF_INET: 2721 xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; 2722 break; 2723 #if IS_ENABLED(CONFIG_IPV6) 2724 case AF_INET6: 2725 xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; 2726 break; 2727 #endif 2728 default: 2729 BUG(); 2730 } 2731 *xfrm_dst_ops = *afinfo->dst_ops; 2732 } 2733 rtnl_unlock(); 2734 2735 return err; 2736 } 2737 EXPORT_SYMBOL(xfrm_policy_register_afinfo); 2738 2739 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) 2740 { 2741 int err = 0; 2742 if (unlikely(afinfo == NULL)) 2743 return -EINVAL; 2744 if (unlikely(afinfo->family >= NPROTO)) 2745 return -EAFNOSUPPORT; 2746 spin_lock(&xfrm_policy_afinfo_lock); 2747 if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { 2748 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) 2749 err = -EINVAL; 2750 else 2751 RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], 2752 NULL); 2753 } 2754 spin_unlock(&xfrm_policy_afinfo_lock); 2755 if (!err) { 2756 struct dst_ops *dst_ops = afinfo->dst_ops; 2757 2758 synchronize_rcu(); 2759 2760 dst_ops->kmem_cachep = NULL; 2761 dst_ops->check = NULL; 2762 dst_ops->negative_advice = NULL; 2763 dst_ops->link_failure = NULL; 2764 afinfo->garbage_collect = NULL; 2765 } 2766 return err; 2767 } 2768 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); 2769 2770 static void __net_init xfrm_dst_ops_init(struct net *net) 2771 { 2772 struct xfrm_policy_afinfo *afinfo; 2773 2774 rcu_read_lock(); 2775 afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); 2776 if (afinfo) 2777 net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; 2778 #if IS_ENABLED(CONFIG_IPV6) 2779 afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); 2780 if (afinfo) 2781 net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; 2782 #endif 2783 rcu_read_unlock(); 2784 } 2785 2786 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) 2787 { 2788 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2789 2790 switch (event) { 2791 case NETDEV_DOWN: 2792 xfrm_garbage_collect(dev_net(dev)); 2793 } 2794 return NOTIFY_DONE; 2795 } 2796 2797 static struct notifier_block xfrm_dev_notifier = { 2798 .notifier_call = xfrm_dev_event, 2799 }; 2800 2801 #ifdef CONFIG_XFRM_STATISTICS 2802 static int __net_init xfrm_statistics_init(struct net *net) 2803 { 2804 int rv; 2805 2806 if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, 2807 sizeof(struct linux_xfrm_mib), 2808 __alignof__(struct linux_xfrm_mib)) < 0) 2809 return -ENOMEM; 2810 rv = xfrm_proc_init(net); 2811 if (rv < 0) 2812 snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); 2813 return rv; 2814 } 2815 2816 static void xfrm_statistics_fini(struct net *net) 2817 { 2818 xfrm_proc_fini(net); 2819 snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); 2820 } 2821 #else 2822 static int __net_init xfrm_statistics_init(struct net *net) 2823 { 2824 return 0; 2825 } 2826 2827 static void xfrm_statistics_fini(struct net *net) 2828 { 2829 } 2830 #endif 2831 2832 static int __net_init xfrm_policy_init(struct net *net) 2833 { 2834 unsigned int hmask, sz; 2835 int dir; 2836 2837 if (net_eq(net, &init_net)) 2838 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", 2839 sizeof(struct xfrm_dst), 2840 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 2841 NULL); 2842 2843 hmask = 8 - 1; 2844 sz = (hmask+1) * sizeof(struct hlist_head); 2845 2846 net->xfrm.policy_byidx = xfrm_hash_alloc(sz); 2847 if (!net->xfrm.policy_byidx) 2848 goto out_byidx; 2849 net->xfrm.policy_idx_hmask = hmask; 2850 2851 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 2852 struct xfrm_policy_hash *htab; 2853 2854 net->xfrm.policy_count[dir] = 0; 2855 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); 2856 2857 htab = &net->xfrm.policy_bydst[dir]; 2858 htab->table = xfrm_hash_alloc(sz); 2859 if (!htab->table) 2860 goto out_bydst; 2861 htab->hmask = hmask; 2862 } 2863 2864 INIT_LIST_HEAD(&net->xfrm.policy_all); 2865 INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); 2866 if (net_eq(net, &init_net)) 2867 register_netdevice_notifier(&xfrm_dev_notifier); 2868 return 0; 2869 2870 out_bydst: 2871 for (dir--; dir >= 0; dir--) { 2872 struct xfrm_policy_hash *htab; 2873 2874 htab = &net->xfrm.policy_bydst[dir]; 2875 xfrm_hash_free(htab->table, sz); 2876 } 2877 xfrm_hash_free(net->xfrm.policy_byidx, sz); 2878 out_byidx: 2879 return -ENOMEM; 2880 } 2881 2882 static void xfrm_policy_fini(struct net *net) 2883 { 2884 struct xfrm_audit audit_info; 2885 unsigned int sz; 2886 int dir; 2887 2888 flush_work(&net->xfrm.policy_hash_work); 2889 #ifdef CONFIG_XFRM_SUB_POLICY 2890 audit_info.loginuid = INVALID_UID; 2891 audit_info.sessionid = -1; 2892 audit_info.secid = 0; 2893 xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); 2894 #endif 2895 audit_info.loginuid = INVALID_UID; 2896 audit_info.sessionid = -1; 2897 audit_info.secid = 0; 2898 xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); 2899 2900 WARN_ON(!list_empty(&net->xfrm.policy_all)); 2901 2902 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 2903 struct xfrm_policy_hash *htab; 2904 2905 WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); 2906 2907 htab = &net->xfrm.policy_bydst[dir]; 2908 sz = (htab->hmask + 1) * sizeof(struct hlist_head); 2909 WARN_ON(!hlist_empty(htab->table)); 2910 xfrm_hash_free(htab->table, sz); 2911 } 2912 2913 sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); 2914 WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); 2915 xfrm_hash_free(net->xfrm.policy_byidx, sz); 2916 } 2917 2918 static int __net_init xfrm_net_init(struct net *net) 2919 { 2920 int rv; 2921 2922 rv = xfrm_statistics_init(net); 2923 if (rv < 0) 2924 goto out_statistics; 2925 rv = xfrm_state_init(net); 2926 if (rv < 0) 2927 goto out_state; 2928 rv = xfrm_policy_init(net); 2929 if (rv < 0) 2930 goto out_policy; 2931 xfrm_dst_ops_init(net); 2932 rv = xfrm_sysctl_init(net); 2933 if (rv < 0) 2934 goto out_sysctl; 2935 return 0; 2936 2937 out_sysctl: 2938 xfrm_policy_fini(net); 2939 out_policy: 2940 xfrm_state_fini(net); 2941 out_state: 2942 xfrm_statistics_fini(net); 2943 out_statistics: 2944 return rv; 2945 } 2946 2947 static void __net_exit xfrm_net_exit(struct net *net) 2948 { 2949 xfrm_sysctl_fini(net); 2950 xfrm_policy_fini(net); 2951 xfrm_state_fini(net); 2952 xfrm_statistics_fini(net); 2953 } 2954 2955 static struct pernet_operations __net_initdata xfrm_net_ops = { 2956 .init = xfrm_net_init, 2957 .exit = xfrm_net_exit, 2958 }; 2959 2960 void __init xfrm_init(void) 2961 { 2962 register_pernet_subsys(&xfrm_net_ops); 2963 xfrm_input_init(); 2964 } 2965 2966 #ifdef CONFIG_AUDITSYSCALL 2967 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, 2968 struct audit_buffer *audit_buf) 2969 { 2970 struct xfrm_sec_ctx *ctx = xp->security; 2971 struct xfrm_selector *sel = &xp->selector; 2972 2973 if (ctx) 2974 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", 2975 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); 2976 2977 switch(sel->family) { 2978 case AF_INET: 2979 audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); 2980 if (sel->prefixlen_s != 32) 2981 audit_log_format(audit_buf, " src_prefixlen=%d", 2982 sel->prefixlen_s); 2983 audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); 2984 if (sel->prefixlen_d != 32) 2985 audit_log_format(audit_buf, " dst_prefixlen=%d", 2986 sel->prefixlen_d); 2987 break; 2988 case AF_INET6: 2989 audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); 2990 if (sel->prefixlen_s != 128) 2991 audit_log_format(audit_buf, " src_prefixlen=%d", 2992 sel->prefixlen_s); 2993 audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); 2994 if (sel->prefixlen_d != 128) 2995 audit_log_format(audit_buf, " dst_prefixlen=%d", 2996 sel->prefixlen_d); 2997 break; 2998 } 2999 } 3000 3001 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, 3002 kuid_t auid, u32 sessionid, u32 secid) 3003 { 3004 struct audit_buffer *audit_buf; 3005 3006 audit_buf = xfrm_audit_start("SPD-add"); 3007 if (audit_buf == NULL) 3008 return; 3009 xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); 3010 audit_log_format(audit_buf, " res=%u", result); 3011 xfrm_audit_common_policyinfo(xp, audit_buf); 3012 audit_log_end(audit_buf); 3013 } 3014 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); 3015 3016 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, 3017 kuid_t auid, u32 sessionid, u32 secid) 3018 { 3019 struct audit_buffer *audit_buf; 3020 3021 audit_buf = xfrm_audit_start("SPD-delete"); 3022 if (audit_buf == NULL) 3023 return; 3024 xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); 3025 audit_log_format(audit_buf, " res=%u", result); 3026 xfrm_audit_common_policyinfo(xp, audit_buf); 3027 audit_log_end(audit_buf); 3028 } 3029 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); 3030 #endif 3031 3032 #ifdef CONFIG_XFRM_MIGRATE 3033 static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, 3034 const struct xfrm_selector *sel_tgt) 3035 { 3036 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { 3037 if (sel_tgt->family == sel_cmp->family && 3038 xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, 3039 sel_cmp->family) && 3040 xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, 3041 sel_cmp->family) && 3042 sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && 3043 sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { 3044 return true; 3045 } 3046 } else { 3047 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { 3048 return true; 3049 } 3050 } 3051 return false; 3052 } 3053 3054 static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel, 3055 u8 dir, u8 type) 3056 { 3057 struct xfrm_policy *pol, *ret = NULL; 3058 struct hlist_head *chain; 3059 u32 priority = ~0U; 3060 3061 read_lock_bh(&xfrm_policy_lock); 3062 chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); 3063 hlist_for_each_entry(pol, chain, bydst) { 3064 if (xfrm_migrate_selector_match(sel, &pol->selector) && 3065 pol->type == type) { 3066 ret = pol; 3067 priority = ret->priority; 3068 break; 3069 } 3070 } 3071 chain = &init_net.xfrm.policy_inexact[dir]; 3072 hlist_for_each_entry(pol, chain, bydst) { 3073 if (xfrm_migrate_selector_match(sel, &pol->selector) && 3074 pol->type == type && 3075 pol->priority < priority) { 3076 ret = pol; 3077 break; 3078 } 3079 } 3080 3081 if (ret) 3082 xfrm_pol_hold(ret); 3083 3084 read_unlock_bh(&xfrm_policy_lock); 3085 3086 return ret; 3087 } 3088 3089 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) 3090 { 3091 int match = 0; 3092 3093 if (t->mode == m->mode && t->id.proto == m->proto && 3094 (m->reqid == 0 || t->reqid == m->reqid)) { 3095 switch (t->mode) { 3096 case XFRM_MODE_TUNNEL: 3097 case XFRM_MODE_BEET: 3098 if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, 3099 m->old_family) && 3100 xfrm_addr_equal(&t->saddr, &m->old_saddr, 3101 m->old_family)) { 3102 match = 1; 3103 } 3104 break; 3105 case XFRM_MODE_TRANSPORT: 3106 /* in case of transport mode, template does not store 3107 any IP addresses, hence we just compare mode and 3108 protocol */ 3109 match = 1; 3110 break; 3111 default: 3112 break; 3113 } 3114 } 3115 return match; 3116 } 3117 3118 /* update endpoint address(es) of template(s) */ 3119 static int xfrm_policy_migrate(struct xfrm_policy *pol, 3120 struct xfrm_migrate *m, int num_migrate) 3121 { 3122 struct xfrm_migrate *mp; 3123 int i, j, n = 0; 3124 3125 write_lock_bh(&pol->lock); 3126 if (unlikely(pol->walk.dead)) { 3127 /* target policy has been deleted */ 3128 write_unlock_bh(&pol->lock); 3129 return -ENOENT; 3130 } 3131 3132 for (i = 0; i < pol->xfrm_nr; i++) { 3133 for (j = 0, mp = m; j < num_migrate; j++, mp++) { 3134 if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) 3135 continue; 3136 n++; 3137 if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && 3138 pol->xfrm_vec[i].mode != XFRM_MODE_BEET) 3139 continue; 3140 /* update endpoints */ 3141 memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, 3142 sizeof(pol->xfrm_vec[i].id.daddr)); 3143 memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr, 3144 sizeof(pol->xfrm_vec[i].saddr)); 3145 pol->xfrm_vec[i].encap_family = mp->new_family; 3146 /* flush bundles */ 3147 atomic_inc(&pol->genid); 3148 } 3149 } 3150 3151 write_unlock_bh(&pol->lock); 3152 3153 if (!n) 3154 return -ENODATA; 3155 3156 return 0; 3157 } 3158 3159 static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) 3160 { 3161 int i, j; 3162 3163 if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) 3164 return -EINVAL; 3165 3166 for (i = 0; i < num_migrate; i++) { 3167 if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, 3168 m[i].old_family) && 3169 xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, 3170 m[i].old_family)) 3171 return -EINVAL; 3172 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || 3173 xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) 3174 return -EINVAL; 3175 3176 /* check if there is any duplicated entry */ 3177 for (j = i + 1; j < num_migrate; j++) { 3178 if (!memcmp(&m[i].old_daddr, &m[j].old_daddr, 3179 sizeof(m[i].old_daddr)) && 3180 !memcmp(&m[i].old_saddr, &m[j].old_saddr, 3181 sizeof(m[i].old_saddr)) && 3182 m[i].proto == m[j].proto && 3183 m[i].mode == m[j].mode && 3184 m[i].reqid == m[j].reqid && 3185 m[i].old_family == m[j].old_family) 3186 return -EINVAL; 3187 } 3188 } 3189 3190 return 0; 3191 } 3192 3193 int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, 3194 struct xfrm_migrate *m, int num_migrate, 3195 struct xfrm_kmaddress *k) 3196 { 3197 int i, err, nx_cur = 0, nx_new = 0; 3198 struct xfrm_policy *pol = NULL; 3199 struct xfrm_state *x, *xc; 3200 struct xfrm_state *x_cur[XFRM_MAX_DEPTH]; 3201 struct xfrm_state *x_new[XFRM_MAX_DEPTH]; 3202 struct xfrm_migrate *mp; 3203 3204 if ((err = xfrm_migrate_check(m, num_migrate)) < 0) 3205 goto out; 3206 3207 /* Stage 1 - find policy */ 3208 if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { 3209 err = -ENOENT; 3210 goto out; 3211 } 3212 3213 /* Stage 2 - find and update state(s) */ 3214 for (i = 0, mp = m; i < num_migrate; i++, mp++) { 3215 if ((x = xfrm_migrate_state_find(mp))) { 3216 x_cur[nx_cur] = x; 3217 nx_cur++; 3218 if ((xc = xfrm_state_migrate(x, mp))) { 3219 x_new[nx_new] = xc; 3220 nx_new++; 3221 } else { 3222 err = -ENODATA; 3223 goto restore_state; 3224 } 3225 } 3226 } 3227 3228 /* Stage 3 - update policy */ 3229 if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0) 3230 goto restore_state; 3231 3232 /* Stage 4 - delete old state(s) */ 3233 if (nx_cur) { 3234 xfrm_states_put(x_cur, nx_cur); 3235 xfrm_states_delete(x_cur, nx_cur); 3236 } 3237 3238 /* Stage 5 - announce */ 3239 km_migrate(sel, dir, type, m, num_migrate, k); 3240 3241 xfrm_pol_put(pol); 3242 3243 return 0; 3244 out: 3245 return err; 3246 3247 restore_state: 3248 if (pol) 3249 xfrm_pol_put(pol); 3250 if (nx_cur) 3251 xfrm_states_put(x_cur, nx_cur); 3252 if (nx_new) 3253 xfrm_states_delete(x_new, nx_new); 3254 3255 return err; 3256 } 3257 EXPORT_SYMBOL(xfrm_migrate); 3258 #endif 3259