/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN 100

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
static struct dst_entry *xfrm_policy_sk_bundles;
static DEFINE_RWLOCK(xfrm_policy_lock);

static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(unsigned long arg);

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
}

static inline bool
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
}

bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return false;
}

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= NPROTO))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}
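/* Note on the pair above: xfrm_policy_get_afinfo() returns with the RCU
 * read lock held on success, so every successful get must be paired with
 * xfrm_policy_put_afinfo(), which drops it.  Usage pattern (sketch,
 * hypothetical caller):
 *
 *	afinfo = xfrm_policy_get_afinfo(family);
 *	if (!afinfo)
 *		return -EAFNOSUPPORT;
 *	... use afinfo->dst_lookup() etc. under the RCU read lock ...
 *	xfrm_policy_put_afinfo(afinfo);
 */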
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(net, tos, saddr, daddr);

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr, sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr, sizeof(*prev_daddr));
	}

	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (unlikely(xp->walk.dead))
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}
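/* Lifetime handling above: a soft expiry only warns the key manager
 * (km_policy_expired(..., hard=0)) and re-arms the timer XFRM_KM_TIMEOUT
 * seconds out, while a hard expiry deletes the policy.  For example, a
 * policy added at t=0 with soft_add_expires_seconds=3600 and
 * hard_add_expires_seconds=3900 triggers a warning at t=3600 and is torn
 * down at t=3900 unless it was replaced in between.
 */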
static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(pol->walk.dead))
		flo = NULL;
	else
		xfrm_pol_hold(pol);

	return flo;
}

static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	return !pol->walk.dead;
}

static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
}

static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};

/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		skb_queue_head_init(&policy->polq.hold_queue);
		setup_timer(&policy->timer, xfrm_policy_timer,
			    (unsigned long)policy);
		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
			    (unsigned long)policy);
		policy->flo.ops = &xfrm_policy_fc_ops;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
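/* Example (sketch, hypothetical caller such as af_key or xfrm_user):
 *
 *	struct xfrm_policy *pol = xfrm_policy_alloc(net, GFP_KERNEL);
 *
 *	if (!pol)
 *		return -ENOMEM;
 *	... fill in pol->selector, pol->action, pol->lft, templates ...
 *	err = xfrm_policy_insert(XFRM_POLICY_OUT, pol, excl);
 *
 * The policy starts with refcnt == 1, owned by the caller; insertion
 * takes its own reference.
 */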
/* Destroy xfrm_policy: descendant resources must be released by this moment. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->walk.dead);

	if (del_timer(&policy->timer))
		BUG();

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

static void xfrm_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

/* Rule must be locked. Release descendant resources, announce
 * the entry dead. The rule must already be unlinked from the lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	policy->walk.dead = 1;

	atomic_inc(&policy->genid);

	del_timer(&policy->polq.hold_timer);
	xfrm_queue_purge(&policy->polq.hold_queue);

	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
}

static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

	return net->xfrm.policy_bydst[dir].table + hash;
}

static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			hlist_del(entry);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del(entry);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = entry;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(struct net *net, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
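/* Each resize doubles the number of buckets: xfrm_new_hash_mask() maps an
 * old mask of 2^n - 1 to 2^(n+1) - 1, e.g. 0xf (16 buckets) -> 0x1f
 * (32 buckets).  xfrm_dst_hash_transfer() then rehashes every chain into
 * the new table, keeping entries that land in the same new bucket in
 * their original relative order, which preserves policy precedence
 * within a chain.
 */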
static void xfrm_byidx_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = net->xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
{
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
	}
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);

	mutex_unlock(&hash_resize_mutex);
}
/* Generate a new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_node *entry;
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
		found = 0;
		hlist_for_each_entry(p, entry, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
	del_timer(&pq->hold_timer);
	spin_unlock_bh(&pq->hold_queue.lock);

	if (skb_queue_empty(&list))
		return;

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
	mod_timer(&pq->hold_timer, jiffies);
	spin_unlock_bh(&pq->hold_queue.lock);
}

static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	u32 mark = policy->mark.v & policy->mark.m;

	if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
		return true;

	if ((mark & pol->mark.m) == pol->mark.v &&
	    policy->priority == pol->priority)
		return true;

	return false;
}
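/* Mark matching example: a policy with mark.v = 0x10 and mark.m = 0xf0
 * matches any flow whose mark has (mark & 0xf0) == 0x10, e.g. 0x10..0x1f.
 * xfrm_policy_mark_match() above additionally treats two policies as the
 * same entry when value and mask are identical, or when the masked value
 * and the priority both line up; insertion below relies on this to decide
 * whether a new policy replaces an existing one.
 */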
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct net *net = xp_net(policy);
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *entry, *newpos;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_policy_mark_match(policy, pol) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	net->xfrm.policy_count[dir]++;
	atomic_inc(&flow_cache_genid);
	rt_genid_bump(net);
	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
		__xfrm_policy_unlink(delpol, dir);
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	list_add(&policy->walk.all, &net->xfrm.policy_all);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == type &&
		    (mark & pol->mark.m) == pol->mark.v &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    (mark & pol->mark.m) == pol->mark.v) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
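/* Example (sketch, hypothetical caller): deleting a policy by selector
 * and security context.  With delete != 0 the entry is unlinked under
 * the policy lock and then killed outside it; the returned reference
 * still belongs to the caller:
 *
 *	pol = xfrm_policy_bysel_ctx(net, mark, type, dir,
 *				    &sel, ctx, 1, &err);
 *	if (pol)
 *		xfrm_pol_put(pol);
 *	else if (err)
 *		return err;
 */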
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

		hlist_for_each_entry(pol, entry,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->sessionid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif

int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

	again1:
		hlist_for_each_entry(pol, entry,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			__xfrm_policy_unlink(pol, dir);
			write_unlock_bh(&xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->sessionid,
						 audit_info->secid);

			xfrm_policy_kill(pol);

			write_lock_bh(&xfrm_policy_lock);
			goto again1;
		}

		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol, entry,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				xfrm_policy_kill(pol);

				write_lock_bh(&xfrm_policy_lock);
				goto again2;
			}
		}

	}
	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
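/* Example (sketch, hypothetical dumper): iterating all policies of one
 * type with the walk API.  The walk keeps its position on the
 * net->xfrm.policy_all list, so it can be resumed across calls:
 *
 *	struct xfrm_policy_walk walk;
 *
 *	xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN);
 *	err = xfrm_policy_walk(net, &walk, dump_one_policy, &data);
 *	xfrm_policy_walk_done(&walk);
 *
 * where dump_one_policy() is the caller's callback; a non-zero return
 * value stops the walk and is passed back as the error.
 */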
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	INIT_LIST_HEAD(&walk->walk.all);
	walk->walk.dead = 1;
	walk->type = type;
	walk->seq = 0;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
{
	if (list_empty(&walk->walk.all))
		return;

	write_lock_bh(&xfrm_policy_lock);
	list_del(&walk->walk.all);
	write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	const struct xfrm_selector *sel = &pol->selector;
	int ret = -ESRCH;
	bool match;

	if (pol->family != family ||
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
						  dir);

	return ret;
}

static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
						     const struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	const xfrm_address_t *daddr, *saddr;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static struct xfrm_policy *
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}

static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
{
	struct xfrm_policy *pol;

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));

	pol = __xfrm_policy_lookup(net, fl, family, dir);
	if (IS_ERR_OR_NULL(pol))
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}
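/* Lookup order in xfrm_policy_lookup_bytype() above: the hashed chain for
 * the exact daddr/saddr pair is scanned first; the inexact chain
 * (prefix-based selectors that cannot be hashed) is scanned second and
 * can only win with a strictly lower (better) priority than the best
 * hashed match.  For example, a hashed match at priority 100 loses to an
 * inexact match at priority 50 but beats one at 100 or more.
 */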
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
						 const struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
		int err = 0;

		if (match) {
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
			err = security_xfrm_policy_lookup(pol->security,
						      fl->flowi_secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
out:
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct net *net = xp_net(pol);
	struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
						     pol->family, dir);

	list_add(&pol->walk.all, &net->xfrm.policy_all);
	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
	net->xfrm.policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	struct net *net = xp_net(pol);

	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del(&pol->bydst);
	hlist_del(&pol->byidx);
	list_del(&pol->walk.all);
	net->xfrm.policy_count[dir]--;

	return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct net *net = xp_net(pol);
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

		/* Unlinking always succeeds. This is the only function
		 * allowed to delete or replace a socket policy.
		 */
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->mark = old->mark;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}

static int
xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(net, local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
{
	struct net *net = xp_net(policy);
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			if (xfrm_addr_any(local, tmpl->encap_family)) {
				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		} else if (error == -ESRCH)
			error = -EAGAIN;

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx >= 0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

fail:
	for (cnx--; cnx >= 0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}
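/* Template resolution above: for every template in the policy,
 * xfrm_state_find() must produce a usable SA.  A missing SA (-ESRCH) is
 * reported as -EAGAIN so the caller can wait for the key manager to
 * negotiate one, unless the template is marked optional, in which case
 * it is simply skipped.  With sub-policies, xfrm_tmpl_resolve() collects
 * the states of all policies into one array and sorts them into outbound
 * processing order.
 */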
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static inline int xfrm_get_tos(const struct flowi *fl, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;

	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (xdst->route == NULL) {
		/* Dummy bundle - if it has xfrms we were not
		 * able to build a bundle because template resolution
		 * failed. It means we need to try to resolve again.
		 */
		if (xdst->num_xfrms > 0)
			return NULL;
	} else if (dst->flags & DST_XFRM_QUEUE) {
		return NULL;
	} else {
		/* Real bundle */
		if (stale_bundle(dst))
			return NULL;
	}

	dst_hold(dst);
	return flo;
}

static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (!xdst->route)
		return 0;
	if (stale_bundle(dst))
		return 0;

	return 1;
}

static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	dst_free(dst);
}

static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};

static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_ops *dst_ops;
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);

	if (likely(xdst)) {
		struct dst_entry *dst = &xdst->u.dst;

		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
		xdst->flo.ops = &xfrm_bundle_fc_ops;
		if (afinfo->init_dst)
			afinfo->init_dst(net, xdst);
	} else
		xdst = ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
				const struct flowi *fl)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->fill_dst(xdst, dev, fl);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
					    const struct flowi *fl,
					    struct dst_entry *dst)
{
	struct net *net = xp_net(policy);
	unsigned long now = jiffies;
	struct net_device *dev;
	struct xfrm_mode *inner_mode;
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
	int nfheader_len = 0;
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

		if (xfrm[i]->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(xfrm[i],
							xfrm_af2proto(family));
			if (!inner_mode) {
				err = -EAFNOSUPPORT;
				dst_release(dst);
				goto put_states;
			}
		} else
			inner_mode = xfrm[i]->inner_mode;

		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
		dst_copy_metrics(dst1, dst);

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
					      family);
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
		xdst->xfrm_genid = xfrm[i]->genid;

		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
		dst1->output = inner_mode->afinfo->output;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

		err = xfrm_fill_dst(xdst, dev, fl);
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}
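/* Resulting bundle layout (sketch): dst0 is returned to the caller and
 * each ->child link peels off one transformation on output, ending at
 * the plain routing entry that carries the packet:
 *
 *	dst0 (xfrm[0]) -> dst1 (xfrm[1]) -> ... -> dst (route)
 *
 * Every xfrm_dst in the chain also records in ->route the route its
 * transform was resolved against, which stale_bundle() revalidates later.
 */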
static inline int
xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}
	memcpy(*target, src, size);
	return 0;
}

static inline int
xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static inline int
xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

static int xfrm_expand_policies(const struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int i;

	if (*num_pols == 0 || !pols[0]) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(pols[0]))
		return PTR_ERR(pols[0]);

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				xfrm_pols_put(pols, *num_pols);
				return PTR_ERR(pols[1]);
			}
			(*num_pols)++;
			(*num_xfrms) += pols[1]->xfrm_nr;
		}
	}
#endif
	for (i = 0; i < *num_pols; i++) {
		if (pols[i]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;

}

static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst;
	struct xfrm_dst *xdst;
	int err;

	/* Try to instantiate a bundle */
	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
	if (err <= 0) {
		if (err != 0 && err != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		return ERR_PTR(err);
	}

	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(dst);
	}

	xdst = (struct xfrm_dst *)dst;
	xdst->num_xfrms = err;
	if (num_pols > 1)
		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
	else
		err = xfrm_dst_update_origin(dst, fl);
	if (unlikely(err)) {
		dst_free(dst);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}
static void xfrm_policy_queue_process(unsigned long arg)
{
	int err = 0;
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct net_device *dev;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	dst_hold(dst->path);
	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
			  sk, 0);
	if (IS_ERR(dst))
		goto purge_queue;

	if (dst->flags & DST_XFRM_QUEUE) {
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
		mod_timer(&pq->hold_timer, jiffies + pq->timeout);
		return;
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
				  &fl, skb->sk, 0);
		if (IS_ERR(dst)) {
			dev_put(skb->dev);
			kfree_skb(skb);
			continue;
		}

		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

		dev = skb->dev;
		err = dst_output(skb);
		dev_put(dev);
	}

	return;

purge_queue:
	pq->timeout = 0;
	xfrm_queue_purge(&pq->hold_queue);
}

static int xdst_queue_output(struct sk_buff *skb)
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
	struct xfrm_policy_queue *pq = &xdst->pols[0]->polq;

	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);
	dev_hold(skb->dev);

	spin_lock_bh(&pq->hold_queue.lock);

	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
	}

	__skb_queue_tail(&pq->hold_queue, skb);
	mod_timer(&pq->hold_timer, sched_next);

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}
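/* Hold-queue backoff: the first queued packet arms the timer at
 * XFRM_QUEUE_TMO_MIN (HZ/10).  While the bundle still resolves to a
 * DST_XFRM_QUEUE route, xfrm_policy_queue_process() doubles the timeout
 * on each run (HZ/10, HZ/5, ...) and purges the queue once the timeout
 * reaches XFRM_QUEUE_TMO_MAX (60*HZ).  At most XFRM_MAX_QUEUE_LEN
 * packets are ever held; beyond that xdst_queue_output() drops.
 */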
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct dst_entry *dst,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	int err;
	struct net_device *dev;
	struct dst_entry *dst1;
	struct xfrm_dst *xdst;

	xdst = xfrm_alloc_dst(net, family);
	if (IS_ERR(xdst))
		return xdst;

	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 ||
	    (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP))
		return xdst;

	dst1 = &xdst->u.dst;
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dst1, dst);

	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
	dst1->lastuse = jiffies;

	dst1->input = dst_discard;
	dst1->output = xdst_queue_output;

	dst_hold(dst);
	dst1->child = dst;
	dst1->path = dst;

	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto free_dst;

out:
	return xdst;

free_dst:
	dst_release(dst1);
	xdst = ERR_PTR(err);
	goto out;
}

static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
		   struct flow_cache_object *oldflo, void *ctx)
{
	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		if (pol_dead) {
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
		pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
	} else if (new_xdst == NULL) {
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen for newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* Flow cache does not have reference, it dst_free()'s,
	 * but we do need to return one reference for original caller */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there are no bundles to instantiate:
	 * either because the policy blocks, has no transformations or
	 * we could not build a template (no xfrm_states). */
	xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}

static struct dst_entry *make_blackhole(struct net *net, u16 family,
					struct dst_entry *dst_orig)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_entry *ret;

	if (!afinfo) {
		dst_release(dst_orig);
		return ERR_PTR(-EINVAL);
	} else {
		ret = afinfo->blackhole_route(net, dst_orig);
	}
	xfrm_policy_put_afinfo(afinfo);

	return ret;
}
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      struct sock *sk, int flags)
{
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
	struct dst_entry *dst, *route;
	u16 family = dst_orig->ops->family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;

restart:
	dst = NULL;
	xdst = NULL;
	route = NULL;

	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto dropdst;

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
			}

			dst_hold(&xdst->u.dst);

			spin_lock_bh(&xfrm_policy_sk_bundle_lock);
			xdst->u.dst.next = xfrm_policy_sk_bundles;
			xfrm_policy_sk_bundles = &xdst->u.dst;
			spin_unlock_bh(&xfrm_policy_sk_bundle_lock);

			route = xdst->route;
		}
	}

	if (xdst == NULL) {
		/* To accelerate a bit... */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
			goto nopol;

		flo = flow_cache_lookup(net, fl, family, dir,
					xfrm_bundle_lookup, dst_orig);
		if (flo == NULL)
			goto nopol;
		if (IS_ERR(flo)) {
			err = PTR_ERR(flo);
			goto dropdst;
		}
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with null route is when the template could
		 * not be resolved. It means policies are there, but
		 * the bundle could not be created, since we don't yet
		 * have the xfrm_state's. We need to wait for KM to
		 * negotiate new SA's or bail out with error. */
		if (net->xfrm.sysctl_larval_drop) {
			/* EREMOTE tells the caller to generate
			 * a one-shot blackhole route. */
			dst_release(dst);
			xfrm_pols_put(pols, drop_pols);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);

			return make_blackhole(net, family, dst_orig);
		}
		if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) {
			DECLARE_WAITQUEUE(wait, current);

			add_wait_queue(&net->xfrm.km_waitq, &wait);
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
			set_current_state(TASK_RUNNING);
			remove_wait_queue(&net->xfrm.km_waitq, &wait);

			if (!signal_pending(current)) {
				dst_release(dst);
				goto restart;
			}

			err = -ERESTART;
		} else
			err = -EAGAIN;

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
	}

no_transform:
	if (num_pols == 0)
		goto nopol;

	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
		goto error;
	}

	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();

	if (num_xfrms < 0) {
		/* Prohibit the flow */
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
		dst = dst_orig;
	}
ok:
	xfrm_pols_put(pols, drop_pols);
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
	return dst;

nopol:
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
		goto ok;
	}
	err = -ENOENT;
error:
	dst_release(dst);
dropdst:
	dst_release(dst_orig);
	xfrm_pols_put(pols, drop_pols);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup);

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}

/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we make this in maximally
 * stupid way. Shame on me. :-)	Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}
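/* xfrm_state_ok() above accepts an SA for a template when proto, mode,
 * reqid and (outside transport mode) the endpoint addresses agree, with
 * the SPI and algorithm checks relaxed for wildcard templates.
 * Kernel-created states (xfrm_state_kern()) only ever satisfy optional
 * templates.
 */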
 */
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}

int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl, reverse);
	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(__xfrm_decode_session);

static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check the SAs that were used against their selectors.
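	 * For example, an SA whose selector was negotiated for
	 * 10.0.1.0/24 must not be accepted for a decapsulated packet
	 * from outside that prefix; xfrm_selector_match() below
	 * enforces exactly this.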
	 */
	if (skb->sp) {
		int i;

		for (i = skb->sp->len - 1; i >= 0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find the corresponding xfrm.
		 * Order is _important_.  Later we will implement some
		 * barriers, but at the moment barriers are implied
		 * between every two transformations.
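		 *
		 * For example, with policy templates
		 * [IPComp transport, ESP tunnel] (output order), the
		 * decapsulated secpath is [ESP tunnel, IPComp transport];
		 * the loop below matches the ESP tunnel template at
		 * xvec[0], advances k, and then requires the IPComp
		 * template to be satisfied at xvec[1].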
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct flowi fl;
	struct dst_entry *dst;
	int res = 1;

	if (xfrm_decode_session(skb, &fl, family) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
		return 0;
	}

	skb_dst_force(skb);

	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
	if (IS_ERR(dst)) {
		res = 0;
		dst = NULL;
	}
	skb_dst_set(skb, dst);
	return res;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use.  We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted, we do not go looking around for all the parent
	 * XFRM dsts that reference it in order to invalidate them.  It
	 * is just too much work.  Instead we make the checks here on
	 * every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst, which causes its ->obsolete field to be set to
	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
	 * this, we want to force a new route lookup.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok((struct xfrm_dst *)dst);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = dev_net(dev)->loopback_dev;
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible: such a dst must be popped before it reaches the
	 * point of failure.
	 */
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst) {
		if (dst->obsolete) {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void __xfrm_garbage_collect(struct net *net)
{
	struct dst_entry *head, *next;

	spin_lock_bh(&xfrm_policy_sk_bundle_lock);
	head = xfrm_policy_sk_bundles;
	xfrm_policy_sk_bundles = NULL;
	spin_unlock_bh(&xfrm_policy_sk_bundle_lock);

	while (head) {
		next = head->next;
		dst_free(head);
		head = next;
	}
}

static void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush();
	__xfrm_garbage_collect(net);
}

static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred();
	__xfrm_garbage_collect(net);
}

static void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst_metric_set(dst, RTAX_MTU, pmtu);
	} while ((dst = dst->next));
}

/* Check that the bundle accepts the flow and that its components are
 * still valid.
 */

static int xfrm_bundle_ok(struct xfrm_dst *first)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst_metric_set(dst, RTAX_MTU, mtu);

		if (last == first)
			break;

		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	return dst_metric_advmss(dst->path);
}

static unsigned int xfrm_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

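	/* GCC's "x ?: y" extension: use the raw MTU metric when it is
	 * non-zero, otherwise fall back to the path's MTU.
	 */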
	return mtu ? : dst_mtu(dst->path);
}

static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
}

int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	struct net *net;
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);

	rtnl_lock();
	for_each_net(net) {
		struct dst_ops *xfrm_dst_ops;

		switch (afinfo->family) {
		case AF_INET:
			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
			break;
#if IS_ENABLED(CONFIG_IPV6)
		case AF_INET6:
			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
			break;
#endif
		default:
			BUG();
		}
		*xfrm_dst_ops = *afinfo->dst_ops;
	}
	rtnl_unlock();

	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
					 NULL);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);
	if (!err) {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		synchronize_rcu();

		dst_ops->kmem_cachep = NULL;
		dst_ops->check = NULL;
		dst_ops->negative_advice = NULL;
		dst_ops->link_failure = NULL;
		afinfo->garbage_collect = NULL;
	}
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static void __net_init xfrm_dst_ops_init(struct net *net)
{
	struct xfrm_policy_afinfo *afinfo;

	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
	if (afinfo)
		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
#if IS_ENABLED(CONFIG_IPV6)
	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
	if (afinfo)
		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
#endif
	rcu_read_unlock();
}

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
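	/* On NETDEV_DOWN, flush the flow cache and free the per-socket
	 * bundle list so that stale bundles referencing this device are
	 * not kept alive.
	 */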

	switch (event) {
	case NETDEV_DOWN:
		xfrm_garbage_collect(dev_net(dev));
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
	.notifier_call = xfrm_dev_event,
};

#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;

	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
			  sizeof(struct linux_xfrm_mib),
			  __alignof__(struct linux_xfrm_mib)) < 0)
		return -ENOMEM;
	rv = xfrm_proc_init(net);
	if (rv < 0)
		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
	return rv;
}

static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
}
#else
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

static void xfrm_statistics_fini(struct net *net)
{
}
#endif

static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		goto out_byidx;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		net->xfrm.policy_count[dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
	}

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	for (dir--; dir >= 0; dir--) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
	struct xfrm_audit audit_info;
	unsigned int sz;
	int dir;

	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
	audit_info.loginuid = INVALID_UID;
	audit_info.sessionid = -1;
	audit_info.secid = 0;
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
#endif
	audit_info.loginuid = INVALID_UID;
	audit_info.sessionid = -1;
	audit_info.secid = 0;
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);

	WARN_ON(!list_empty(&net->xfrm.policy_all));

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));

		htab = &net->xfrm.policy_bydst[dir];
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
	}

	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
}

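/* Per-namespace setup: statistics, state, policy, dst_ops and sysctl are
 * brought up in order and unwound in reverse on failure, mirroring the
 * teardown order in xfrm_net_exit().
 */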
static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	xfrm_dst_ops_init(net);
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	return 0;

out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

static void __net_exit xfrm_net_exit(struct net *net)
{
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}

static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
	xfrm_input_init();
}

#ifdef CONFIG_AUDITSYSCALL
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}

void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
			   kuid_t auid, u32 sessionid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-add");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
			      kuid_t auid, u32 sessionid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-delete");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

#ifdef CONFIG_XFRM_MIGRATE
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
				    sel_cmp->family) &&
		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
				    sel_cmp->family) &&
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
			return true;
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
			return true;
		}
	}
	return false;
}

static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &init_net.xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type &&
		    pol->priority < priority) {
			ret = pol;
			break;
		}
	}

	if (ret)
		xfrm_pol_hold(ret);

	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* in transport mode the template does not store
			   any IP addresses, hence we compare only the
			   mode and the protocol */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update the endpoint address(es) of the template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
	if (unlikely(pol->walk.dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
			atomic_inc(&pol->genid);
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}

int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	/* Stage 1 - find policy */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
#endif