1 /* 2 * xfrm_policy.c 3 * 4 * Changes: 5 * Mitsuru KANDA @USAGI 6 * Kazunori MIYAZAWA @USAGI 7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com> 8 * IPv6 support 9 * Kazunori MIYAZAWA @USAGI 10 * YOSHIFUJI Hideaki 11 * Split up af-specific portion 12 * Derek Atkins <derek@ihtfp.com> Add the post_input processor 13 * 14 */ 15 16 #include <linux/err.h> 17 #include <linux/slab.h> 18 #include <linux/kmod.h> 19 #include <linux/list.h> 20 #include <linux/spinlock.h> 21 #include <linux/workqueue.h> 22 #include <linux/notifier.h> 23 #include <linux/netdevice.h> 24 #include <linux/netfilter.h> 25 #include <linux/module.h> 26 #include <linux/cache.h> 27 #include <linux/audit.h> 28 #include <net/dst.h> 29 #include <net/flow.h> 30 #include <net/xfrm.h> 31 #include <net/ip.h> 32 #ifdef CONFIG_XFRM_STATISTICS 33 #include <net/snmp.h> 34 #endif 35 36 #include "xfrm_hash.h" 37 38 #define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) 39 #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) 40 #define XFRM_MAX_QUEUE_LEN 100 41 42 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); 43 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] 44 __read_mostly; 45 46 static struct kmem_cache *xfrm_dst_cache __read_mostly; 47 48 static void xfrm_init_pmtu(struct dst_entry *dst); 49 static int stale_bundle(struct dst_entry *dst); 50 static int xfrm_bundle_ok(struct xfrm_dst *xdst); 51 static void xfrm_policy_queue_process(unsigned long arg); 52 53 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 54 int dir); 55 56 static inline bool 57 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) 58 { 59 const struct flowi4 *fl4 = &fl->u.ip4; 60 61 return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && 62 addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && 63 !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && 64 !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && 65 (fl4->flowi4_proto == sel->proto 
|| !sel->proto) && 66 (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); 67 } 68 69 static inline bool 70 __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) 71 { 72 const struct flowi6 *fl6 = &fl->u.ip6; 73 74 return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && 75 addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && 76 !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && 77 !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && 78 (fl6->flowi6_proto == sel->proto || !sel->proto) && 79 (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); 80 } 81 82 bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, 83 unsigned short family) 84 { 85 switch (family) { 86 case AF_INET: 87 return __xfrm4_selector_match(sel, fl); 88 case AF_INET6: 89 return __xfrm6_selector_match(sel, fl); 90 } 91 return false; 92 } 93 94 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) 95 { 96 struct xfrm_policy_afinfo *afinfo; 97 98 if (unlikely(family >= NPROTO)) 99 return NULL; 100 rcu_read_lock(); 101 afinfo = rcu_dereference(xfrm_policy_afinfo[family]); 102 if (unlikely(!afinfo)) 103 rcu_read_unlock(); 104 return afinfo; 105 } 106 107 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) 108 { 109 rcu_read_unlock(); 110 } 111 112 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, 113 const xfrm_address_t *saddr, 114 const xfrm_address_t *daddr, 115 int family) 116 { 117 struct xfrm_policy_afinfo *afinfo; 118 struct dst_entry *dst; 119 120 afinfo = xfrm_policy_get_afinfo(family); 121 if (unlikely(afinfo == NULL)) 122 return ERR_PTR(-EAFNOSUPPORT); 123 124 dst = afinfo->dst_lookup(net, tos, saddr, daddr); 125 126 xfrm_policy_put_afinfo(afinfo); 127 128 return dst; 129 } 130 131 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, 132 xfrm_address_t *prev_saddr, 133 xfrm_address_t *prev_daddr, 
134 int family) 135 { 136 struct net *net = xs_net(x); 137 xfrm_address_t *saddr = &x->props.saddr; 138 xfrm_address_t *daddr = &x->id.daddr; 139 struct dst_entry *dst; 140 141 if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { 142 saddr = x->coaddr; 143 daddr = prev_daddr; 144 } 145 if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { 146 saddr = prev_saddr; 147 daddr = x->coaddr; 148 } 149 150 dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); 151 152 if (!IS_ERR(dst)) { 153 if (prev_saddr != saddr) 154 memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); 155 if (prev_daddr != daddr) 156 memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); 157 } 158 159 return dst; 160 } 161 162 static inline unsigned long make_jiffies(long secs) 163 { 164 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) 165 return MAX_SCHEDULE_TIMEOUT-1; 166 else 167 return secs*HZ; 168 } 169 170 static void xfrm_policy_timer(unsigned long data) 171 { 172 struct xfrm_policy *xp = (struct xfrm_policy *)data; 173 unsigned long now = get_seconds(); 174 long next = LONG_MAX; 175 int warn = 0; 176 int dir; 177 178 read_lock(&xp->lock); 179 180 if (unlikely(xp->walk.dead)) 181 goto out; 182 183 dir = xfrm_policy_id2dir(xp->index); 184 185 if (xp->lft.hard_add_expires_seconds) { 186 long tmo = xp->lft.hard_add_expires_seconds + 187 xp->curlft.add_time - now; 188 if (tmo <= 0) 189 goto expired; 190 if (tmo < next) 191 next = tmo; 192 } 193 if (xp->lft.hard_use_expires_seconds) { 194 long tmo = xp->lft.hard_use_expires_seconds + 195 (xp->curlft.use_time ? : xp->curlft.add_time) - now; 196 if (tmo <= 0) 197 goto expired; 198 if (tmo < next) 199 next = tmo; 200 } 201 if (xp->lft.soft_add_expires_seconds) { 202 long tmo = xp->lft.soft_add_expires_seconds + 203 xp->curlft.add_time - now; 204 if (tmo <= 0) { 205 warn = 1; 206 tmo = XFRM_KM_TIMEOUT; 207 } 208 if (tmo < next) 209 next = tmo; 210 } 211 if (xp->lft.soft_use_expires_seconds) { 212 long tmo = xp->lft.soft_use_expires_seconds + 213 (xp->curlft.use_time ? 
: xp->curlft.add_time) - now; 214 if (tmo <= 0) { 215 warn = 1; 216 tmo = XFRM_KM_TIMEOUT; 217 } 218 if (tmo < next) 219 next = tmo; 220 } 221 222 if (warn) 223 km_policy_expired(xp, dir, 0, 0); 224 if (next != LONG_MAX && 225 !mod_timer(&xp->timer, jiffies + make_jiffies(next))) 226 xfrm_pol_hold(xp); 227 228 out: 229 read_unlock(&xp->lock); 230 xfrm_pol_put(xp); 231 return; 232 233 expired: 234 read_unlock(&xp->lock); 235 if (!xfrm_policy_delete(xp, dir)) 236 km_policy_expired(xp, dir, 1, 0); 237 xfrm_pol_put(xp); 238 } 239 240 static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) 241 { 242 struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); 243 244 if (unlikely(pol->walk.dead)) 245 flo = NULL; 246 else 247 xfrm_pol_hold(pol); 248 249 return flo; 250 } 251 252 static int xfrm_policy_flo_check(struct flow_cache_object *flo) 253 { 254 struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); 255 256 return !pol->walk.dead; 257 } 258 259 static void xfrm_policy_flo_delete(struct flow_cache_object *flo) 260 { 261 xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); 262 } 263 264 static const struct flow_cache_ops xfrm_policy_fc_ops = { 265 .get = xfrm_policy_flo_get, 266 .check = xfrm_policy_flo_check, 267 .delete = xfrm_policy_flo_delete, 268 }; 269 270 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 271 * SPD calls. 
272 */ 273 274 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) 275 { 276 struct xfrm_policy *policy; 277 278 policy = kzalloc(sizeof(struct xfrm_policy), gfp); 279 280 if (policy) { 281 write_pnet(&policy->xp_net, net); 282 INIT_LIST_HEAD(&policy->walk.all); 283 INIT_HLIST_NODE(&policy->bydst); 284 INIT_HLIST_NODE(&policy->byidx); 285 rwlock_init(&policy->lock); 286 atomic_set(&policy->refcnt, 1); 287 skb_queue_head_init(&policy->polq.hold_queue); 288 setup_timer(&policy->timer, xfrm_policy_timer, 289 (unsigned long)policy); 290 setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, 291 (unsigned long)policy); 292 policy->flo.ops = &xfrm_policy_fc_ops; 293 } 294 return policy; 295 } 296 EXPORT_SYMBOL(xfrm_policy_alloc); 297 298 /* Destroy xfrm_policy: descendant resources must be released to this moment. */ 299 300 void xfrm_policy_destroy(struct xfrm_policy *policy) 301 { 302 BUG_ON(!policy->walk.dead); 303 304 if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) 305 BUG(); 306 307 security_xfrm_policy_free(policy->security); 308 kfree(policy); 309 } 310 EXPORT_SYMBOL(xfrm_policy_destroy); 311 312 static void xfrm_queue_purge(struct sk_buff_head *list) 313 { 314 struct sk_buff *skb; 315 316 while ((skb = skb_dequeue(list)) != NULL) 317 kfree_skb(skb); 318 } 319 320 /* Rule must be locked. Release descentant resources, announce 321 * entry dead. The rule must be unlinked from lists to the moment. 
322 */ 323 324 static void xfrm_policy_kill(struct xfrm_policy *policy) 325 { 326 policy->walk.dead = 1; 327 328 atomic_inc(&policy->genid); 329 330 if (del_timer(&policy->polq.hold_timer)) 331 xfrm_pol_put(policy); 332 xfrm_queue_purge(&policy->polq.hold_queue); 333 334 if (del_timer(&policy->timer)) 335 xfrm_pol_put(policy); 336 337 xfrm_pol_put(policy); 338 } 339 340 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; 341 342 static inline unsigned int idx_hash(struct net *net, u32 index) 343 { 344 return __idx_hash(index, net->xfrm.policy_idx_hmask); 345 } 346 347 static struct hlist_head *policy_hash_bysel(struct net *net, 348 const struct xfrm_selector *sel, 349 unsigned short family, int dir) 350 { 351 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 352 unsigned int hash = __sel_hash(sel, family, hmask); 353 354 return (hash == hmask + 1 ? 355 &net->xfrm.policy_inexact[dir] : 356 net->xfrm.policy_bydst[dir].table + hash); 357 } 358 359 static struct hlist_head *policy_hash_direct(struct net *net, 360 const xfrm_address_t *daddr, 361 const xfrm_address_t *saddr, 362 unsigned short family, int dir) 363 { 364 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 365 unsigned int hash = __addr_hash(daddr, saddr, family, hmask); 366 367 return net->xfrm.policy_bydst[dir].table + hash; 368 } 369 370 static void xfrm_dst_hash_transfer(struct hlist_head *list, 371 struct hlist_head *ndsttable, 372 unsigned int nhashmask) 373 { 374 struct hlist_node *tmp, *entry0 = NULL; 375 struct xfrm_policy *pol; 376 unsigned int h0 = 0; 377 378 redo: 379 hlist_for_each_entry_safe(pol, tmp, list, bydst) { 380 unsigned int h; 381 382 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, 383 pol->family, nhashmask); 384 if (!entry0) { 385 hlist_del(&pol->bydst); 386 hlist_add_head(&pol->bydst, ndsttable+h); 387 h0 = h; 388 } else { 389 if (h != h0) 390 continue; 391 hlist_del(&pol->bydst); 392 hlist_add_after(entry0, &pol->bydst); 393 } 394 
entry0 = &pol->bydst; 395 } 396 if (!hlist_empty(list)) { 397 entry0 = NULL; 398 goto redo; 399 } 400 } 401 402 static void xfrm_idx_hash_transfer(struct hlist_head *list, 403 struct hlist_head *nidxtable, 404 unsigned int nhashmask) 405 { 406 struct hlist_node *tmp; 407 struct xfrm_policy *pol; 408 409 hlist_for_each_entry_safe(pol, tmp, list, byidx) { 410 unsigned int h; 411 412 h = __idx_hash(pol->index, nhashmask); 413 hlist_add_head(&pol->byidx, nidxtable+h); 414 } 415 } 416 417 static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) 418 { 419 return ((old_hmask + 1) << 1) - 1; 420 } 421 422 static void xfrm_bydst_resize(struct net *net, int dir) 423 { 424 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 425 unsigned int nhashmask = xfrm_new_hash_mask(hmask); 426 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); 427 struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; 428 struct hlist_head *ndst = xfrm_hash_alloc(nsize); 429 int i; 430 431 if (!ndst) 432 return; 433 434 write_lock_bh(&net->xfrm.xfrm_policy_lock); 435 436 for (i = hmask; i >= 0; i--) 437 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); 438 439 net->xfrm.policy_bydst[dir].table = ndst; 440 net->xfrm.policy_bydst[dir].hmask = nhashmask; 441 442 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 443 444 xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); 445 } 446 447 static void xfrm_byidx_resize(struct net *net, int total) 448 { 449 unsigned int hmask = net->xfrm.policy_idx_hmask; 450 unsigned int nhashmask = xfrm_new_hash_mask(hmask); 451 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); 452 struct hlist_head *oidx = net->xfrm.policy_byidx; 453 struct hlist_head *nidx = xfrm_hash_alloc(nsize); 454 int i; 455 456 if (!nidx) 457 return; 458 459 write_lock_bh(&net->xfrm.xfrm_policy_lock); 460 461 for (i = hmask; i >= 0; i--) 462 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); 463 464 net->xfrm.policy_byidx = nidx; 465 
net->xfrm.policy_idx_hmask = nhashmask; 466 467 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 468 469 xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); 470 } 471 472 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) 473 { 474 unsigned int cnt = net->xfrm.policy_count[dir]; 475 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 476 477 if (total) 478 *total += cnt; 479 480 if ((hmask + 1) < xfrm_policy_hashmax && 481 cnt > hmask) 482 return 1; 483 484 return 0; 485 } 486 487 static inline int xfrm_byidx_should_resize(struct net *net, int total) 488 { 489 unsigned int hmask = net->xfrm.policy_idx_hmask; 490 491 if ((hmask + 1) < xfrm_policy_hashmax && 492 total > hmask) 493 return 1; 494 495 return 0; 496 } 497 498 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) 499 { 500 read_lock_bh(&net->xfrm.xfrm_policy_lock); 501 si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; 502 si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; 503 si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; 504 si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; 505 si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; 506 si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; 507 si->spdhcnt = net->xfrm.policy_idx_hmask; 508 si->spdhmcnt = xfrm_policy_hashmax; 509 read_unlock_bh(&net->xfrm.xfrm_policy_lock); 510 } 511 EXPORT_SYMBOL(xfrm_spd_getinfo); 512 513 static DEFINE_MUTEX(hash_resize_mutex); 514 static void xfrm_hash_resize(struct work_struct *work) 515 { 516 struct net *net = container_of(work, struct net, xfrm.policy_hash_work); 517 int dir, total; 518 519 mutex_lock(&hash_resize_mutex); 520 521 total = 0; 522 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 523 if (xfrm_bydst_should_resize(net, dir, &total)) 524 xfrm_bydst_resize(net, dir); 525 } 526 if (xfrm_byidx_should_resize(net, total)) 527 xfrm_byidx_resize(net, total); 528 529 mutex_unlock(&hash_resize_mutex); 
530 } 531 532 /* Generate new index... KAME seems to generate them ordered by cost 533 * of an absolute inpredictability of ordering of rules. This will not pass. */ 534 static u32 xfrm_gen_index(struct net *net, int dir, u32 index) 535 { 536 static u32 idx_generator; 537 538 for (;;) { 539 struct hlist_head *list; 540 struct xfrm_policy *p; 541 u32 idx; 542 int found; 543 544 if (!index) { 545 idx = (idx_generator | dir); 546 idx_generator += 8; 547 } else { 548 idx = index; 549 index = 0; 550 } 551 552 if (idx == 0) 553 idx = 8; 554 list = net->xfrm.policy_byidx + idx_hash(net, idx); 555 found = 0; 556 hlist_for_each_entry(p, list, byidx) { 557 if (p->index == idx) { 558 found = 1; 559 break; 560 } 561 } 562 if (!found) 563 return idx; 564 } 565 } 566 567 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) 568 { 569 u32 *p1 = (u32 *) s1; 570 u32 *p2 = (u32 *) s2; 571 int len = sizeof(struct xfrm_selector) / sizeof(u32); 572 int i; 573 574 for (i = 0; i < len; i++) { 575 if (p1[i] != p2[i]) 576 return 1; 577 } 578 579 return 0; 580 } 581 582 static void xfrm_policy_requeue(struct xfrm_policy *old, 583 struct xfrm_policy *new) 584 { 585 struct xfrm_policy_queue *pq = &old->polq; 586 struct sk_buff_head list; 587 588 __skb_queue_head_init(&list); 589 590 spin_lock_bh(&pq->hold_queue.lock); 591 skb_queue_splice_init(&pq->hold_queue, &list); 592 if (del_timer(&pq->hold_timer)) 593 xfrm_pol_put(old); 594 spin_unlock_bh(&pq->hold_queue.lock); 595 596 if (skb_queue_empty(&list)) 597 return; 598 599 pq = &new->polq; 600 601 spin_lock_bh(&pq->hold_queue.lock); 602 skb_queue_splice(&list, &pq->hold_queue); 603 pq->timeout = XFRM_QUEUE_TMO_MIN; 604 if (!mod_timer(&pq->hold_timer, jiffies)) 605 xfrm_pol_hold(new); 606 spin_unlock_bh(&pq->hold_queue.lock); 607 } 608 609 static bool xfrm_policy_mark_match(struct xfrm_policy *policy, 610 struct xfrm_policy *pol) 611 { 612 u32 mark = policy->mark.v & policy->mark.m; 613 614 if (policy->mark.v == 
pol->mark.v && policy->mark.m == pol->mark.m) 615 return true; 616 617 if ((mark & pol->mark.m) == pol->mark.v && 618 policy->priority == pol->priority) 619 return true; 620 621 return false; 622 } 623 624 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) 625 { 626 struct net *net = xp_net(policy); 627 struct xfrm_policy *pol; 628 struct xfrm_policy *delpol; 629 struct hlist_head *chain; 630 struct hlist_node *newpos; 631 632 write_lock_bh(&net->xfrm.xfrm_policy_lock); 633 chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); 634 delpol = NULL; 635 newpos = NULL; 636 hlist_for_each_entry(pol, chain, bydst) { 637 if (pol->type == policy->type && 638 !selector_cmp(&pol->selector, &policy->selector) && 639 xfrm_policy_mark_match(policy, pol) && 640 xfrm_sec_ctx_match(pol->security, policy->security) && 641 !WARN_ON(delpol)) { 642 if (excl) { 643 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 644 return -EEXIST; 645 } 646 delpol = pol; 647 if (policy->priority > pol->priority) 648 continue; 649 } else if (policy->priority >= pol->priority) { 650 newpos = &pol->bydst; 651 continue; 652 } 653 if (delpol) 654 break; 655 } 656 if (newpos) 657 hlist_add_after(newpos, &policy->bydst); 658 else 659 hlist_add_head(&policy->bydst, chain); 660 xfrm_pol_hold(policy); 661 net->xfrm.policy_count[dir]++; 662 atomic_inc(&net->xfrm.flow_cache_genid); 663 664 /* After previous checking, family can either be AF_INET or AF_INET6 */ 665 if (policy->family == AF_INET) 666 rt_genid_bump_ipv4(net); 667 else 668 rt_genid_bump_ipv6(net); 669 670 if (delpol) { 671 xfrm_policy_requeue(delpol, policy); 672 __xfrm_policy_unlink(delpol, dir); 673 } 674 policy->index = delpol ? 
delpol->index : xfrm_gen_index(net, dir, policy->index); 675 hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); 676 policy->curlft.add_time = get_seconds(); 677 policy->curlft.use_time = 0; 678 if (!mod_timer(&policy->timer, jiffies + HZ)) 679 xfrm_pol_hold(policy); 680 list_add(&policy->walk.all, &net->xfrm.policy_all); 681 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 682 683 if (delpol) 684 xfrm_policy_kill(delpol); 685 else if (xfrm_bydst_should_resize(net, dir, NULL)) 686 schedule_work(&net->xfrm.policy_hash_work); 687 688 return 0; 689 } 690 EXPORT_SYMBOL(xfrm_policy_insert); 691 692 struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, 693 int dir, struct xfrm_selector *sel, 694 struct xfrm_sec_ctx *ctx, int delete, 695 int *err) 696 { 697 struct xfrm_policy *pol, *ret; 698 struct hlist_head *chain; 699 700 *err = 0; 701 write_lock_bh(&net->xfrm.xfrm_policy_lock); 702 chain = policy_hash_bysel(net, sel, sel->family, dir); 703 ret = NULL; 704 hlist_for_each_entry(pol, chain, bydst) { 705 if (pol->type == type && 706 (mark & pol->mark.m) == pol->mark.v && 707 !selector_cmp(sel, &pol->selector) && 708 xfrm_sec_ctx_match(ctx, pol->security)) { 709 xfrm_pol_hold(pol); 710 if (delete) { 711 *err = security_xfrm_policy_delete( 712 pol->security); 713 if (*err) { 714 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 715 return pol; 716 } 717 __xfrm_policy_unlink(pol, dir); 718 } 719 ret = pol; 720 break; 721 } 722 } 723 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 724 725 if (ret && delete) 726 xfrm_policy_kill(ret); 727 return ret; 728 } 729 EXPORT_SYMBOL(xfrm_policy_bysel_ctx); 730 731 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, 732 int dir, u32 id, int delete, int *err) 733 { 734 struct xfrm_policy *pol, *ret; 735 struct hlist_head *chain; 736 737 *err = -ENOENT; 738 if (xfrm_policy_id2dir(id) != dir) 739 return NULL; 740 741 *err = 0; 742 
write_lock_bh(&net->xfrm.xfrm_policy_lock); 743 chain = net->xfrm.policy_byidx + idx_hash(net, id); 744 ret = NULL; 745 hlist_for_each_entry(pol, chain, byidx) { 746 if (pol->type == type && pol->index == id && 747 (mark & pol->mark.m) == pol->mark.v) { 748 xfrm_pol_hold(pol); 749 if (delete) { 750 *err = security_xfrm_policy_delete( 751 pol->security); 752 if (*err) { 753 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 754 return pol; 755 } 756 __xfrm_policy_unlink(pol, dir); 757 } 758 ret = pol; 759 break; 760 } 761 } 762 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 763 764 if (ret && delete) 765 xfrm_policy_kill(ret); 766 return ret; 767 } 768 EXPORT_SYMBOL(xfrm_policy_byid); 769 770 #ifdef CONFIG_SECURITY_NETWORK_XFRM 771 static inline int 772 xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) 773 { 774 int dir, err = 0; 775 776 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 777 struct xfrm_policy *pol; 778 int i; 779 780 hlist_for_each_entry(pol, 781 &net->xfrm.policy_inexact[dir], bydst) { 782 if (pol->type != type) 783 continue; 784 err = security_xfrm_policy_delete(pol->security); 785 if (err) { 786 xfrm_audit_policy_delete(pol, 0, 787 audit_info->loginuid, 788 audit_info->sessionid, 789 audit_info->secid); 790 return err; 791 } 792 } 793 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { 794 hlist_for_each_entry(pol, 795 net->xfrm.policy_bydst[dir].table + i, 796 bydst) { 797 if (pol->type != type) 798 continue; 799 err = security_xfrm_policy_delete( 800 pol->security); 801 if (err) { 802 xfrm_audit_policy_delete(pol, 0, 803 audit_info->loginuid, 804 audit_info->sessionid, 805 audit_info->secid); 806 return err; 807 } 808 } 809 } 810 } 811 return err; 812 } 813 #else 814 static inline int 815 xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) 816 { 817 return 0; 818 } 819 #endif 820 821 int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) 822 { 823 int 
dir, err = 0, cnt = 0; 824 825 write_lock_bh(&net->xfrm.xfrm_policy_lock); 826 827 err = xfrm_policy_flush_secctx_check(net, type, audit_info); 828 if (err) 829 goto out; 830 831 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 832 struct xfrm_policy *pol; 833 int i; 834 835 again1: 836 hlist_for_each_entry(pol, 837 &net->xfrm.policy_inexact[dir], bydst) { 838 if (pol->type != type) 839 continue; 840 __xfrm_policy_unlink(pol, dir); 841 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 842 cnt++; 843 844 xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, 845 audit_info->sessionid, 846 audit_info->secid); 847 848 xfrm_policy_kill(pol); 849 850 write_lock_bh(&net->xfrm.xfrm_policy_lock); 851 goto again1; 852 } 853 854 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { 855 again2: 856 hlist_for_each_entry(pol, 857 net->xfrm.policy_bydst[dir].table + i, 858 bydst) { 859 if (pol->type != type) 860 continue; 861 __xfrm_policy_unlink(pol, dir); 862 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 863 cnt++; 864 865 xfrm_audit_policy_delete(pol, 1, 866 audit_info->loginuid, 867 audit_info->sessionid, 868 audit_info->secid); 869 xfrm_policy_kill(pol); 870 871 write_lock_bh(&net->xfrm.xfrm_policy_lock); 872 goto again2; 873 } 874 } 875 876 } 877 if (!cnt) 878 err = -ESRCH; 879 out: 880 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 881 return err; 882 } 883 EXPORT_SYMBOL(xfrm_policy_flush); 884 885 int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, 886 int (*func)(struct xfrm_policy *, int, int, void*), 887 void *data) 888 { 889 struct xfrm_policy *pol; 890 struct xfrm_policy_walk_entry *x; 891 int error = 0; 892 893 if (walk->type >= XFRM_POLICY_TYPE_MAX && 894 walk->type != XFRM_POLICY_TYPE_ANY) 895 return -EINVAL; 896 897 if (list_empty(&walk->walk.all) && walk->seq != 0) 898 return 0; 899 900 write_lock_bh(&net->xfrm.xfrm_policy_lock); 901 if (list_empty(&walk->walk.all)) 902 x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, 
all); 903 else 904 x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); 905 list_for_each_entry_from(x, &net->xfrm.policy_all, all) { 906 if (x->dead) 907 continue; 908 pol = container_of(x, struct xfrm_policy, walk); 909 if (walk->type != XFRM_POLICY_TYPE_ANY && 910 walk->type != pol->type) 911 continue; 912 error = func(pol, xfrm_policy_id2dir(pol->index), 913 walk->seq, data); 914 if (error) { 915 list_move_tail(&walk->walk.all, &x->all); 916 goto out; 917 } 918 walk->seq++; 919 } 920 if (walk->seq == 0) { 921 error = -ENOENT; 922 goto out; 923 } 924 list_del_init(&walk->walk.all); 925 out: 926 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 927 return error; 928 } 929 EXPORT_SYMBOL(xfrm_policy_walk); 930 931 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) 932 { 933 INIT_LIST_HEAD(&walk->walk.all); 934 walk->walk.dead = 1; 935 walk->type = type; 936 walk->seq = 0; 937 } 938 EXPORT_SYMBOL(xfrm_policy_walk_init); 939 940 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) 941 { 942 if (list_empty(&walk->walk.all)) 943 return; 944 945 write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ 946 list_del(&walk->walk.all); 947 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 948 } 949 EXPORT_SYMBOL(xfrm_policy_walk_done); 950 951 /* 952 * Find policy to apply to this flow. 953 * 954 * Returns 0 if policy found, else an -errno. 
955 */ 956 static int xfrm_policy_match(const struct xfrm_policy *pol, 957 const struct flowi *fl, 958 u8 type, u16 family, int dir) 959 { 960 const struct xfrm_selector *sel = &pol->selector; 961 int ret = -ESRCH; 962 bool match; 963 964 if (pol->family != family || 965 (fl->flowi_mark & pol->mark.m) != pol->mark.v || 966 pol->type != type) 967 return ret; 968 969 match = xfrm_selector_match(sel, fl, family); 970 if (match) 971 ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, 972 dir); 973 974 return ret; 975 } 976 977 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, 978 const struct flowi *fl, 979 u16 family, u8 dir) 980 { 981 int err; 982 struct xfrm_policy *pol, *ret; 983 const xfrm_address_t *daddr, *saddr; 984 struct hlist_head *chain; 985 u32 priority = ~0U; 986 987 daddr = xfrm_flowi_daddr(fl, family); 988 saddr = xfrm_flowi_saddr(fl, family); 989 if (unlikely(!daddr || !saddr)) 990 return NULL; 991 992 read_lock_bh(&net->xfrm.xfrm_policy_lock); 993 chain = policy_hash_direct(net, daddr, saddr, family, dir); 994 ret = NULL; 995 hlist_for_each_entry(pol, chain, bydst) { 996 err = xfrm_policy_match(pol, fl, type, family, dir); 997 if (err) { 998 if (err == -ESRCH) 999 continue; 1000 else { 1001 ret = ERR_PTR(err); 1002 goto fail; 1003 } 1004 } else { 1005 ret = pol; 1006 priority = ret->priority; 1007 break; 1008 } 1009 } 1010 chain = &net->xfrm.policy_inexact[dir]; 1011 hlist_for_each_entry(pol, chain, bydst) { 1012 err = xfrm_policy_match(pol, fl, type, family, dir); 1013 if (err) { 1014 if (err == -ESRCH) 1015 continue; 1016 else { 1017 ret = ERR_PTR(err); 1018 goto fail; 1019 } 1020 } else if (pol->priority < priority) { 1021 ret = pol; 1022 break; 1023 } 1024 } 1025 if (ret) 1026 xfrm_pol_hold(ret); 1027 fail: 1028 read_unlock_bh(&net->xfrm.xfrm_policy_lock); 1029 1030 return ret; 1031 } 1032 1033 static struct xfrm_policy * 1034 __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 
dir) 1035 { 1036 #ifdef CONFIG_XFRM_SUB_POLICY 1037 struct xfrm_policy *pol; 1038 1039 pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); 1040 if (pol != NULL) 1041 return pol; 1042 #endif 1043 return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); 1044 } 1045 1046 static int flow_to_policy_dir(int dir) 1047 { 1048 if (XFRM_POLICY_IN == FLOW_DIR_IN && 1049 XFRM_POLICY_OUT == FLOW_DIR_OUT && 1050 XFRM_POLICY_FWD == FLOW_DIR_FWD) 1051 return dir; 1052 1053 switch (dir) { 1054 default: 1055 case FLOW_DIR_IN: 1056 return XFRM_POLICY_IN; 1057 case FLOW_DIR_OUT: 1058 return XFRM_POLICY_OUT; 1059 case FLOW_DIR_FWD: 1060 return XFRM_POLICY_FWD; 1061 } 1062 } 1063 1064 static struct flow_cache_object * 1065 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, 1066 u8 dir, struct flow_cache_object *old_obj, void *ctx) 1067 { 1068 struct xfrm_policy *pol; 1069 1070 if (old_obj) 1071 xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); 1072 1073 pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); 1074 if (IS_ERR_OR_NULL(pol)) 1075 return ERR_CAST(pol); 1076 1077 /* Resolver returns two references: 1078 * one for cache and one for caller of flow_cache_lookup() */ 1079 xfrm_pol_hold(pol); 1080 1081 return &pol->flo; 1082 } 1083 1084 static inline int policy_to_flow_dir(int dir) 1085 { 1086 if (XFRM_POLICY_IN == FLOW_DIR_IN && 1087 XFRM_POLICY_OUT == FLOW_DIR_OUT && 1088 XFRM_POLICY_FWD == FLOW_DIR_FWD) 1089 return dir; 1090 switch (dir) { 1091 default: 1092 case XFRM_POLICY_IN: 1093 return FLOW_DIR_IN; 1094 case XFRM_POLICY_OUT: 1095 return FLOW_DIR_OUT; 1096 case XFRM_POLICY_FWD: 1097 return FLOW_DIR_FWD; 1098 } 1099 } 1100 1101 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, 1102 const struct flowi *fl) 1103 { 1104 struct xfrm_policy *pol; 1105 struct net *net = sock_net(sk); 1106 1107 read_lock_bh(&net->xfrm.xfrm_policy_lock); 1108 if ((pol = 
sk->sk_policy[dir]) != NULL) { 1109 bool match = xfrm_selector_match(&pol->selector, fl, 1110 sk->sk_family); 1111 int err = 0; 1112 1113 if (match) { 1114 if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { 1115 pol = NULL; 1116 goto out; 1117 } 1118 err = security_xfrm_policy_lookup(pol->security, 1119 fl->flowi_secid, 1120 policy_to_flow_dir(dir)); 1121 if (!err) 1122 xfrm_pol_hold(pol); 1123 else if (err == -ESRCH) 1124 pol = NULL; 1125 else 1126 pol = ERR_PTR(err); 1127 } else 1128 pol = NULL; 1129 } 1130 out: 1131 read_unlock_bh(&net->xfrm.xfrm_policy_lock); 1132 return pol; 1133 } 1134 1135 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) 1136 { 1137 struct net *net = xp_net(pol); 1138 struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, 1139 pol->family, dir); 1140 1141 list_add(&pol->walk.all, &net->xfrm.policy_all); 1142 hlist_add_head(&pol->bydst, chain); 1143 hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); 1144 net->xfrm.policy_count[dir]++; 1145 xfrm_pol_hold(pol); 1146 1147 if (xfrm_bydst_should_resize(net, dir, NULL)) 1148 schedule_work(&net->xfrm.policy_hash_work); 1149 } 1150 1151 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 1152 int dir) 1153 { 1154 struct net *net = xp_net(pol); 1155 1156 if (hlist_unhashed(&pol->bydst)) 1157 return NULL; 1158 1159 hlist_del_init(&pol->bydst); 1160 hlist_del(&pol->byidx); 1161 list_del(&pol->walk.all); 1162 net->xfrm.policy_count[dir]--; 1163 1164 return pol; 1165 } 1166 1167 int xfrm_policy_delete(struct xfrm_policy *pol, int dir) 1168 { 1169 struct net *net = xp_net(pol); 1170 1171 write_lock_bh(&net->xfrm.xfrm_policy_lock); 1172 pol = __xfrm_policy_unlink(pol, dir); 1173 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 1174 if (pol) { 1175 xfrm_policy_kill(pol); 1176 return 0; 1177 } 1178 return -ENOENT; 1179 } 1180 EXPORT_SYMBOL(xfrm_policy_delete); 1181 1182 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct 
xfrm_policy *pol) 1183 { 1184 struct net *net = xp_net(pol); 1185 struct xfrm_policy *old_pol; 1186 1187 #ifdef CONFIG_XFRM_SUB_POLICY 1188 if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) 1189 return -EINVAL; 1190 #endif 1191 1192 write_lock_bh(&net->xfrm.xfrm_policy_lock); 1193 old_pol = sk->sk_policy[dir]; 1194 sk->sk_policy[dir] = pol; 1195 if (pol) { 1196 pol->curlft.add_time = get_seconds(); 1197 pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); 1198 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); 1199 } 1200 if (old_pol) { 1201 if (pol) 1202 xfrm_policy_requeue(old_pol, pol); 1203 1204 /* Unlinking succeeds always. This is the only function 1205 * allowed to delete or replace socket policy. 1206 */ 1207 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); 1208 } 1209 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 1210 1211 if (old_pol) { 1212 xfrm_policy_kill(old_pol); 1213 } 1214 return 0; 1215 } 1216 1217 static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) 1218 { 1219 struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); 1220 struct net *net = xp_net(old); 1221 1222 if (newp) { 1223 newp->selector = old->selector; 1224 if (security_xfrm_policy_clone(old->security, 1225 &newp->security)) { 1226 kfree(newp); 1227 return NULL; /* ENOMEM */ 1228 } 1229 newp->lft = old->lft; 1230 newp->curlft = old->curlft; 1231 newp->mark = old->mark; 1232 newp->action = old->action; 1233 newp->flags = old->flags; 1234 newp->xfrm_nr = old->xfrm_nr; 1235 newp->index = old->index; 1236 newp->type = old->type; 1237 memcpy(newp->xfrm_vec, old->xfrm_vec, 1238 newp->xfrm_nr*sizeof(struct xfrm_tmpl)); 1239 write_lock_bh(&net->xfrm.xfrm_policy_lock); 1240 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); 1241 write_unlock_bh(&net->xfrm.xfrm_policy_lock); 1242 xfrm_pol_put(newp); 1243 } 1244 return newp; 1245 } 1246 1247 int __xfrm_sk_clone_policy(struct sock *sk) 1248 { 1249 struct xfrm_policy *p0 = sk->sk_policy[0], 1250 *p1 = 
sk->sk_policy[1]; 1251 1252 sk->sk_policy[0] = sk->sk_policy[1] = NULL; 1253 if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL) 1254 return -ENOMEM; 1255 if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL) 1256 return -ENOMEM; 1257 return 0; 1258 } 1259 1260 static int 1261 xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, 1262 unsigned short family) 1263 { 1264 int err; 1265 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1266 1267 if (unlikely(afinfo == NULL)) 1268 return -EINVAL; 1269 err = afinfo->get_saddr(net, local, remote); 1270 xfrm_policy_put_afinfo(afinfo); 1271 return err; 1272 } 1273 1274 /* Resolve list of templates for the flow, given policy. */ 1275 1276 static int 1277 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, 1278 struct xfrm_state **xfrm, unsigned short family) 1279 { 1280 struct net *net = xp_net(policy); 1281 int nx; 1282 int i, error; 1283 xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); 1284 xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); 1285 xfrm_address_t tmp; 1286 1287 for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { 1288 struct xfrm_state *x; 1289 xfrm_address_t *remote = daddr; 1290 xfrm_address_t *local = saddr; 1291 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; 1292 1293 if (tmpl->mode == XFRM_MODE_TUNNEL || 1294 tmpl->mode == XFRM_MODE_BEET) { 1295 remote = &tmpl->id.daddr; 1296 local = &tmpl->saddr; 1297 if (xfrm_addr_any(local, tmpl->encap_family)) { 1298 error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); 1299 if (error) 1300 goto fail; 1301 local = &tmp; 1302 } 1303 } 1304 1305 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); 1306 1307 if (x && x->km.state == XFRM_STATE_VALID) { 1308 xfrm[nx++] = x; 1309 daddr = remote; 1310 saddr = local; 1311 continue; 1312 } 1313 if (x) { 1314 error = (x->km.state == XFRM_STATE_ERROR ? 
1315 -EINVAL : -EAGAIN); 1316 xfrm_state_put(x); 1317 } else if (error == -ESRCH) { 1318 error = -EAGAIN; 1319 } 1320 1321 if (!tmpl->optional) 1322 goto fail; 1323 } 1324 return nx; 1325 1326 fail: 1327 for (nx--; nx >= 0; nx--) 1328 xfrm_state_put(xfrm[nx]); 1329 return error; 1330 } 1331 1332 static int 1333 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, 1334 struct xfrm_state **xfrm, unsigned short family) 1335 { 1336 struct xfrm_state *tp[XFRM_MAX_DEPTH]; 1337 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; 1338 int cnx = 0; 1339 int error; 1340 int ret; 1341 int i; 1342 1343 for (i = 0; i < npols; i++) { 1344 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { 1345 error = -ENOBUFS; 1346 goto fail; 1347 } 1348 1349 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); 1350 if (ret < 0) { 1351 error = ret; 1352 goto fail; 1353 } else 1354 cnx += ret; 1355 } 1356 1357 /* found states are sorted for outbound processing */ 1358 if (npols > 1) 1359 xfrm_state_sort(xfrm, tpp, cnx, family); 1360 1361 return cnx; 1362 1363 fail: 1364 for (cnx--; cnx >= 0; cnx--) 1365 xfrm_state_put(tpp[cnx]); 1366 return error; 1367 1368 } 1369 1370 /* Check that the bundle accepts the flow and its components are 1371 * still valid. 1372 */ 1373 1374 static inline int xfrm_get_tos(const struct flowi *fl, int family) 1375 { 1376 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1377 int tos; 1378 1379 if (!afinfo) 1380 return -EINVAL; 1381 1382 tos = afinfo->get_tos(fl); 1383 1384 xfrm_policy_put_afinfo(afinfo); 1385 1386 return tos; 1387 } 1388 1389 static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) 1390 { 1391 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1392 struct dst_entry *dst = &xdst->u.dst; 1393 1394 if (xdst->route == NULL) { 1395 /* Dummy bundle - if it has xfrms we were not 1396 * able to build bundle as template resolution failed. 
1397 * It means we need to try again resolving. */ 1398 if (xdst->num_xfrms > 0) 1399 return NULL; 1400 } else if (dst->flags & DST_XFRM_QUEUE) { 1401 return NULL; 1402 } else { 1403 /* Real bundle */ 1404 if (stale_bundle(dst)) 1405 return NULL; 1406 } 1407 1408 dst_hold(dst); 1409 return flo; 1410 } 1411 1412 static int xfrm_bundle_flo_check(struct flow_cache_object *flo) 1413 { 1414 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1415 struct dst_entry *dst = &xdst->u.dst; 1416 1417 if (!xdst->route) 1418 return 0; 1419 if (stale_bundle(dst)) 1420 return 0; 1421 1422 return 1; 1423 } 1424 1425 static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) 1426 { 1427 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); 1428 struct dst_entry *dst = &xdst->u.dst; 1429 1430 dst_free(dst); 1431 } 1432 1433 static const struct flow_cache_ops xfrm_bundle_fc_ops = { 1434 .get = xfrm_bundle_flo_get, 1435 .check = xfrm_bundle_flo_check, 1436 .delete = xfrm_bundle_flo_delete, 1437 }; 1438 1439 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) 1440 { 1441 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1442 struct dst_ops *dst_ops; 1443 struct xfrm_dst *xdst; 1444 1445 if (!afinfo) 1446 return ERR_PTR(-EINVAL); 1447 1448 switch (family) { 1449 case AF_INET: 1450 dst_ops = &net->xfrm.xfrm4_dst_ops; 1451 break; 1452 #if IS_ENABLED(CONFIG_IPV6) 1453 case AF_INET6: 1454 dst_ops = &net->xfrm.xfrm6_dst_ops; 1455 break; 1456 #endif 1457 default: 1458 BUG(); 1459 } 1460 xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); 1461 1462 if (likely(xdst)) { 1463 struct dst_entry *dst = &xdst->u.dst; 1464 1465 memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); 1466 xdst->flo.ops = &xfrm_bundle_fc_ops; 1467 if (afinfo->init_dst) 1468 afinfo->init_dst(net, xdst); 1469 } else 1470 xdst = ERR_PTR(-ENOBUFS); 1471 1472 xfrm_policy_put_afinfo(afinfo); 1473 1474 return xdst; 1475 } 1476 1477 static inline int 
xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, 1478 int nfheader_len) 1479 { 1480 struct xfrm_policy_afinfo *afinfo = 1481 xfrm_policy_get_afinfo(dst->ops->family); 1482 int err; 1483 1484 if (!afinfo) 1485 return -EINVAL; 1486 1487 err = afinfo->init_path(path, dst, nfheader_len); 1488 1489 xfrm_policy_put_afinfo(afinfo); 1490 1491 return err; 1492 } 1493 1494 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 1495 const struct flowi *fl) 1496 { 1497 struct xfrm_policy_afinfo *afinfo = 1498 xfrm_policy_get_afinfo(xdst->u.dst.ops->family); 1499 int err; 1500 1501 if (!afinfo) 1502 return -EINVAL; 1503 1504 err = afinfo->fill_dst(xdst, dev, fl); 1505 1506 xfrm_policy_put_afinfo(afinfo); 1507 1508 return err; 1509 } 1510 1511 1512 /* Allocate chain of dst_entry's, attach known xfrm's, calculate 1513 * all the metrics... Shortly, bundle a bundle. 1514 */ 1515 1516 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, 1517 struct xfrm_state **xfrm, int nx, 1518 const struct flowi *fl, 1519 struct dst_entry *dst) 1520 { 1521 struct net *net = xp_net(policy); 1522 unsigned long now = jiffies; 1523 struct net_device *dev; 1524 struct xfrm_mode *inner_mode; 1525 struct dst_entry *dst_prev = NULL; 1526 struct dst_entry *dst0 = NULL; 1527 int i = 0; 1528 int err; 1529 int header_len = 0; 1530 int nfheader_len = 0; 1531 int trailer_len = 0; 1532 int tos; 1533 int family = policy->selector.family; 1534 xfrm_address_t saddr, daddr; 1535 1536 xfrm_flowi_addr_get(fl, &saddr, &daddr, family); 1537 1538 tos = xfrm_get_tos(fl, family); 1539 err = tos; 1540 if (tos < 0) 1541 goto put_states; 1542 1543 dst_hold(dst); 1544 1545 for (; i < nx; i++) { 1546 struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); 1547 struct dst_entry *dst1 = &xdst->u.dst; 1548 1549 err = PTR_ERR(xdst); 1550 if (IS_ERR(xdst)) { 1551 dst_release(dst); 1552 goto put_states; 1553 } 1554 1555 if (xfrm[i]->sel.family == AF_UNSPEC) { 1556 inner_mode = 
xfrm_ip2inner_mode(xfrm[i], 1557 xfrm_af2proto(family)); 1558 if (!inner_mode) { 1559 err = -EAFNOSUPPORT; 1560 dst_release(dst); 1561 goto put_states; 1562 } 1563 } else 1564 inner_mode = xfrm[i]->inner_mode; 1565 1566 if (!dst_prev) 1567 dst0 = dst1; 1568 else { 1569 dst_prev->child = dst_clone(dst1); 1570 dst1->flags |= DST_NOHASH; 1571 } 1572 1573 xdst->route = dst; 1574 dst_copy_metrics(dst1, dst); 1575 1576 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { 1577 family = xfrm[i]->props.family; 1578 dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, 1579 family); 1580 err = PTR_ERR(dst); 1581 if (IS_ERR(dst)) 1582 goto put_states; 1583 } else 1584 dst_hold(dst); 1585 1586 dst1->xfrm = xfrm[i]; 1587 xdst->xfrm_genid = xfrm[i]->genid; 1588 1589 dst1->obsolete = DST_OBSOLETE_FORCE_CHK; 1590 dst1->flags |= DST_HOST; 1591 dst1->lastuse = now; 1592 1593 dst1->input = dst_discard; 1594 dst1->output = inner_mode->afinfo->output; 1595 1596 dst1->next = dst_prev; 1597 dst_prev = dst1; 1598 1599 header_len += xfrm[i]->props.header_len; 1600 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) 1601 nfheader_len += xfrm[i]->props.header_len; 1602 trailer_len += xfrm[i]->props.trailer_len; 1603 } 1604 1605 dst_prev->child = dst; 1606 dst0->path = dst; 1607 1608 err = -ENODEV; 1609 dev = dst->dev; 1610 if (!dev) 1611 goto free_dst; 1612 1613 xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); 1614 xfrm_init_pmtu(dst_prev); 1615 1616 for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { 1617 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; 1618 1619 err = xfrm_fill_dst(xdst, dev, fl); 1620 if (err) 1621 goto free_dst; 1622 1623 dst_prev->header_len = header_len; 1624 dst_prev->trailer_len = trailer_len; 1625 header_len -= xdst->u.dst.xfrm->props.header_len; 1626 trailer_len -= xdst->u.dst.xfrm->props.trailer_len; 1627 } 1628 1629 out: 1630 return dst0; 1631 1632 put_states: 1633 for (; i < nx; i++) 1634 xfrm_state_put(xfrm[i]); 1635 free_dst: 1636 if 
(dst0) 1637 dst_free(dst0); 1638 dst0 = ERR_PTR(err); 1639 goto out; 1640 } 1641 1642 #ifdef CONFIG_XFRM_SUB_POLICY 1643 static int xfrm_dst_alloc_copy(void **target, const void *src, int size) 1644 { 1645 if (!*target) { 1646 *target = kmalloc(size, GFP_ATOMIC); 1647 if (!*target) 1648 return -ENOMEM; 1649 } 1650 1651 memcpy(*target, src, size); 1652 return 0; 1653 } 1654 #endif 1655 1656 static int xfrm_dst_update_parent(struct dst_entry *dst, 1657 const struct xfrm_selector *sel) 1658 { 1659 #ifdef CONFIG_XFRM_SUB_POLICY 1660 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1661 return xfrm_dst_alloc_copy((void **)&(xdst->partner), 1662 sel, sizeof(*sel)); 1663 #else 1664 return 0; 1665 #endif 1666 } 1667 1668 static int xfrm_dst_update_origin(struct dst_entry *dst, 1669 const struct flowi *fl) 1670 { 1671 #ifdef CONFIG_XFRM_SUB_POLICY 1672 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1673 return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); 1674 #else 1675 return 0; 1676 #endif 1677 } 1678 1679 static int xfrm_expand_policies(const struct flowi *fl, u16 family, 1680 struct xfrm_policy **pols, 1681 int *num_pols, int *num_xfrms) 1682 { 1683 int i; 1684 1685 if (*num_pols == 0 || !pols[0]) { 1686 *num_pols = 0; 1687 *num_xfrms = 0; 1688 return 0; 1689 } 1690 if (IS_ERR(pols[0])) 1691 return PTR_ERR(pols[0]); 1692 1693 *num_xfrms = pols[0]->xfrm_nr; 1694 1695 #ifdef CONFIG_XFRM_SUB_POLICY 1696 if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && 1697 pols[0]->type != XFRM_POLICY_TYPE_MAIN) { 1698 pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), 1699 XFRM_POLICY_TYPE_MAIN, 1700 fl, family, 1701 XFRM_POLICY_OUT); 1702 if (pols[1]) { 1703 if (IS_ERR(pols[1])) { 1704 xfrm_pols_put(pols, *num_pols); 1705 return PTR_ERR(pols[1]); 1706 } 1707 (*num_pols)++; 1708 (*num_xfrms) += pols[1]->xfrm_nr; 1709 } 1710 } 1711 #endif 1712 for (i = 0; i < *num_pols; i++) { 1713 if (pols[i]->action != XFRM_POLICY_ALLOW) { 1714 *num_xfrms = -1; 1715 break; 1716 
} 1717 } 1718 1719 return 0; 1720 1721 } 1722 1723 static struct xfrm_dst * 1724 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, 1725 const struct flowi *fl, u16 family, 1726 struct dst_entry *dst_orig) 1727 { 1728 struct net *net = xp_net(pols[0]); 1729 struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; 1730 struct dst_entry *dst; 1731 struct xfrm_dst *xdst; 1732 int err; 1733 1734 /* Try to instantiate a bundle */ 1735 err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); 1736 if (err <= 0) { 1737 if (err != 0 && err != -EAGAIN) 1738 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 1739 return ERR_PTR(err); 1740 } 1741 1742 dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); 1743 if (IS_ERR(dst)) { 1744 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); 1745 return ERR_CAST(dst); 1746 } 1747 1748 xdst = (struct xfrm_dst *)dst; 1749 xdst->num_xfrms = err; 1750 if (num_pols > 1) 1751 err = xfrm_dst_update_parent(dst, &pols[1]->selector); 1752 else 1753 err = xfrm_dst_update_origin(dst, fl); 1754 if (unlikely(err)) { 1755 dst_free(dst); 1756 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); 1757 return ERR_PTR(err); 1758 } 1759 1760 xdst->num_pols = num_pols; 1761 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); 1762 xdst->policy_genid = atomic_read(&pols[0]->genid); 1763 1764 return xdst; 1765 } 1766 1767 static void xfrm_policy_queue_process(unsigned long arg) 1768 { 1769 int err = 0; 1770 struct sk_buff *skb; 1771 struct sock *sk; 1772 struct dst_entry *dst; 1773 struct xfrm_policy *pol = (struct xfrm_policy *)arg; 1774 struct xfrm_policy_queue *pq = &pol->polq; 1775 struct flowi fl; 1776 struct sk_buff_head list; 1777 1778 spin_lock(&pq->hold_queue.lock); 1779 skb = skb_peek(&pq->hold_queue); 1780 if (!skb) { 1781 spin_unlock(&pq->hold_queue.lock); 1782 goto out; 1783 } 1784 dst = skb_dst(skb); 1785 sk = skb->sk; 1786 xfrm_decode_session(skb, &fl, dst->ops->family); 1787 spin_unlock(&pq->hold_queue.lock); 1788 
1789 dst_hold(dst->path); 1790 dst = xfrm_lookup(xp_net(pol), dst->path, &fl, 1791 sk, 0); 1792 if (IS_ERR(dst)) 1793 goto purge_queue; 1794 1795 if (dst->flags & DST_XFRM_QUEUE) { 1796 dst_release(dst); 1797 1798 if (pq->timeout >= XFRM_QUEUE_TMO_MAX) 1799 goto purge_queue; 1800 1801 pq->timeout = pq->timeout << 1; 1802 if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) 1803 xfrm_pol_hold(pol); 1804 goto out; 1805 } 1806 1807 dst_release(dst); 1808 1809 __skb_queue_head_init(&list); 1810 1811 spin_lock(&pq->hold_queue.lock); 1812 pq->timeout = 0; 1813 skb_queue_splice_init(&pq->hold_queue, &list); 1814 spin_unlock(&pq->hold_queue.lock); 1815 1816 while (!skb_queue_empty(&list)) { 1817 skb = __skb_dequeue(&list); 1818 1819 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); 1820 dst_hold(skb_dst(skb)->path); 1821 dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, 1822 &fl, skb->sk, 0); 1823 if (IS_ERR(dst)) { 1824 kfree_skb(skb); 1825 continue; 1826 } 1827 1828 nf_reset(skb); 1829 skb_dst_drop(skb); 1830 skb_dst_set(skb, dst); 1831 1832 err = dst_output(skb); 1833 } 1834 1835 out: 1836 xfrm_pol_put(pol); 1837 return; 1838 1839 purge_queue: 1840 pq->timeout = 0; 1841 xfrm_queue_purge(&pq->hold_queue); 1842 xfrm_pol_put(pol); 1843 } 1844 1845 static int xdst_queue_output(struct sk_buff *skb) 1846 { 1847 unsigned long sched_next; 1848 struct dst_entry *dst = skb_dst(skb); 1849 struct xfrm_dst *xdst = (struct xfrm_dst *) dst; 1850 struct xfrm_policy *pol = xdst->pols[0]; 1851 struct xfrm_policy_queue *pq = &pol->polq; 1852 const struct sk_buff *fclone = skb + 1; 1853 1854 if (unlikely(skb->fclone == SKB_FCLONE_ORIG && 1855 fclone->fclone == SKB_FCLONE_CLONE)) { 1856 kfree_skb(skb); 1857 return 0; 1858 } 1859 1860 if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { 1861 kfree_skb(skb); 1862 return -EAGAIN; 1863 } 1864 1865 skb_dst_force(skb); 1866 1867 spin_lock_bh(&pq->hold_queue.lock); 1868 1869 if (!pq->timeout) 1870 pq->timeout = XFRM_QUEUE_TMO_MIN; 1871 1872 
sched_next = jiffies + pq->timeout; 1873 1874 if (del_timer(&pq->hold_timer)) { 1875 if (time_before(pq->hold_timer.expires, sched_next)) 1876 sched_next = pq->hold_timer.expires; 1877 xfrm_pol_put(pol); 1878 } 1879 1880 __skb_queue_tail(&pq->hold_queue, skb); 1881 if (!mod_timer(&pq->hold_timer, sched_next)) 1882 xfrm_pol_hold(pol); 1883 1884 spin_unlock_bh(&pq->hold_queue.lock); 1885 1886 return 0; 1887 } 1888 1889 static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, 1890 struct dst_entry *dst, 1891 const struct flowi *fl, 1892 int num_xfrms, 1893 u16 family) 1894 { 1895 int err; 1896 struct net_device *dev; 1897 struct dst_entry *dst1; 1898 struct xfrm_dst *xdst; 1899 1900 xdst = xfrm_alloc_dst(net, family); 1901 if (IS_ERR(xdst)) 1902 return xdst; 1903 1904 if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) 1905 return xdst; 1906 1907 dst1 = &xdst->u.dst; 1908 dst_hold(dst); 1909 xdst->route = dst; 1910 1911 dst_copy_metrics(dst1, dst); 1912 1913 dst1->obsolete = DST_OBSOLETE_FORCE_CHK; 1914 dst1->flags |= DST_HOST | DST_XFRM_QUEUE; 1915 dst1->lastuse = jiffies; 1916 1917 dst1->input = dst_discard; 1918 dst1->output = xdst_queue_output; 1919 1920 dst_hold(dst); 1921 dst1->child = dst; 1922 dst1->path = dst; 1923 1924 xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); 1925 1926 err = -ENODEV; 1927 dev = dst->dev; 1928 if (!dev) 1929 goto free_dst; 1930 1931 err = xfrm_fill_dst(xdst, dev, fl); 1932 if (err) 1933 goto free_dst; 1934 1935 out: 1936 return xdst; 1937 1938 free_dst: 1939 dst_release(dst1); 1940 xdst = ERR_PTR(err); 1941 goto out; 1942 } 1943 1944 static struct flow_cache_object * 1945 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, 1946 struct flow_cache_object *oldflo, void *ctx) 1947 { 1948 struct dst_entry *dst_orig = (struct dst_entry *)ctx; 1949 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 1950 struct xfrm_dst *xdst, *new_xdst; 1951 int num_pols = 0, num_xfrms = 0, i, err, pol_dead; 1952 1953 /* 
Check if the policies from old bundle are usable */ 1954 xdst = NULL; 1955 if (oldflo) { 1956 xdst = container_of(oldflo, struct xfrm_dst, flo); 1957 num_pols = xdst->num_pols; 1958 num_xfrms = xdst->num_xfrms; 1959 pol_dead = 0; 1960 for (i = 0; i < num_pols; i++) { 1961 pols[i] = xdst->pols[i]; 1962 pol_dead |= pols[i]->walk.dead; 1963 } 1964 if (pol_dead) { 1965 dst_free(&xdst->u.dst); 1966 xdst = NULL; 1967 num_pols = 0; 1968 num_xfrms = 0; 1969 oldflo = NULL; 1970 } 1971 } 1972 1973 /* Resolve policies to use if we couldn't get them from 1974 * previous cache entry */ 1975 if (xdst == NULL) { 1976 num_pols = 1; 1977 pols[0] = __xfrm_policy_lookup(net, fl, family, 1978 flow_to_policy_dir(dir)); 1979 err = xfrm_expand_policies(fl, family, pols, 1980 &num_pols, &num_xfrms); 1981 if (err < 0) 1982 goto inc_error; 1983 if (num_pols == 0) 1984 return NULL; 1985 if (num_xfrms <= 0) 1986 goto make_dummy_bundle; 1987 } 1988 1989 new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); 1990 if (IS_ERR(new_xdst)) { 1991 err = PTR_ERR(new_xdst); 1992 if (err != -EAGAIN) 1993 goto error; 1994 if (oldflo == NULL) 1995 goto make_dummy_bundle; 1996 dst_hold(&xdst->u.dst); 1997 return oldflo; 1998 } else if (new_xdst == NULL) { 1999 num_xfrms = 0; 2000 if (oldflo == NULL) 2001 goto make_dummy_bundle; 2002 xdst->num_xfrms = 0; 2003 dst_hold(&xdst->u.dst); 2004 return oldflo; 2005 } 2006 2007 /* Kill the previous bundle */ 2008 if (xdst) { 2009 /* The policies were stolen for newly generated bundle */ 2010 xdst->num_pols = 0; 2011 dst_free(&xdst->u.dst); 2012 } 2013 2014 /* Flow cache does not have reference, it dst_free()'s, 2015 * but we do need to return one reference for original caller */ 2016 dst_hold(&new_xdst->u.dst); 2017 return &new_xdst->flo; 2018 2019 make_dummy_bundle: 2020 /* We found policies, but there's no bundles to instantiate: 2021 * either because the policy blocks, has no transformations or 2022 * we could not build template (no 
xfrm_states).*/ 2023 xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); 2024 if (IS_ERR(xdst)) { 2025 xfrm_pols_put(pols, num_pols); 2026 return ERR_CAST(xdst); 2027 } 2028 xdst->num_pols = num_pols; 2029 xdst->num_xfrms = num_xfrms; 2030 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); 2031 2032 dst_hold(&xdst->u.dst); 2033 return &xdst->flo; 2034 2035 inc_error: 2036 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 2037 error: 2038 if (xdst != NULL) 2039 dst_free(&xdst->u.dst); 2040 else 2041 xfrm_pols_put(pols, num_pols); 2042 return ERR_PTR(err); 2043 } 2044 2045 static struct dst_entry *make_blackhole(struct net *net, u16 family, 2046 struct dst_entry *dst_orig) 2047 { 2048 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 2049 struct dst_entry *ret; 2050 2051 if (!afinfo) { 2052 dst_release(dst_orig); 2053 return ERR_PTR(-EINVAL); 2054 } else { 2055 ret = afinfo->blackhole_route(net, dst_orig); 2056 } 2057 xfrm_policy_put_afinfo(afinfo); 2058 2059 return ret; 2060 } 2061 2062 /* Main function: finds/creates a bundle for given flow. 2063 * 2064 * At the moment we eat a raw IP route. Mostly to speed up lookups 2065 * on interfaces with disabled IPsec. 
 */
/* @net:      namespace the lookup runs in
 * @dst_orig: plain route for the flow; a reference is consumed on the
 *            error and "flow transformed" paths below, and the route
 *            itself is returned when no transformation applies
 * @fl:       flow being routed
 * @sk:       originating socket or NULL; its outbound socket policy, if
 *            any, is consulted before the flow cache
 * @flags:    XFRM_LOOKUP_ICMP makes the lookup fail with -ENOENT unless
 *            the first policy carries XFRM_POLICY_ICMP
 *
 * Returns the dst_entry to use for output, or an ERR_PTR.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      struct sock *sk, int flags)
{
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
	struct dst_entry *dst, *route;
	u16 family = dst_orig->ops->family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
	/* drop_pols: number of entries in pols[] this function must put
	 * itself, i.e. socket-policy references not handed to a bundle.
	 */
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;

	dst = NULL;
	xdst = NULL;
	route = NULL;

	/* Per-socket policy takes precedence over the global tables. */
	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto dropdst;

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
			}

			route = xdst->route;
		}
	}

	if (xdst == NULL) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
			goto nopol;

		flo = flow_cache_lookup(net, fl, family, dir,
					xfrm_bundle_lookup, dst_orig);
		if (flo == NULL)
			goto nopol;
		if (IS_ERR(flo)) {
			err = PTR_ERR(flo);
			goto dropdst;
		}
		xdst = container_of(flo, struct xfrm_dst, flo);

		/* Borrow the bundle's policy pointers; drop_pols stays 0
		 * on this path, so they are not put here.
		 */
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with null route, is when the template could
		 * not be resolved. It means policies are there, but
		 * bundle could not be created, since we don't yet
		 * have the xfrm_state's. We need to wait for KM to
		 * negotiate new SA's or bail out with error.*/
		if (net->xfrm.sysctl_larval_drop) {
			dst_release(dst);
			xfrm_pols_put(pols, drop_pols);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);

			return make_blackhole(net, family, dst_orig);
		}

		err = -EAGAIN;

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
	}

no_transform:
	if (num_pols == 0)
		goto nopol;

	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
		goto error;
	}

	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();

	if (num_xfrms < 0) {
		/* Prohibit the flow */
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
		dst = dst_orig;
	}
ok:
	xfrm_pols_put(pols, drop_pols);
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
	return dst;

nopol:
	/* No policy applies: pass the original route through, unless the
	 * caller asked for an ICMP lookup.
	 */
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
		goto ok;
	}
	err = -ENOENT;
error:
	dst_release(dst);
dropdst:
	dst_release(dst_orig);
	xfrm_pols_put(pols, drop_pols);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup);

/* Invoke the ->reject hook of the state at secpath index @idx, if any.
 *
 * Returns 0 when @skb has no secpath, @idx is out of range or the state's
 * type has no reject hook; otherwise the hook's return value.
 */
static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}

/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we make this in maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

/* Return non-zero when state @x satisfies template @tmpl.
 *
 * For states for which xfrm_state_kern() is true, only an optional
 * template whose addresses match is accepted. Otherwise protocol, SPI
 * and reqid (a zero SPI/reqid in the template acts as a wildcard), mode
 * and the authentication algorithm mask must match, and for
 * non-transport modes the addresses must match as well.
 */
static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * 0 or more than 0 is returned when validation succeeds (either bypass
 * because of optional transport mode, or next index of the matched secpath
 * state with the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
2247 */ 2248 static inline int 2249 xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, 2250 unsigned short family) 2251 { 2252 int idx = start; 2253 2254 if (tmpl->optional) { 2255 if (tmpl->mode == XFRM_MODE_TRANSPORT) 2256 return start; 2257 } else 2258 start = -1; 2259 for (; idx < sp->len; idx++) { 2260 if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) 2261 return ++idx; 2262 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { 2263 if (start == -1) 2264 start = -2-idx; 2265 break; 2266 } 2267 } 2268 return start; 2269 } 2270 2271 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, 2272 unsigned int family, int reverse) 2273 { 2274 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 2275 int err; 2276 2277 if (unlikely(afinfo == NULL)) 2278 return -EAFNOSUPPORT; 2279 2280 afinfo->decode_session(skb, fl, reverse); 2281 err = security_xfrm_decode_session(skb, &fl->flowi_secid); 2282 xfrm_policy_put_afinfo(afinfo); 2283 return err; 2284 } 2285 EXPORT_SYMBOL(__xfrm_decode_session); 2286 2287 static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) 2288 { 2289 for (; k < sp->len; k++) { 2290 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { 2291 *idxp = k; 2292 return 1; 2293 } 2294 } 2295 2296 return 0; 2297 } 2298 2299 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 2300 unsigned short family) 2301 { 2302 struct net *net = dev_net(skb->dev); 2303 struct xfrm_policy *pol; 2304 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 2305 int npols = 0; 2306 int xfrm_nr; 2307 int pi; 2308 int reverse; 2309 struct flowi fl; 2310 u8 fl_dir; 2311 int xerr_idx = -1; 2312 2313 reverse = dir & ~XFRM_POLICY_MASK; 2314 dir &= XFRM_POLICY_MASK; 2315 fl_dir = policy_to_flow_dir(dir); 2316 2317 if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { 2318 XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); 2319 return 0; 2320 } 2321 2322 nf_nat_decode_session(skb, &fl, 
family); 2323 2324 /* First, check used SA against their selectors. */ 2325 if (skb->sp) { 2326 int i; 2327 2328 for (i = skb->sp->len-1; i >= 0; i--) { 2329 struct xfrm_state *x = skb->sp->xvec[i]; 2330 if (!xfrm_selector_match(&x->sel, &fl, family)) { 2331 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); 2332 return 0; 2333 } 2334 } 2335 } 2336 2337 pol = NULL; 2338 if (sk && sk->sk_policy[dir]) { 2339 pol = xfrm_sk_policy_lookup(sk, dir, &fl); 2340 if (IS_ERR(pol)) { 2341 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2342 return 0; 2343 } 2344 } 2345 2346 if (!pol) { 2347 struct flow_cache_object *flo; 2348 2349 flo = flow_cache_lookup(net, &fl, family, fl_dir, 2350 xfrm_policy_lookup, NULL); 2351 if (IS_ERR_OR_NULL(flo)) 2352 pol = ERR_CAST(flo); 2353 else 2354 pol = container_of(flo, struct xfrm_policy, flo); 2355 } 2356 2357 if (IS_ERR(pol)) { 2358 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2359 return 0; 2360 } 2361 2362 if (!pol) { 2363 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { 2364 xfrm_secpath_reject(xerr_idx, skb, &fl); 2365 XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); 2366 return 0; 2367 } 2368 return 1; 2369 } 2370 2371 pol->curlft.use_time = get_seconds(); 2372 2373 pols[0] = pol; 2374 npols++; 2375 #ifdef CONFIG_XFRM_SUB_POLICY 2376 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { 2377 pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, 2378 &fl, family, 2379 XFRM_POLICY_IN); 2380 if (pols[1]) { 2381 if (IS_ERR(pols[1])) { 2382 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); 2383 return 0; 2384 } 2385 pols[1]->curlft.use_time = get_seconds(); 2386 npols++; 2387 } 2388 } 2389 #endif 2390 2391 if (pol->action == XFRM_POLICY_ALLOW) { 2392 struct sec_path *sp; 2393 static struct sec_path dummy; 2394 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; 2395 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; 2396 struct xfrm_tmpl **tpp = tp; 2397 int ti = 0; 2398 int i, k; 2399 2400 if ((sp = skb->sp) == NULL) 2401 sp = &dummy; 2402 2403 for 
(pi = 0; pi < npols; pi++) { 2404 if (pols[pi] != pol && 2405 pols[pi]->action != XFRM_POLICY_ALLOW) { 2406 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); 2407 goto reject; 2408 } 2409 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { 2410 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); 2411 goto reject_error; 2412 } 2413 for (i = 0; i < pols[pi]->xfrm_nr; i++) 2414 tpp[ti++] = &pols[pi]->xfrm_vec[i]; 2415 } 2416 xfrm_nr = ti; 2417 if (npols > 1) { 2418 xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net); 2419 tpp = stp; 2420 } 2421 2422 /* For each tunnel xfrm, find the first matching tmpl. 2423 * For each tmpl before that, find corresponding xfrm. 2424 * Order is _important_. Later we will implement 2425 * some barriers, but at the moment barriers 2426 * are implied between each two transformations. 2427 */ 2428 for (i = xfrm_nr-1, k = 0; i >= 0; i--) { 2429 k = xfrm_policy_ok(tpp[i], sp, k, family); 2430 if (k < 0) { 2431 if (k < -1) 2432 /* "-2 - errored_index" returned */ 2433 xerr_idx = -(2+k); 2434 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); 2435 goto reject; 2436 } 2437 } 2438 2439 if (secpath_has_nontransport(sp, k, &xerr_idx)) { 2440 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); 2441 goto reject; 2442 } 2443 2444 xfrm_pols_put(pols, npols); 2445 return 1; 2446 } 2447 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); 2448 2449 reject: 2450 xfrm_secpath_reject(xerr_idx, skb, &fl); 2451 reject_error: 2452 xfrm_pols_put(pols, npols); 2453 return 0; 2454 } 2455 EXPORT_SYMBOL(__xfrm_policy_check); 2456 2457 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) 2458 { 2459 struct net *net = dev_net(skb->dev); 2460 struct flowi fl; 2461 struct dst_entry *dst; 2462 int res = 1; 2463 2464 if (xfrm_decode_session(skb, &fl, family) < 0) { 2465 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); 2466 return 0; 2467 } 2468 2469 skb_dst_force(skb); 2470 2471 dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); 2472 if (IS_ERR(dst)) { 2473 res = 0; 2474 dst 
= NULL; 2475 } 2476 skb_dst_set(skb, dst); 2477 return res; 2478 } 2479 EXPORT_SYMBOL(__xfrm_route_forward); 2480 2481 /* Optimize later using cookies and generation ids. */ 2482 2483 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) 2484 { 2485 /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete 2486 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to 2487 * get validated by dst_ops->check on every use. We do this 2488 * because when a normal route referenced by an XFRM dst is 2489 * obsoleted we do not go looking around for all parent 2490 * referencing XFRM dsts so that we can invalidate them. It 2491 * is just too much work. Instead we make the checks here on 2492 * every use. For example: 2493 * 2494 * XFRM dst A --> IPv4 dst X 2495 * 2496 * X is the "xdst->route" of A (X is also the "dst->path" of A 2497 * in this example). If X is marked obsolete, "A" will not 2498 * notice. That's what we are validating here via the 2499 * stale_bundle() check. 2500 * 2501 * When a policy's bundle is pruned, we dst_free() the XFRM 2502 * dst which causes it's ->obsolete field to be set to 2503 * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like 2504 * this, we want to force a new route lookup. 2505 */ 2506 if (dst->obsolete < 0 && !stale_bundle(dst)) 2507 return dst; 2508 2509 return NULL; 2510 } 2511 2512 static int stale_bundle(struct dst_entry *dst) 2513 { 2514 return !xfrm_bundle_ok((struct xfrm_dst *)dst); 2515 } 2516 2517 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) 2518 { 2519 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { 2520 dst->dev = dev_net(dev)->loopback_dev; 2521 dev_hold(dst->dev); 2522 dev_put(dev); 2523 } 2524 } 2525 EXPORT_SYMBOL(xfrm_dst_ifdown); 2526 2527 static void xfrm_link_failure(struct sk_buff *skb) 2528 { 2529 /* Impossible. Such dst must be popped before reaches point of failure. 
*/ 2530 } 2531 2532 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) 2533 { 2534 if (dst) { 2535 if (dst->obsolete) { 2536 dst_release(dst); 2537 dst = NULL; 2538 } 2539 } 2540 return dst; 2541 } 2542 2543 void xfrm_garbage_collect(struct net *net) 2544 { 2545 flow_cache_flush(net); 2546 } 2547 EXPORT_SYMBOL(xfrm_garbage_collect); 2548 2549 static void xfrm_garbage_collect_deferred(struct net *net) 2550 { 2551 flow_cache_flush_deferred(net); 2552 } 2553 2554 static void xfrm_init_pmtu(struct dst_entry *dst) 2555 { 2556 do { 2557 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2558 u32 pmtu, route_mtu_cached; 2559 2560 pmtu = dst_mtu(dst->child); 2561 xdst->child_mtu_cached = pmtu; 2562 2563 pmtu = xfrm_state_mtu(dst->xfrm, pmtu); 2564 2565 route_mtu_cached = dst_mtu(xdst->route); 2566 xdst->route_mtu_cached = route_mtu_cached; 2567 2568 if (pmtu > route_mtu_cached) 2569 pmtu = route_mtu_cached; 2570 2571 dst_metric_set(dst, RTAX_MTU, pmtu); 2572 } while ((dst = dst->next)); 2573 } 2574 2575 /* Check that the bundle accepts the flow and its components are 2576 * still valid. 
2577 */ 2578 2579 static int xfrm_bundle_ok(struct xfrm_dst *first) 2580 { 2581 struct dst_entry *dst = &first->u.dst; 2582 struct xfrm_dst *last; 2583 u32 mtu; 2584 2585 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || 2586 (dst->dev && !netif_running(dst->dev))) 2587 return 0; 2588 2589 if (dst->flags & DST_XFRM_QUEUE) 2590 return 1; 2591 2592 last = NULL; 2593 2594 do { 2595 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2596 2597 if (dst->xfrm->km.state != XFRM_STATE_VALID) 2598 return 0; 2599 if (xdst->xfrm_genid != dst->xfrm->genid) 2600 return 0; 2601 if (xdst->num_pols > 0 && 2602 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) 2603 return 0; 2604 2605 mtu = dst_mtu(dst->child); 2606 if (xdst->child_mtu_cached != mtu) { 2607 last = xdst; 2608 xdst->child_mtu_cached = mtu; 2609 } 2610 2611 if (!dst_check(xdst->route, xdst->route_cookie)) 2612 return 0; 2613 mtu = dst_mtu(xdst->route); 2614 if (xdst->route_mtu_cached != mtu) { 2615 last = xdst; 2616 xdst->route_mtu_cached = mtu; 2617 } 2618 2619 dst = dst->child; 2620 } while (dst->xfrm); 2621 2622 if (likely(!last)) 2623 return 1; 2624 2625 mtu = last->child_mtu_cached; 2626 for (;;) { 2627 dst = &last->u.dst; 2628 2629 mtu = xfrm_state_mtu(dst->xfrm, mtu); 2630 if (mtu > last->route_mtu_cached) 2631 mtu = last->route_mtu_cached; 2632 dst_metric_set(dst, RTAX_MTU, mtu); 2633 2634 if (last == first) 2635 break; 2636 2637 last = (struct xfrm_dst *)last->u.dst.next; 2638 last->child_mtu_cached = mtu; 2639 } 2640 2641 return 1; 2642 } 2643 2644 static unsigned int xfrm_default_advmss(const struct dst_entry *dst) 2645 { 2646 return dst_metric_advmss(dst->path); 2647 } 2648 2649 static unsigned int xfrm_mtu(const struct dst_entry *dst) 2650 { 2651 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 2652 2653 return mtu ? 
: dst_mtu(dst->path); 2654 } 2655 2656 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, 2657 struct sk_buff *skb, 2658 const void *daddr) 2659 { 2660 return dst->path->ops->neigh_lookup(dst, skb, daddr); 2661 } 2662 2663 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) 2664 { 2665 struct net *net; 2666 int err = 0; 2667 if (unlikely(afinfo == NULL)) 2668 return -EINVAL; 2669 if (unlikely(afinfo->family >= NPROTO)) 2670 return -EAFNOSUPPORT; 2671 spin_lock(&xfrm_policy_afinfo_lock); 2672 if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) 2673 err = -ENOBUFS; 2674 else { 2675 struct dst_ops *dst_ops = afinfo->dst_ops; 2676 if (likely(dst_ops->kmem_cachep == NULL)) 2677 dst_ops->kmem_cachep = xfrm_dst_cache; 2678 if (likely(dst_ops->check == NULL)) 2679 dst_ops->check = xfrm_dst_check; 2680 if (likely(dst_ops->default_advmss == NULL)) 2681 dst_ops->default_advmss = xfrm_default_advmss; 2682 if (likely(dst_ops->mtu == NULL)) 2683 dst_ops->mtu = xfrm_mtu; 2684 if (likely(dst_ops->negative_advice == NULL)) 2685 dst_ops->negative_advice = xfrm_negative_advice; 2686 if (likely(dst_ops->link_failure == NULL)) 2687 dst_ops->link_failure = xfrm_link_failure; 2688 if (likely(dst_ops->neigh_lookup == NULL)) 2689 dst_ops->neigh_lookup = xfrm_neigh_lookup; 2690 if (likely(afinfo->garbage_collect == NULL)) 2691 afinfo->garbage_collect = xfrm_garbage_collect_deferred; 2692 rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); 2693 } 2694 spin_unlock(&xfrm_policy_afinfo_lock); 2695 2696 rtnl_lock(); 2697 for_each_net(net) { 2698 struct dst_ops *xfrm_dst_ops; 2699 2700 switch (afinfo->family) { 2701 case AF_INET: 2702 xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; 2703 break; 2704 #if IS_ENABLED(CONFIG_IPV6) 2705 case AF_INET6: 2706 xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; 2707 break; 2708 #endif 2709 default: 2710 BUG(); 2711 } 2712 *xfrm_dst_ops = *afinfo->dst_ops; 2713 } 2714 rtnl_unlock(); 2715 2716 return err; 2717 } 2718 
EXPORT_SYMBOL(xfrm_policy_register_afinfo); 2719 2720 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) 2721 { 2722 int err = 0; 2723 if (unlikely(afinfo == NULL)) 2724 return -EINVAL; 2725 if (unlikely(afinfo->family >= NPROTO)) 2726 return -EAFNOSUPPORT; 2727 spin_lock(&xfrm_policy_afinfo_lock); 2728 if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { 2729 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) 2730 err = -EINVAL; 2731 else 2732 RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], 2733 NULL); 2734 } 2735 spin_unlock(&xfrm_policy_afinfo_lock); 2736 if (!err) { 2737 struct dst_ops *dst_ops = afinfo->dst_ops; 2738 2739 synchronize_rcu(); 2740 2741 dst_ops->kmem_cachep = NULL; 2742 dst_ops->check = NULL; 2743 dst_ops->negative_advice = NULL; 2744 dst_ops->link_failure = NULL; 2745 afinfo->garbage_collect = NULL; 2746 } 2747 return err; 2748 } 2749 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); 2750 2751 static void __net_init xfrm_dst_ops_init(struct net *net) 2752 { 2753 struct xfrm_policy_afinfo *afinfo; 2754 2755 rcu_read_lock(); 2756 afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); 2757 if (afinfo) 2758 net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; 2759 #if IS_ENABLED(CONFIG_IPV6) 2760 afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); 2761 if (afinfo) 2762 net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; 2763 #endif 2764 rcu_read_unlock(); 2765 } 2766 2767 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) 2768 { 2769 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2770 2771 switch (event) { 2772 case NETDEV_DOWN: 2773 xfrm_garbage_collect(dev_net(dev)); 2774 } 2775 return NOTIFY_DONE; 2776 } 2777 2778 static struct notifier_block xfrm_dev_notifier = { 2779 .notifier_call = xfrm_dev_event, 2780 }; 2781 2782 #ifdef CONFIG_XFRM_STATISTICS 2783 static int __net_init xfrm_statistics_init(struct net *net) 2784 { 2785 int rv; 2786 2787 if (snmp_mib_init((void __percpu 
**)net->mib.xfrm_statistics, 2788 sizeof(struct linux_xfrm_mib), 2789 __alignof__(struct linux_xfrm_mib)) < 0) 2790 return -ENOMEM; 2791 rv = xfrm_proc_init(net); 2792 if (rv < 0) 2793 snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); 2794 return rv; 2795 } 2796 2797 static void xfrm_statistics_fini(struct net *net) 2798 { 2799 xfrm_proc_fini(net); 2800 snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); 2801 } 2802 #else 2803 static int __net_init xfrm_statistics_init(struct net *net) 2804 { 2805 return 0; 2806 } 2807 2808 static void xfrm_statistics_fini(struct net *net) 2809 { 2810 } 2811 #endif 2812 2813 static int __net_init xfrm_policy_init(struct net *net) 2814 { 2815 unsigned int hmask, sz; 2816 int dir; 2817 2818 if (net_eq(net, &init_net)) 2819 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", 2820 sizeof(struct xfrm_dst), 2821 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 2822 NULL); 2823 2824 hmask = 8 - 1; 2825 sz = (hmask+1) * sizeof(struct hlist_head); 2826 2827 net->xfrm.policy_byidx = xfrm_hash_alloc(sz); 2828 if (!net->xfrm.policy_byidx) 2829 goto out_byidx; 2830 net->xfrm.policy_idx_hmask = hmask; 2831 2832 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 2833 struct xfrm_policy_hash *htab; 2834 2835 net->xfrm.policy_count[dir] = 0; 2836 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); 2837 2838 htab = &net->xfrm.policy_bydst[dir]; 2839 htab->table = xfrm_hash_alloc(sz); 2840 if (!htab->table) 2841 goto out_bydst; 2842 htab->hmask = hmask; 2843 } 2844 2845 INIT_LIST_HEAD(&net->xfrm.policy_all); 2846 INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); 2847 if (net_eq(net, &init_net)) 2848 register_netdevice_notifier(&xfrm_dev_notifier); 2849 return 0; 2850 2851 out_bydst: 2852 for (dir--; dir >= 0; dir--) { 2853 struct xfrm_policy_hash *htab; 2854 2855 htab = &net->xfrm.policy_bydst[dir]; 2856 xfrm_hash_free(htab->table, sz); 2857 } 2858 xfrm_hash_free(net->xfrm.policy_byidx, sz); 2859 out_byidx: 2860 return -ENOMEM; 2861 } 2862 2863 
static void xfrm_policy_fini(struct net *net) 2864 { 2865 struct xfrm_audit audit_info; 2866 unsigned int sz; 2867 int dir; 2868 2869 flush_work(&net->xfrm.policy_hash_work); 2870 #ifdef CONFIG_XFRM_SUB_POLICY 2871 audit_info.loginuid = INVALID_UID; 2872 audit_info.sessionid = (unsigned int)-1; 2873 audit_info.secid = 0; 2874 xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); 2875 #endif 2876 audit_info.loginuid = INVALID_UID; 2877 audit_info.sessionid = (unsigned int)-1; 2878 audit_info.secid = 0; 2879 xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); 2880 2881 WARN_ON(!list_empty(&net->xfrm.policy_all)); 2882 2883 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 2884 struct xfrm_policy_hash *htab; 2885 2886 WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); 2887 2888 htab = &net->xfrm.policy_bydst[dir]; 2889 sz = (htab->hmask + 1) * sizeof(struct hlist_head); 2890 WARN_ON(!hlist_empty(htab->table)); 2891 xfrm_hash_free(htab->table, sz); 2892 } 2893 2894 sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); 2895 WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); 2896 xfrm_hash_free(net->xfrm.policy_byidx, sz); 2897 } 2898 2899 static int __net_init xfrm_net_init(struct net *net) 2900 { 2901 int rv; 2902 2903 rv = xfrm_statistics_init(net); 2904 if (rv < 0) 2905 goto out_statistics; 2906 rv = xfrm_state_init(net); 2907 if (rv < 0) 2908 goto out_state; 2909 rv = xfrm_policy_init(net); 2910 if (rv < 0) 2911 goto out_policy; 2912 xfrm_dst_ops_init(net); 2913 rv = xfrm_sysctl_init(net); 2914 if (rv < 0) 2915 goto out_sysctl; 2916 rv = flow_cache_init(net); 2917 if (rv < 0) 2918 goto out; 2919 2920 /* Initialize the per-net locks here */ 2921 spin_lock_init(&net->xfrm.xfrm_state_lock); 2922 rwlock_init(&net->xfrm.xfrm_policy_lock); 2923 mutex_init(&net->xfrm.xfrm_cfg_mutex); 2924 2925 return 0; 2926 2927 out: 2928 xfrm_sysctl_fini(net); 2929 out_sysctl: 2930 xfrm_policy_fini(net); 2931 out_policy: 2932 xfrm_state_fini(net); 2933 
out_state: 2934 xfrm_statistics_fini(net); 2935 out_statistics: 2936 return rv; 2937 } 2938 2939 static void __net_exit xfrm_net_exit(struct net *net) 2940 { 2941 flow_cache_fini(net); 2942 xfrm_sysctl_fini(net); 2943 xfrm_policy_fini(net); 2944 xfrm_state_fini(net); 2945 xfrm_statistics_fini(net); 2946 } 2947 2948 static struct pernet_operations __net_initdata xfrm_net_ops = { 2949 .init = xfrm_net_init, 2950 .exit = xfrm_net_exit, 2951 }; 2952 2953 void __init xfrm_init(void) 2954 { 2955 register_pernet_subsys(&xfrm_net_ops); 2956 xfrm_input_init(); 2957 } 2958 2959 #ifdef CONFIG_AUDITSYSCALL 2960 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, 2961 struct audit_buffer *audit_buf) 2962 { 2963 struct xfrm_sec_ctx *ctx = xp->security; 2964 struct xfrm_selector *sel = &xp->selector; 2965 2966 if (ctx) 2967 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", 2968 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); 2969 2970 switch (sel->family) { 2971 case AF_INET: 2972 audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); 2973 if (sel->prefixlen_s != 32) 2974 audit_log_format(audit_buf, " src_prefixlen=%d", 2975 sel->prefixlen_s); 2976 audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); 2977 if (sel->prefixlen_d != 32) 2978 audit_log_format(audit_buf, " dst_prefixlen=%d", 2979 sel->prefixlen_d); 2980 break; 2981 case AF_INET6: 2982 audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); 2983 if (sel->prefixlen_s != 128) 2984 audit_log_format(audit_buf, " src_prefixlen=%d", 2985 sel->prefixlen_s); 2986 audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); 2987 if (sel->prefixlen_d != 128) 2988 audit_log_format(audit_buf, " dst_prefixlen=%d", 2989 sel->prefixlen_d); 2990 break; 2991 } 2992 } 2993 2994 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, 2995 kuid_t auid, unsigned int sessionid, u32 secid) 2996 { 2997 struct audit_buffer *audit_buf; 2998 2999 audit_buf = xfrm_audit_start("SPD-add"); 3000 if (audit_buf == 
NULL) 3001 return; 3002 xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); 3003 audit_log_format(audit_buf, " res=%u", result); 3004 xfrm_audit_common_policyinfo(xp, audit_buf); 3005 audit_log_end(audit_buf); 3006 } 3007 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); 3008 3009 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, 3010 kuid_t auid, unsigned int sessionid, u32 secid) 3011 { 3012 struct audit_buffer *audit_buf; 3013 3014 audit_buf = xfrm_audit_start("SPD-delete"); 3015 if (audit_buf == NULL) 3016 return; 3017 xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); 3018 audit_log_format(audit_buf, " res=%u", result); 3019 xfrm_audit_common_policyinfo(xp, audit_buf); 3020 audit_log_end(audit_buf); 3021 } 3022 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); 3023 #endif 3024 3025 #ifdef CONFIG_XFRM_MIGRATE 3026 static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, 3027 const struct xfrm_selector *sel_tgt) 3028 { 3029 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { 3030 if (sel_tgt->family == sel_cmp->family && 3031 xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, 3032 sel_cmp->family) && 3033 xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, 3034 sel_cmp->family) && 3035 sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && 3036 sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { 3037 return true; 3038 } 3039 } else { 3040 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { 3041 return true; 3042 } 3043 } 3044 return false; 3045 } 3046 3047 static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, 3048 u8 dir, u8 type, struct net *net) 3049 { 3050 struct xfrm_policy *pol, *ret = NULL; 3051 struct hlist_head *chain; 3052 u32 priority = ~0U; 3053 3054 read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ 3055 chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); 3056 hlist_for_each_entry(pol, chain, bydst) { 3057 if (xfrm_migrate_selector_match(sel, &pol->selector) && 3058 
pol->type == type) { 3059 ret = pol; 3060 priority = ret->priority; 3061 break; 3062 } 3063 } 3064 chain = &net->xfrm.policy_inexact[dir]; 3065 hlist_for_each_entry(pol, chain, bydst) { 3066 if (xfrm_migrate_selector_match(sel, &pol->selector) && 3067 pol->type == type && 3068 pol->priority < priority) { 3069 ret = pol; 3070 break; 3071 } 3072 } 3073 3074 if (ret) 3075 xfrm_pol_hold(ret); 3076 3077 read_unlock_bh(&net->xfrm.xfrm_policy_lock); 3078 3079 return ret; 3080 } 3081 3082 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) 3083 { 3084 int match = 0; 3085 3086 if (t->mode == m->mode && t->id.proto == m->proto && 3087 (m->reqid == 0 || t->reqid == m->reqid)) { 3088 switch (t->mode) { 3089 case XFRM_MODE_TUNNEL: 3090 case XFRM_MODE_BEET: 3091 if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, 3092 m->old_family) && 3093 xfrm_addr_equal(&t->saddr, &m->old_saddr, 3094 m->old_family)) { 3095 match = 1; 3096 } 3097 break; 3098 case XFRM_MODE_TRANSPORT: 3099 /* in case of transport mode, template does not store 3100 any IP addresses, hence we just compare mode and 3101 protocol */ 3102 match = 1; 3103 break; 3104 default: 3105 break; 3106 } 3107 } 3108 return match; 3109 } 3110 3111 /* update endpoint address(es) of template(s) */ 3112 static int xfrm_policy_migrate(struct xfrm_policy *pol, 3113 struct xfrm_migrate *m, int num_migrate) 3114 { 3115 struct xfrm_migrate *mp; 3116 int i, j, n = 0; 3117 3118 write_lock_bh(&pol->lock); 3119 if (unlikely(pol->walk.dead)) { 3120 /* target policy has been deleted */ 3121 write_unlock_bh(&pol->lock); 3122 return -ENOENT; 3123 } 3124 3125 for (i = 0; i < pol->xfrm_nr; i++) { 3126 for (j = 0, mp = m; j < num_migrate; j++, mp++) { 3127 if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) 3128 continue; 3129 n++; 3130 if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && 3131 pol->xfrm_vec[i].mode != XFRM_MODE_BEET) 3132 continue; 3133 /* update endpoints */ 3134 memcpy(&pol->xfrm_vec[i].id.daddr, 
&mp->new_daddr, 3135 sizeof(pol->xfrm_vec[i].id.daddr)); 3136 memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr, 3137 sizeof(pol->xfrm_vec[i].saddr)); 3138 pol->xfrm_vec[i].encap_family = mp->new_family; 3139 /* flush bundles */ 3140 atomic_inc(&pol->genid); 3141 } 3142 } 3143 3144 write_unlock_bh(&pol->lock); 3145 3146 if (!n) 3147 return -ENODATA; 3148 3149 return 0; 3150 } 3151 3152 static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) 3153 { 3154 int i, j; 3155 3156 if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) 3157 return -EINVAL; 3158 3159 for (i = 0; i < num_migrate; i++) { 3160 if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, 3161 m[i].old_family) && 3162 xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, 3163 m[i].old_family)) 3164 return -EINVAL; 3165 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || 3166 xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) 3167 return -EINVAL; 3168 3169 /* check if there is any duplicated entry */ 3170 for (j = i + 1; j < num_migrate; j++) { 3171 if (!memcmp(&m[i].old_daddr, &m[j].old_daddr, 3172 sizeof(m[i].old_daddr)) && 3173 !memcmp(&m[i].old_saddr, &m[j].old_saddr, 3174 sizeof(m[i].old_saddr)) && 3175 m[i].proto == m[j].proto && 3176 m[i].mode == m[j].mode && 3177 m[i].reqid == m[j].reqid && 3178 m[i].old_family == m[j].old_family) 3179 return -EINVAL; 3180 } 3181 } 3182 3183 return 0; 3184 } 3185 3186 int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, 3187 struct xfrm_migrate *m, int num_migrate, 3188 struct xfrm_kmaddress *k, struct net *net) 3189 { 3190 int i, err, nx_cur = 0, nx_new = 0; 3191 struct xfrm_policy *pol = NULL; 3192 struct xfrm_state *x, *xc; 3193 struct xfrm_state *x_cur[XFRM_MAX_DEPTH]; 3194 struct xfrm_state *x_new[XFRM_MAX_DEPTH]; 3195 struct xfrm_migrate *mp; 3196 3197 if ((err = xfrm_migrate_check(m, num_migrate)) < 0) 3198 goto out; 3199 3200 /* Stage 1 - find policy */ 3201 if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { 
3202 err = -ENOENT; 3203 goto out; 3204 } 3205 3206 /* Stage 2 - find and update state(s) */ 3207 for (i = 0, mp = m; i < num_migrate; i++, mp++) { 3208 if ((x = xfrm_migrate_state_find(mp, net))) { 3209 x_cur[nx_cur] = x; 3210 nx_cur++; 3211 if ((xc = xfrm_state_migrate(x, mp))) { 3212 x_new[nx_new] = xc; 3213 nx_new++; 3214 } else { 3215 err = -ENODATA; 3216 goto restore_state; 3217 } 3218 } 3219 } 3220 3221 /* Stage 3 - update policy */ 3222 if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0) 3223 goto restore_state; 3224 3225 /* Stage 4 - delete old state(s) */ 3226 if (nx_cur) { 3227 xfrm_states_put(x_cur, nx_cur); 3228 xfrm_states_delete(x_cur, nx_cur); 3229 } 3230 3231 /* Stage 5 - announce */ 3232 km_migrate(sel, dir, type, m, num_migrate, k); 3233 3234 xfrm_pol_put(pol); 3235 3236 return 0; 3237 out: 3238 return err; 3239 3240 restore_state: 3241 if (pol) 3242 xfrm_pol_put(pol); 3243 if (nx_cur) 3244 xfrm_states_put(x_cur, nx_cur); 3245 if (nx_new) 3246 xfrm_states_delete(x_new, nx_new); 3247 3248 return err; 3249 } 3250 EXPORT_SYMBOL(xfrm_migrate); 3251 #endif 3252