/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

int sysctl_xfrm_larval_drop __read_mostly;

#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
EXPORT_SYMBOL(xfrm_statistics);
#endif

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_RWLOCK(xfrm_policy_lock);

unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_count);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);

static inline int
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
        return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
                addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
                !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
                !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
                (fl->proto == sel->proto || !sel->proto) &&
                (fl->oif == sel->ifindex || !sel->ifindex);
}

static inline int
__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
        return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
                addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
                !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
                !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
                (fl->proto == sel->proto || !sel->proto) &&
                (fl->oif == sel->ifindex || !sel->ifindex);
}

int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
                        unsigned short family)
{
        switch (family) {
        case AF_INET:
                return __xfrm4_selector_match(sel, fl);
        case AF_INET6:
                return __xfrm6_selector_match(sel, fl);
        }
        return 0;
}
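/* Note: a zero selector field acts as a wildcard above. For example, a
 * selector with prefixlen_d = 8, proto = 0 and ifindex = 0 matches every
 * flow whose destination falls within the /8 of sel->daddr, whatever the
 * protocol or output interface. */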
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
                                                int family)
{
        xfrm_address_t *saddr = &x->props.saddr;
        xfrm_address_t *daddr = &x->id.daddr;
        struct xfrm_policy_afinfo *afinfo;
        struct dst_entry *dst;

        if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
                saddr = x->coaddr;
        if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
                daddr = x->coaddr;

        afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return ERR_PTR(-EAFNOSUPPORT);

        dst = afinfo->dst_lookup(tos, saddr, daddr);
        xfrm_policy_put_afinfo(afinfo);
        return dst;
}

static inline unsigned long make_jiffies(long secs)
{
        if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
                return MAX_SCHEDULE_TIMEOUT-1;
        else
                return secs*HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
        struct xfrm_policy *xp = (struct xfrm_policy*)data;
        unsigned long now = get_seconds();
        long next = LONG_MAX;
        int warn = 0;
        int dir;

        read_lock(&xp->lock);

        if (xp->dead)
                goto out;

        dir = xfrm_policy_id2dir(xp->index);

        if (xp->lft.hard_add_expires_seconds) {
                long tmo = xp->lft.hard_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.hard_use_expires_seconds) {
                long tmo = xp->lft.hard_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_add_expires_seconds) {
                long tmo = xp->lft.soft_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_use_expires_seconds) {
                long tmo = xp->lft.soft_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }

        if (warn)
                km_policy_expired(xp, dir, 0, 0);
        if (next != LONG_MAX &&
            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
                xfrm_pol_hold(xp);

out:
        read_unlock(&xp->lock);
        xfrm_pol_put(xp);
        return;

expired:
        read_unlock(&xp->lock);
        if (!xfrm_policy_delete(xp, dir))
                km_policy_expired(xp, dir, 1, 0);
        xfrm_pol_put(xp);
}
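/* Note on the timer above: a hard expiry deletes the policy and notifies
 * key managers with hard == 1, while a soft expiry only warns (hard == 0)
 * and re-arms the timer XFRM_KM_TIMEOUT seconds out, so the hard limit is
 * still enforced later. */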
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
        struct xfrm_policy *policy;

        policy = kzalloc(sizeof(struct xfrm_policy), gfp);

        if (policy) {
                INIT_HLIST_NODE(&policy->bydst);
                INIT_HLIST_NODE(&policy->byidx);
                rwlock_init(&policy->lock);
                atomic_set(&policy->refcnt, 1);
                setup_timer(&policy->timer, xfrm_policy_timer,
                            (unsigned long)policy);
        }
        return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must be released by this moment. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
        BUG_ON(!policy->dead);

        BUG_ON(policy->bundles);

        if (del_timer(&policy->timer))
                BUG();

        security_xfrm_policy_free(policy);
        kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
        struct dst_entry *dst;

        while ((dst = policy->bundles) != NULL) {
                policy->bundles = dst->next;
                dst_free(dst);
        }

        if (del_timer(&policy->timer))
                atomic_dec(&policy->refcnt);

        if (atomic_read(&policy->refcnt) > 1)
                flow_cache_flush();

        xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(struct work_struct *work)
{
        struct xfrm_policy *policy;
        struct hlist_node *entry, *tmp;
        struct hlist_head gc_list;

        spin_lock_bh(&xfrm_policy_gc_lock);
        gc_list.first = xfrm_policy_gc_list.first;
        INIT_HLIST_HEAD(&xfrm_policy_gc_list);
        spin_unlock_bh(&xfrm_policy_gc_lock);

        hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
                xfrm_policy_gc_kill(policy);
}

/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must be unlinked from lists by this moment.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
        int dead;

        write_lock_bh(&policy->lock);
        dead = policy->dead;
        policy->dead = 1;
        write_unlock_bh(&policy->lock);

        if (unlikely(dead)) {
                WARN_ON(1);
                return;
        }

        spin_lock(&xfrm_policy_gc_lock);
        hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
        spin_unlock(&xfrm_policy_gc_lock);

        schedule_work(&xfrm_policy_gc_work);
}

struct xfrm_policy_hash {
        struct hlist_head       *table;
        unsigned int            hmask;
};

static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
static struct hlist_head *xfrm_policy_byidx __read_mostly;
static unsigned int xfrm_idx_hmask __read_mostly;
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
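/* Policies live on three indices: a per-direction hash table keyed by the
 * selector addresses (xfrm_policy_bydst), a per-direction list for
 * selectors that cannot be hashed exactly (xfrm_policy_inexact), and a
 * global table keyed by policy index (xfrm_policy_byidx). __sel_hash()
 * signals an unhashable selector by returning hmask + 1; see
 * policy_hash_bysel() below. */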
static inline unsigned int idx_hash(u32 index)
{
        return __idx_hash(index, xfrm_idx_hmask);
}

static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
{
        unsigned int hmask = xfrm_policy_bydst[dir].hmask;
        unsigned int hash = __sel_hash(sel, family, hmask);

        return (hash == hmask + 1 ?
                &xfrm_policy_inexact[dir] :
                xfrm_policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
{
        unsigned int hmask = xfrm_policy_bydst[dir].hmask;
        unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

        return xfrm_policy_bydst[dir].table + hash;
}

static void xfrm_dst_hash_transfer(struct hlist_head *list,
                                   struct hlist_head *ndsttable,
                                   unsigned int nhashmask)
{
        struct hlist_node *entry, *tmp;
        struct xfrm_policy *pol;

        hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
                unsigned int h;

                h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
                                pol->family, nhashmask);
                hlist_add_head(&pol->bydst, ndsttable+h);
        }
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
                                   struct hlist_head *nidxtable,
                                   unsigned int nhashmask)
{
        struct hlist_node *entry, *tmp;
        struct xfrm_policy *pol;

        hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
                unsigned int h;

                h = __idx_hash(pol->index, nhashmask);
                hlist_add_head(&pol->byidx, nidxtable+h);
        }
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
        return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(int dir)
{
        unsigned int hmask = xfrm_policy_bydst[dir].hmask;
        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
        struct hlist_head *odst = xfrm_policy_bydst[dir].table;
        struct hlist_head *ndst = xfrm_hash_alloc(nsize);
        int i;

        if (!ndst)
                return;

        write_lock_bh(&xfrm_policy_lock);

        for (i = hmask; i >= 0; i--)
                xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

        xfrm_policy_bydst[dir].table = ndst;
        xfrm_policy_bydst[dir].hmask = nhashmask;

        write_unlock_bh(&xfrm_policy_lock);

        xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(int total)
{
        unsigned int hmask = xfrm_idx_hmask;
        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
        struct hlist_head *oidx = xfrm_policy_byidx;
        struct hlist_head *nidx = xfrm_hash_alloc(nsize);
        int i;

        if (!nidx)
                return;

        write_lock_bh(&xfrm_policy_lock);

        for (i = hmask; i >= 0; i--)
                xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

        xfrm_policy_byidx = nidx;
        xfrm_idx_hmask = nhashmask;

        write_unlock_bh(&xfrm_policy_lock);

        xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(int dir, int *total)
{
        unsigned int cnt = xfrm_policy_count[dir];
        unsigned int hmask = xfrm_policy_bydst[dir].hmask;

        if (total)
                *total += cnt;

        if ((hmask + 1) < xfrm_policy_hashmax &&
            cnt > hmask)
                return 1;

        return 0;
}

static inline int xfrm_byidx_should_resize(int total)
{
        unsigned int hmask = xfrm_idx_hmask;

        if ((hmask + 1) < xfrm_policy_hashmax &&
            total > hmask)
                return 1;

        return 0;
}
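/* Each resize doubles the table: xfrm_new_hash_mask() turns hmask 7 (8
 * buckets) into 15 (16 buckets), and so on. A resize is triggered once the
 * policy count for a direction exceeds the bucket count, capped at
 * xfrm_policy_hashmax buckets. */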
void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
{
        read_lock_bh(&xfrm_policy_lock);
        si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
        si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
        si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
        si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
        si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
        si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
        si->spdhcnt = xfrm_idx_hmask;
        si->spdhmcnt = xfrm_policy_hashmax;
        read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *__unused)
{
        int dir, total;

        mutex_lock(&hash_resize_mutex);

        total = 0;
        for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
                if (xfrm_bydst_should_resize(dir, &total))
                        xfrm_bydst_resize(dir);
        }
        if (xfrm_byidx_should_resize(total))
                xfrm_byidx_resize(total);

        mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(u8 type, int dir)
{
        static u32 idx_generator;

        for (;;) {
                struct hlist_node *entry;
                struct hlist_head *list;
                struct xfrm_policy *p;
                u32 idx;
                int found;

                idx = (idx_generator | dir);
                idx_generator += 8;
                if (idx == 0)
                        idx = 8;
                list = xfrm_policy_byidx + idx_hash(idx);
                found = 0;
                hlist_for_each_entry(p, entry, list, byidx) {
                        if (p->index == idx) {
                                found = 1;
                                break;
                        }
                }
                if (!found)
                        return idx;
        }
}
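/* The generated index encodes the direction in its low three bits: dir is
 * ORed in and idx_generator advances in steps of 8, which is what lets
 * xfrm_policy_id2dir() recover the direction from an index alone. */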
static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
        u32 *p1 = (u32 *) s1;
        u32 *p2 = (u32 *) s2;
        int len = sizeof(struct xfrm_selector) / sizeof(u32);
        int i;

        for (i = 0; i < len; i++) {
                if (p1[i] != p2[i])
                        return 1;
        }

        return 0;
}

int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
        struct xfrm_policy *pol;
        struct xfrm_policy *delpol;
        struct hlist_head *chain;
        struct hlist_node *entry, *newpos;
        struct dst_entry *gc_list;

        write_lock_bh(&xfrm_policy_lock);
        chain = policy_hash_bysel(&policy->selector, policy->family, dir);
        delpol = NULL;
        newpos = NULL;
        hlist_for_each_entry(pol, entry, chain, bydst) {
                if (pol->type == policy->type &&
                    !selector_cmp(&pol->selector, &policy->selector) &&
                    xfrm_sec_ctx_match(pol->security, policy->security) &&
                    !WARN_ON(delpol)) {
                        if (excl) {
                                write_unlock_bh(&xfrm_policy_lock);
                                return -EEXIST;
                        }
                        delpol = pol;
                        if (policy->priority > pol->priority)
                                continue;
                } else if (policy->priority >= pol->priority) {
                        newpos = &pol->bydst;
                        continue;
                }
                if (delpol)
                        break;
        }
        if (newpos)
                hlist_add_after(newpos, &policy->bydst);
        else
                hlist_add_head(&policy->bydst, chain);
        xfrm_pol_hold(policy);
        xfrm_policy_count[dir]++;
        atomic_inc(&flow_cache_genid);
        if (delpol) {
                hlist_del(&delpol->bydst);
                hlist_del(&delpol->byidx);
                xfrm_policy_count[dir]--;
        }
        policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
        hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
        policy->curlft.add_time = get_seconds();
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
        write_unlock_bh(&xfrm_policy_lock);

        if (delpol)
                xfrm_policy_kill(delpol);
        else if (xfrm_bydst_should_resize(dir, NULL))
                schedule_work(&xfrm_hash_work);

        read_lock_bh(&xfrm_policy_lock);
        gc_list = NULL;
        entry = &policy->bydst;
        hlist_for_each_entry_continue(policy, entry, bydst) {
                struct dst_entry *dst;

                write_lock(&policy->lock);
                dst = policy->bundles;
                if (dst) {
                        struct dst_entry *tail = dst;
                        while (tail->next)
                                tail = tail->next;
                        tail->next = gc_list;
                        gc_list = dst;

                        policy->bundles = NULL;
                }
                write_unlock(&policy->lock);
        }
        read_unlock_bh(&xfrm_policy_lock);

        while (gc_list) {
                struct dst_entry *dst = gc_list;

                gc_list = dst->next;
                dst_free(dst);
        }

        return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
                                          struct xfrm_selector *sel,
                                          struct xfrm_sec_ctx *ctx, int delete,
                                          int *err)
{
        struct xfrm_policy *pol, *ret;
        struct hlist_head *chain;
        struct hlist_node *entry;

        *err = 0;
        write_lock_bh(&xfrm_policy_lock);
        chain = policy_hash_bysel(sel, sel->family, dir);
        ret = NULL;
        hlist_for_each_entry(pol, entry, chain, bydst) {
                if (pol->type == type &&
                    !selector_cmp(sel, &pol->selector) &&
                    xfrm_sec_ctx_match(ctx, pol->security)) {
                        xfrm_pol_hold(pol);
                        if (delete) {
                                *err = security_xfrm_policy_delete(pol);
                                if (*err) {
                                        write_unlock_bh(&xfrm_policy_lock);
                                        return pol;
                                }
                                hlist_del(&pol->bydst);
                                hlist_del(&pol->byidx);
                                xfrm_policy_count[dir]--;
                        }
                        ret = pol;
                        break;
                }
        }
        write_unlock_bh(&xfrm_policy_lock);

        if (ret && delete) {
                atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(ret);
        }
        return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
                                     int *err)
{
        struct xfrm_policy *pol, *ret;
        struct hlist_head *chain;
        struct hlist_node *entry;

        *err = -ENOENT;
        if (xfrm_policy_id2dir(id) != dir)
                return NULL;

        *err = 0;
        write_lock_bh(&xfrm_policy_lock);
        chain = xfrm_policy_byidx + idx_hash(id);
        ret = NULL;
        hlist_for_each_entry(pol, entry, chain, byidx) {
                if (pol->type == type && pol->index == id) {
                        xfrm_pol_hold(pol);
                        if (delete) {
                                *err = security_xfrm_policy_delete(pol);
                                if (*err) {
                                        write_unlock_bh(&xfrm_policy_lock);
                                        return pol;
                                }
                                hlist_del(&pol->bydst);
                                hlist_del(&pol->byidx);
                                xfrm_policy_count[dir]--;
                        }
                        ret = pol;
                        break;
                }
        }
        write_unlock_bh(&xfrm_policy_lock);

        if (ret && delete) {
                atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(ret);
        }
        return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
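/* Both lookup helpers above return the policy with a reference held; on
 * delete they only unlink under xfrm_policy_lock and leave the actual
 * freeing to xfrm_policy_kill() and the GC worker, bumping
 * flow_cache_genid so cached flow lookups get revalidated. */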
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
        int dir, err = 0;

        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
                struct xfrm_policy *pol;
                struct hlist_node *entry;
                int i;

                hlist_for_each_entry(pol, entry,
                                     &xfrm_policy_inexact[dir], bydst) {
                        if (pol->type != type)
                                continue;
                        err = security_xfrm_policy_delete(pol);
                        if (err) {
                                xfrm_audit_policy_delete(pol, 0,
                                                         audit_info->loginuid,
                                                         audit_info->secid);
                                return err;
                        }
                }
                for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
                        hlist_for_each_entry(pol, entry,
                                             xfrm_policy_bydst[dir].table + i,
                                             bydst) {
                                if (pol->type != type)
                                        continue;
                                err = security_xfrm_policy_delete(pol);
                                if (err) {
                                        xfrm_audit_policy_delete(pol, 0,
                                                                 audit_info->loginuid,
                                                                 audit_info->secid);
                                        return err;
                                }
                        }
                }
        }
        return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
        return 0;
}
#endif

int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
{
        int dir, err = 0;

        write_lock_bh(&xfrm_policy_lock);

        err = xfrm_policy_flush_secctx_check(type, audit_info);
        if (err)
                goto out;

        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
                struct xfrm_policy *pol;
                struct hlist_node *entry;
                int i, killed;

                killed = 0;
        again1:
                hlist_for_each_entry(pol, entry,
                                     &xfrm_policy_inexact[dir], bydst) {
                        if (pol->type != type)
                                continue;
                        hlist_del(&pol->bydst);
                        hlist_del(&pol->byidx);
                        write_unlock_bh(&xfrm_policy_lock);

                        xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
                                                 audit_info->secid);

                        xfrm_policy_kill(pol);
                        killed++;

                        write_lock_bh(&xfrm_policy_lock);
                        goto again1;
                }

                for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
        again2:
                        hlist_for_each_entry(pol, entry,
                                             xfrm_policy_bydst[dir].table + i,
                                             bydst) {
                                if (pol->type != type)
                                        continue;
                                hlist_del(&pol->bydst);
                                hlist_del(&pol->byidx);
                                write_unlock_bh(&xfrm_policy_lock);

                                xfrm_audit_policy_delete(pol, 1,
                                                         audit_info->loginuid,
                                                         audit_info->secid);
                                xfrm_policy_kill(pol);
                                killed++;

                                write_lock_bh(&xfrm_policy_lock);
                                goto again2;
                        }
                }

                xfrm_policy_count[dir] -= killed;
        }
        atomic_inc(&flow_cache_genid);
out:
        write_unlock_bh(&xfrm_policy_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
                     void *data)
{
        struct xfrm_policy *pol, *last = NULL;
        struct hlist_node *entry;
        int dir, last_dir = 0, count, error;

        read_lock_bh(&xfrm_policy_lock);
        count = 0;

        for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
                struct hlist_head *table = xfrm_policy_bydst[dir].table;
                int i;

                hlist_for_each_entry(pol, entry,
                                     &xfrm_policy_inexact[dir], bydst) {
                        if (pol->type != type)
                                continue;
                        if (last) {
                                error = func(last, last_dir % XFRM_POLICY_MAX,
                                             count, data);
                                if (error)
                                        goto out;
                        }
                        last = pol;
                        last_dir = dir;
                        count++;
                }
                for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
                        hlist_for_each_entry(pol, entry, table + i, bydst) {
                                if (pol->type != type)
                                        continue;
                                if (last) {
                                        error = func(last, last_dir % XFRM_POLICY_MAX,
                                                     count, data);
                                        if (error)
                                                goto out;
                                }
                                last = pol;
                                last_dir = dir;
                                count++;
                        }
                }
        }
        if (count == 0) {
                error = -ENOENT;
                goto out;
        }
        error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
out:
        read_unlock_bh(&xfrm_policy_lock);
        return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
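/* xfrm_policy_flush() drops xfrm_policy_lock around each kill so the audit
 * and teardown paths run without the lock held, then restarts the chain
 * scan from the top (again1/again2) because the list may have changed in
 * the meantime. The walker above defers the callback by one entry so the
 * final invocation can be marked with count == 0 as a last-entry flag. */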
/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
                             u8 type, u16 family, int dir)
{
        struct xfrm_selector *sel = &pol->selector;
        int match, ret = -ESRCH;

        if (pol->family != family ||
            pol->type != type)
                return ret;

        match = xfrm_selector_match(sel, fl, family);
        if (match)
                ret = security_xfrm_policy_lookup(pol, fl->secid, dir);

        return ret;
}

static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
                                                     u16 family, u8 dir)
{
        int err;
        struct xfrm_policy *pol, *ret;
        xfrm_address_t *daddr, *saddr;
        struct hlist_node *entry;
        struct hlist_head *chain;
        u32 priority = ~0U;

        daddr = xfrm_flowi_daddr(fl, family);
        saddr = xfrm_flowi_saddr(fl, family);
        if (unlikely(!daddr || !saddr))
                return NULL;

        read_lock_bh(&xfrm_policy_lock);
        chain = policy_hash_direct(daddr, saddr, family, dir);
        ret = NULL;
        hlist_for_each_entry(pol, entry, chain, bydst) {
                err = xfrm_policy_match(pol, fl, type, family, dir);
                if (err) {
                        if (err == -ESRCH)
                                continue;
                        else {
                                ret = ERR_PTR(err);
                                goto fail;
                        }
                } else {
                        ret = pol;
                        priority = ret->priority;
                        break;
                }
        }
        chain = &xfrm_policy_inexact[dir];
        hlist_for_each_entry(pol, entry, chain, bydst) {
                err = xfrm_policy_match(pol, fl, type, family, dir);
                if (err) {
                        if (err == -ESRCH)
                                continue;
                        else {
                                ret = ERR_PTR(err);
                                goto fail;
                        }
                } else if (pol->priority < priority) {
                        ret = pol;
                        break;
                }
        }
        if (ret)
                xfrm_pol_hold(ret);
fail:
        read_unlock_bh(&xfrm_policy_lock);

        return ret;
}
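/* Lookup order above: the hashed exact-match chain is scanned first and
 * remembers the winning priority; the inexact list is then allowed to win
 * only with a strictly better (numerically lower) priority. Both chains
 * are kept priority-sorted by xfrm_policy_insert(). */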
static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
                              void **objp, atomic_t **obj_refp)
{
        struct xfrm_policy *pol;
        int err = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
        pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
        if (IS_ERR(pol)) {
                err = PTR_ERR(pol);
                pol = NULL;
        }
        if (pol || err)
                goto end;
#endif
        pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
        if (IS_ERR(pol)) {
                err = PTR_ERR(pol);
                pol = NULL;
        }
#ifdef CONFIG_XFRM_SUB_POLICY
end:
#endif
        if ((*objp = (void *) pol) != NULL)
                *obj_refp = &pol->refcnt;
        return err;
}

static inline int policy_to_flow_dir(int dir)
{
        if (XFRM_POLICY_IN == FLOW_DIR_IN &&
            XFRM_POLICY_OUT == FLOW_DIR_OUT &&
            XFRM_POLICY_FWD == FLOW_DIR_FWD)
                return dir;
        switch (dir) {
        default:
        case XFRM_POLICY_IN:
                return FLOW_DIR_IN;
        case XFRM_POLICY_OUT:
                return FLOW_DIR_OUT;
        case XFRM_POLICY_FWD:
                return FLOW_DIR_FWD;
        }
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
        struct xfrm_policy *pol;

        read_lock_bh(&xfrm_policy_lock);
        if ((pol = sk->sk_policy[dir]) != NULL) {
                int match = xfrm_selector_match(&pol->selector, fl,
                                                sk->sk_family);
                int err = 0;

                if (match) {
                        err = security_xfrm_policy_lookup(pol, fl->secid,
                                        policy_to_flow_dir(dir));
                        if (!err)
                                xfrm_pol_hold(pol);
                        else if (err == -ESRCH)
                                pol = NULL;
                        else
                                pol = ERR_PTR(err);
                } else
                        pol = NULL;
        }
        read_unlock_bh(&xfrm_policy_lock);
        return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
        struct hlist_head *chain = policy_hash_bysel(&pol->selector,
                                                     pol->family, dir);

        hlist_add_head(&pol->bydst, chain);
        hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
        xfrm_policy_count[dir]++;
        xfrm_pol_hold(pol);

        if (xfrm_bydst_should_resize(dir, NULL))
                schedule_work(&xfrm_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                int dir)
{
        if (hlist_unhashed(&pol->bydst))
                return NULL;

        hlist_del(&pol->bydst);
        hlist_del(&pol->byidx);
        xfrm_policy_count[dir]--;

        return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
        write_lock_bh(&xfrm_policy_lock);
        pol = __xfrm_policy_unlink(pol, dir);
        write_unlock_bh(&xfrm_policy_lock);
        if (pol) {
                if (dir < XFRM_POLICY_MAX)
                        atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
                return 0;
        }
        return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
        struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
        if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
                return -EINVAL;
#endif

        write_lock_bh(&xfrm_policy_lock);
        old_pol = sk->sk_policy[dir];
        sk->sk_policy[dir] = pol;
        if (pol) {
                pol->curlft.add_time = get_seconds();
                pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
                __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
        }
        if (old_pol)
                __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
        write_unlock_bh(&xfrm_policy_lock);

        if (old_pol) {
                xfrm_policy_kill(old_pol);
        }
        return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
        struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

        if (newp) {
                newp->selector = old->selector;
                if (security_xfrm_policy_clone(old, newp)) {
                        kfree(newp);
                        return NULL;  /* ENOMEM */
                }
                newp->lft = old->lft;
                newp->curlft = old->curlft;
                newp->action = old->action;
                newp->flags = old->flags;
                newp->xfrm_nr = old->xfrm_nr;
                newp->index = old->index;
                newp->type = old->type;
                memcpy(newp->xfrm_vec, old->xfrm_vec,
                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
                write_lock_bh(&xfrm_policy_lock);
                __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
                write_unlock_bh(&xfrm_policy_lock);
                xfrm_pol_put(newp);
        }
        return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
        struct xfrm_policy *p0 = sk->sk_policy[0],
                           *p1 = sk->sk_policy[1];

        sk->sk_policy[0] = sk->sk_policy[1] = NULL;
        if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
                return -ENOMEM;
        if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
                return -ENOMEM;
        return 0;
}
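/* Per-socket policies are linked into the same global tables as SPD
 * entries, but under the direction slots XFRM_POLICY_MAX+dir, which is
 * how xfrm_spd_getinfo() can report them separately via the *scnt
 * fields. */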
static int
xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
               unsigned short family)
{
        int err;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

        if (unlikely(afinfo == NULL))
                return -EINVAL;
        err = afinfo->get_saddr(local, remote);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}

/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
                      struct xfrm_state **xfrm,
                      unsigned short family)
{
        int nx;
        int i, error;
        xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
        xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
        xfrm_address_t tmp;

        for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
                struct xfrm_state *x;
                xfrm_address_t *remote = daddr;
                xfrm_address_t *local  = saddr;
                struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

                if (tmpl->mode == XFRM_MODE_TUNNEL ||
                    tmpl->mode == XFRM_MODE_BEET) {
                        remote = &tmpl->id.daddr;
                        local = &tmpl->saddr;
                        family = tmpl->encap_family;
                        if (xfrm_addr_any(local, family)) {
                                error = xfrm_get_saddr(&tmp, remote, family);
                                if (error)
                                        goto fail;
                                local = &tmp;
                        }
                }

                x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

                if (x && x->km.state == XFRM_STATE_VALID) {
                        xfrm[nx++] = x;
                        daddr = remote;
                        saddr = local;
                        continue;
                }
                if (x) {
                        error = (x->km.state == XFRM_STATE_ERROR ?
                                 -EINVAL : -EAGAIN);
                        xfrm_state_put(x);
                }

                if (!tmpl->optional)
                        goto fail;
        }
        return nx;

fail:
        for (nx--; nx>=0; nx--)
                xfrm_state_put(xfrm[nx]);
        return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
                  struct xfrm_state **xfrm,
                  unsigned short family)
{
        struct xfrm_state *tp[XFRM_MAX_DEPTH];
        struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
        int cnx = 0;
        int error;
        int ret;
        int i;

        for (i = 0; i < npols; i++) {
                if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
                        error = -ENOBUFS;
                        goto fail;
                }

                ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
                if (ret < 0) {
                        error = ret;
                        goto fail;
                } else
                        cnx += ret;
        }

        /* found states are sorted for outbound processing */
        if (npols > 1)
                xfrm_state_sort(xfrm, tpp, cnx, family);

        return cnx;

fail:
        for (cnx--; cnx>=0; cnx--)
                xfrm_state_put(tpp[cnx]);
        return error;

}
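/* For tunnel and BEET templates the endpoint addresses come from the
 * template itself rather than from the flow; an unset (any) template
 * source address is filled in via a route lookup in xfrm_get_saddr().
 * Transport-mode templates keep the flow's own addresses. */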
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
        struct dst_entry *x;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return ERR_PTR(-EINVAL);
        x = afinfo->find_bundle(fl, policy);
        xfrm_policy_put_afinfo(afinfo);
        return x;
}

static inline int xfrm_get_tos(struct flowi *fl, int family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        int tos;

        if (!afinfo)
                return -EINVAL;

        tos = afinfo->get_tos(fl);

        xfrm_policy_put_afinfo(afinfo);

        return tos;
}

static inline struct xfrm_dst *xfrm_alloc_dst(int family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        struct xfrm_dst *xdst;

        if (!afinfo)
                return ERR_PTR(-EINVAL);

        xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);

        xfrm_policy_put_afinfo(afinfo);

        return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
                                 int nfheader_len)
{
        struct xfrm_policy_afinfo *afinfo =
                xfrm_policy_get_afinfo(dst->ops->family);
        int err;

        if (!afinfo)
                return -EINVAL;

        err = afinfo->init_path(path, dst, nfheader_len);

        xfrm_policy_put_afinfo(afinfo);

        return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
{
        struct xfrm_policy_afinfo *afinfo =
                xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
        int err;

        if (!afinfo)
                return -EINVAL;

        err = afinfo->fill_dst(xdst, dev);

        xfrm_policy_put_afinfo(afinfo);

        return err;
}
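/* The helpers above all follow the same pattern: look up the per-family
 * afinfo ops (which takes xfrm_policy_afinfo_lock for reading), dispatch,
 * then drop the lock via xfrm_policy_put_afinfo(). Every successful get
 * must be paired with a put, since the read lock stays held in between. */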
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
                                            struct xfrm_state **xfrm, int nx,
                                            struct flowi *fl,
                                            struct dst_entry *dst)
{
        unsigned long now = jiffies;
        struct net_device *dev;
        struct dst_entry *dst_prev = NULL;
        struct dst_entry *dst0 = NULL;
        int i = 0;
        int err;
        int header_len = 0;
        int nfheader_len = 0;
        int trailer_len = 0;
        int tos;
        int family = policy->selector.family;

        tos = xfrm_get_tos(fl, family);
        err = tos;
        if (tos < 0)
                goto put_states;

        dst_hold(dst);

        for (; i < nx; i++) {
                struct xfrm_dst *xdst = xfrm_alloc_dst(family);
                struct dst_entry *dst1 = &xdst->u.dst;

                err = PTR_ERR(xdst);
                if (IS_ERR(xdst)) {
                        dst_release(dst);
                        goto put_states;
                }

                if (!dst_prev)
                        dst0 = dst1;
                else {
                        dst_prev->child = dst_clone(dst1);
                        dst1->flags |= DST_NOHASH;
                }

                xdst->route = dst;
                memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));

                if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
                        family = xfrm[i]->props.family;
                        dst = xfrm_dst_lookup(xfrm[i], tos, family);
                        err = PTR_ERR(dst);
                        if (IS_ERR(dst))
                                goto put_states;
                } else
                        dst_hold(dst);

                dst1->xfrm = xfrm[i];
                xdst->genid = xfrm[i]->genid;

                dst1->obsolete = -1;
                dst1->flags |= DST_HOST;
                dst1->lastuse = now;

                dst1->input = dst_discard;
                dst1->output = xfrm[i]->outer_mode->afinfo->output;

                dst1->next = dst_prev;
                dst_prev = dst1;

                header_len += xfrm[i]->props.header_len;
                if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
                        nfheader_len += xfrm[i]->props.header_len;
                trailer_len += xfrm[i]->props.trailer_len;
        }

        dst_prev->child = dst;
        dst0->path = dst;

        err = -ENODEV;
        dev = dst->dev;
        if (!dev)
                goto free_dst;

        /* Copy neighbour for reachability confirmation */
        dst0->neighbour = neigh_clone(dst->neighbour);

        xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
        xfrm_init_pmtu(dst_prev);

        for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
                struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

                err = xfrm_fill_dst(xdst, dev);
                if (err)
                        goto free_dst;

                dst_prev->header_len = header_len;
                dst_prev->trailer_len = trailer_len;
                header_len -= xdst->u.dst.xfrm->props.header_len;
                trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
        }

out:
        return dst0;

put_states:
        for (; i < nx; i++)
                xfrm_state_put(xfrm[i]);
free_dst:
        if (dst0)
                dst_free(dst0);
        dst0 = ERR_PTR(err);
        goto out;
}
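/* The resulting bundle is a chain linked through ->child: dst0 (outermost
 * xfrm) -> dst1 -> ... -> routing dst. Each xfrm_dst remembers the route
 * used for its hop in xdst->route, dst0->path points at the final routing
 * dst, and header/trailer space is accumulated outermost-in so every
 * level knows how much room the remaining transforms still need. */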
static int inline
xfrm_dst_alloc_copy(void **target, void *src, int size)
{
        if (!*target) {
                *target = kmalloc(size, GFP_ATOMIC);
                if (!*target)
                        return -ENOMEM;
        }
        memcpy(*target, src, size);
        return 0;
}

static int inline
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
        return xfrm_dst_alloc_copy((void **)&(xdst->partner),
                                   sel, sizeof(*sel));
#else
        return 0;
#endif
}

static int inline
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
        return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
        return 0;
#endif
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
                  struct sock *sk, int flags)
{
        struct xfrm_policy *policy;
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        int npols;
        int pol_dead;
        int xfrm_nr;
        int pi;
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
        struct dst_entry *dst, *dst_orig = *dst_p;
        int nx = 0;
        int err;
        u32 genid;
        u16 family;
        u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);

restart:
        genid = atomic_read(&flow_cache_genid);
        policy = NULL;
        for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
                pols[pi] = NULL;
        npols = 0;
        pol_dead = 0;
        xfrm_nr = 0;

        if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
                policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
                err = PTR_ERR(policy);
                if (IS_ERR(policy)) {
                        XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
                        goto dropdst;
                }
        }

        if (!policy) {
                /* To accelerate a bit...  */
                if ((dst_orig->flags & DST_NOXFRM) ||
                    !xfrm_policy_count[XFRM_POLICY_OUT])
                        goto nopol;

                policy = flow_cache_lookup(fl, dst_orig->ops->family,
                                           dir, xfrm_policy_lookup);
                err = PTR_ERR(policy);
                if (IS_ERR(policy)) {
                        XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
                        goto dropdst;
                }
        }

        if (!policy)
                goto nopol;

        family = dst_orig->ops->family;
        pols[0] = policy;
        npols ++;
        xfrm_nr += pols[0]->xfrm_nr;

        err = -ENOENT;
        if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
                goto error;

        policy->curlft.use_time = get_seconds();

        switch (policy->action) {
        default:
        case XFRM_POLICY_BLOCK:
                /* Prohibit the flow */
                XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
                err = -EPERM;
                goto error;

        case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
                if (policy->xfrm_nr == 0) {
                        /* Flow passes not transformed. */
                        xfrm_pol_put(policy);
                        return 0;
                }
#endif

                /* Try to find matching bundle.
                 *
                 * LATER: help from flow cache. It is optional, this
                 * is required only for output policy.
                 */
                dst = xfrm_find_bundle(fl, policy, family);
                if (IS_ERR(dst)) {
                        XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
                        err = PTR_ERR(dst);
                        goto error;
                }

                if (dst)
                        break;

#ifdef CONFIG_XFRM_SUB_POLICY
                if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
                        pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
                                                            fl, family,
                                                            XFRM_POLICY_OUT);
                        if (pols[1]) {
                                if (IS_ERR(pols[1])) {
                                        XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
                                        err = PTR_ERR(pols[1]);
                                        goto error;
                                }
                                if (pols[1]->action == XFRM_POLICY_BLOCK) {
                                        XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
                                        err = -EPERM;
                                        goto error;
                                }
                                npols ++;
                                xfrm_nr += pols[1]->xfrm_nr;
                        }
                }
                /*
                 * Because neither flowi nor the bundle carries the
                 * transformation template sizes, when more than one policy
                 * is in use we can only tell whether all of them are
                 * bypass after they have been searched. Note that the
                 * not-transformed bypass above is likewise guarded by the
                 * non-sub-policy configuration.
                 */
                if (xfrm_nr == 0) {
                        /* Flow passes not transformed. */
                        xfrm_pols_put(pols, npols);
                        return 0;
                }

#endif
                nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

                if (unlikely(nx<0)) {
                        err = nx;
                        if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
                                /* EREMOTE tells the caller to generate
                                 * a one-shot blackhole route.
                                 */
                                XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
                                xfrm_pol_put(policy);
                                return -EREMOTE;
                        }
                        if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
                                DECLARE_WAITQUEUE(wait, current);

                                add_wait_queue(&km_waitq, &wait);
                                set_current_state(TASK_INTERRUPTIBLE);
                                schedule();
                                set_current_state(TASK_RUNNING);
                                remove_wait_queue(&km_waitq, &wait);

                                nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

                                if (nx == -EAGAIN && signal_pending(current)) {
                                        XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
                                        err = -ERESTART;
                                        goto error;
                                }
                                if (nx == -EAGAIN ||
                                    genid != atomic_read(&flow_cache_genid)) {
                                        xfrm_pols_put(pols, npols);
                                        goto restart;
                                }
                                err = nx;
                        }
                        if (err < 0) {
                                XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
                                goto error;
                        }
                }
                if (nx == 0) {
                        /* Flow passes not transformed. */
                        xfrm_pols_put(pols, npols);
                        return 0;
                }

                dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
                err = PTR_ERR(dst);
                if (IS_ERR(dst)) {
                        XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
                        goto error;
                }

                for (pi = 0; pi < npols; pi++) {
                        read_lock_bh(&pols[pi]->lock);
                        pol_dead |= pols[pi]->dead;
                        read_unlock_bh(&pols[pi]->lock);
                }

                write_lock_bh(&policy->lock);
                if (unlikely(pol_dead || stale_bundle(dst))) {
                        /* Wow! While we worked on resolving, this
                         * policy has gone. Retry. It is not paranoia,
                         * we just cannot enlist new bundle to dead object.
                         * We can't enlist stale bundles either.
                         */
                        write_unlock_bh(&policy->lock);
                        if (dst)
                                dst_free(dst);

                        if (pol_dead)
                                XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
                        else
                                XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
                        err = -EHOSTUNREACH;
                        goto error;
                }

                if (npols > 1)
                        err = xfrm_dst_update_parent(dst, &pols[1]->selector);
                else
                        err = xfrm_dst_update_origin(dst, fl);
                if (unlikely(err)) {
                        write_unlock_bh(&policy->lock);
                        if (dst)
                                dst_free(dst);
                        XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
                        goto error;
                }

                dst->next = policy->bundles;
                policy->bundles = dst;
                dst_hold(dst);
                write_unlock_bh(&policy->lock);
        }
        *dst_p = dst;
        dst_release(dst_orig);
        xfrm_pols_put(pols, npols);
        return 0;

error:
        xfrm_pols_put(pols, npols);
dropdst:
        dst_release(dst_orig);
        *dst_p = NULL;
        return err;

nopol:
        err = -ENOENT;
        if (flags & XFRM_LOOKUP_ICMP)
                goto dropdst;
        return 0;
}
EXPORT_SYMBOL(__xfrm_lookup);

int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
                struct sock *sk, int flags)
{
        int err = __xfrm_lookup(dst_p, fl, sk, flags);

        if (err == -EREMOTE) {
                dst_release(*dst_p);
                *dst_p = NULL;
                err = -EAGAIN;
        }

        return err;
}
EXPORT_SYMBOL(xfrm_lookup);
static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
{
        struct xfrm_state *x;

        if (!skb->sp || idx < 0 || idx >= skb->sp->len)
                return 0;
        x = skb->sp->xvec[idx];
        if (!x->type->reject)
                return 0;
        return x->type->reject(x, skb, fl);
}

/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we make this in maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
              unsigned short family)
{
        if (xfrm_state_kern(x))
                return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
        return  x->id.proto == tmpl->id.proto &&
                (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
                (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
                x->props.mode == tmpl->mode &&
                ((tmpl->aalgos & (1<<x->props.aalgo)) ||
                 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
                !(x->props.mode != XFRM_MODE_TRANSPORT &&
                  xfrm_state_addr_cmp(tmpl, x, family));
}
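/* In xfrm_state_ok() a zero template field is again a wildcard: a template
 * with spi == 0 or reqid == 0 accepts any SPI or reqid, and the address
 * comparison is skipped entirely for transport-mode states. */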
/*
 * 0 or more than 0 is returned when validation succeeded (either bypass
 * because of optional transport mode, or the next index of the matched
 * secpath state with the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
               unsigned short family)
{
        int idx = start;

        if (tmpl->optional) {
                if (tmpl->mode == XFRM_MODE_TRANSPORT)
                        return start;
        } else
                start = -1;
        for (; idx < sp->len; idx++) {
                if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
                        return ++idx;
                if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
                        if (start == -1)
                                start = -2-idx;
                        break;
                }
        }
        return start;
}

int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
                          unsigned int family, int reverse)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        int err;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        afinfo->decode_session(skb, fl, reverse);
        err = security_xfrm_decode_session(skb, &fl->secid);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(__xfrm_decode_session);

static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
{
        for (; k < sp->len; k++) {
                if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
                        *idxp = k;
                        return 1;
                }
        }

        return 0;
}
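/* __xfrm_policy_check() below walks the templates of all matched policies
 * against the packet's sec_path using xfrm_policy_ok(): every
 * non-optional template must be matched in order, and any leftover
 * non-transport state (found via secpath_has_nontransport()) causes a
 * reject. */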
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                        unsigned short family)
{
        struct xfrm_policy *pol;
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        int npols = 0;
        int xfrm_nr;
        int pi;
        int reverse;
        struct flowi fl;
        u8 fl_dir;
        int xerr_idx = -1;

        reverse = dir & ~XFRM_POLICY_MASK;
        dir &= XFRM_POLICY_MASK;
        fl_dir = policy_to_flow_dir(dir);

        if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
                XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
                return 0;
        }

        nf_nat_decode_session(skb, &fl, family);

        /* First, check used SA against their selectors. */
        if (skb->sp) {
                int i;

                for (i=skb->sp->len-1; i>=0; i--) {
                        struct xfrm_state *x = skb->sp->xvec[i];
                        if (!xfrm_selector_match(&x->sel, &fl, family)) {
                                XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
                                return 0;
                        }
                }
        }

        pol = NULL;
        if (sk && sk->sk_policy[dir]) {
                pol = xfrm_sk_policy_lookup(sk, dir, &fl);
                if (IS_ERR(pol)) {
                        XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
                        return 0;
                }
        }

        if (!pol)
                pol = flow_cache_lookup(&fl, family, fl_dir,
                                        xfrm_policy_lookup);

        if (IS_ERR(pol)) {
                XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
                return 0;
        }

        if (!pol) {
                if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
                        xfrm_secpath_reject(xerr_idx, skb, &fl);
                        XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
                        return 0;
                }
                return 1;
        }

        pol->curlft.use_time = get_seconds();

        pols[0] = pol;
        npols ++;
#ifdef CONFIG_XFRM_SUB_POLICY
        if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
                pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
                                                    &fl, family,
                                                    XFRM_POLICY_IN);
                if (pols[1]) {
                        if (IS_ERR(pols[1])) {
                                XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
                                return 0;
                        }
                        pols[1]->curlft.use_time = get_seconds();
                        npols ++;
                }
        }
#endif

        if (pol->action == XFRM_POLICY_ALLOW) {
                struct sec_path *sp;
                static struct sec_path dummy;
                struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
                struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
                struct xfrm_tmpl **tpp = tp;
                int ti = 0;
                int i, k;

                if ((sp = skb->sp) == NULL)
                        sp = &dummy;

                for (pi = 0; pi < npols; pi++) {
                        if (pols[pi] != pol &&
                            pols[pi]->action != XFRM_POLICY_ALLOW) {
                                XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
                                goto reject;
                        }
                        if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
                                XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
                                goto reject_error;
                        }
                        for (i = 0; i < pols[pi]->xfrm_nr; i++)
                                tpp[ti++] = &pols[pi]->xfrm_vec[i];
                }
                xfrm_nr = ti;
                if (npols > 1) {
                        xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
                        tpp = stp;
                }

                /* For each tunnel xfrm, find the first matching tmpl.
                 * For each tmpl before that, find corresponding xfrm.
                 * Order is _important_. Later we will implement
                 * some barriers, but at the moment barriers
                 * are implied between each two transformations.
                 */
                for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
                        k = xfrm_policy_ok(tpp[i], sp, k, family);
                        if (k < 0) {
                                if (k < -1)
                                        /* "-2 - errored_index" returned */
                                        xerr_idx = -(2+k);
                                XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
                                goto reject;
                        }
                }

                if (secpath_has_nontransport(sp, k, &xerr_idx)) {
                        XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
                        goto reject;
                }

                xfrm_pols_put(pols, npols);
                return 1;
        }
        XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);

reject:
        xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
        xfrm_pols_put(pols, npols);
        return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
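/* __xfrm_policy_check() returns 1 to accept the packet and 0 to drop it.
 * __xfrm_route_forward() below applies the same machinery to forwarded
 * packets: it decodes the flow and succeeds only if xfrm_lookup() on
 * skb->dst returns 0, i.e. the forward path is allowed (and possibly
 * transformed). */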
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
        struct flowi fl;

        if (xfrm_decode_session(skb, &fl, family) < 0) {
                /* XXX: we should have something like FWDHDRERROR here. */
                XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
                return 0;
        }

        return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
        /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
         * to "-1" to force all XFRM destinations to get validated by
         * dst_ops->check on every use.  We do this because when a
         * normal route referenced by an XFRM dst is obsoleted we do
         * not go looking around for all parent referencing XFRM dsts
         * so that we can invalidate them.  It is just too much work.
         * Instead we make the checks here on every use.  For example:
         *
         *      XFRM dst A --> IPv4 dst X
         *
         * X is the "xdst->route" of A (X is also the "dst->path" of A
         * in this example).  If X is marked obsolete, "A" will not
         * notice.  That's what we are validating here via the
         * stale_bundle() check.
         *
         * When a policy's bundle is pruned, we dst_free() the XFRM
         * dst which causes its ->obsolete field to be set to a
         * positive non-zero integer.  If an XFRM dst has been pruned
         * like this, we want to force a new route lookup.
         */
        if (dst->obsolete < 0 && !stale_bundle(dst))
                return dst;

        return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
        return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
        while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
                dst->dev = dev->nd_net->loopback_dev;
                dev_hold(dst->dev);
                dev_put(dev);
        }
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
        /* Impossible. Such dst must be popped before it reaches the
         * point of failure. */
        return;
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
        if (dst) {
                if (dst->obsolete) {
                        dst_release(dst);
                        dst = NULL;
                }
        }
        return dst;
}
static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
{
        struct dst_entry *dst, **dstp;

        write_lock(&pol->lock);
        dstp = &pol->bundles;
        while ((dst=*dstp) != NULL) {
                if (func(dst)) {
                        *dstp = dst->next;
                        dst->next = *gc_list_p;
                        *gc_list_p = dst;
                } else {
                        dstp = &dst->next;
                }
        }
        write_unlock(&pol->lock);
}

static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
        struct dst_entry *gc_list = NULL;
        int dir;

        read_lock_bh(&xfrm_policy_lock);
        for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
                struct xfrm_policy *pol;
                struct hlist_node *entry;
                struct hlist_head *table;
                int i;

                hlist_for_each_entry(pol, entry,
                                     &xfrm_policy_inexact[dir], bydst)
                        prune_one_bundle(pol, func, &gc_list);

                table = xfrm_policy_bydst[dir].table;
                for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
                        hlist_for_each_entry(pol, entry, table + i, bydst)
                                prune_one_bundle(pol, func, &gc_list);
                }
        }
        read_unlock_bh(&xfrm_policy_lock);

        while (gc_list) {
                struct dst_entry *dst = gc_list;
                gc_list = dst->next;
                dst_free(dst);
        }
}

static int unused_bundle(struct dst_entry *dst)
{
        return !atomic_read(&dst->__refcnt);
}

static void __xfrm_garbage_collect(void)
{
        xfrm_prune_bundles(unused_bundle);
}

static int xfrm_flush_bundles(void)
{
        xfrm_prune_bundles(stale_bundle);
        return 0;
}

static void xfrm_init_pmtu(struct dst_entry *dst)
{
        do {
                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
                u32 pmtu, route_mtu_cached;

                pmtu = dst_mtu(dst->child);
                xdst->child_mtu_cached = pmtu;

                pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

                route_mtu_cached = dst_mtu(xdst->route);
                xdst->route_mtu_cached = route_mtu_cached;

                if (pmtu > route_mtu_cached)
                        pmtu = route_mtu_cached;

                dst->metrics[RTAX_MTU-1] = pmtu;
        } while ((dst = dst->next));
}
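/* Illustration of the PMTU walk above: if xfrm_state_mtu() shrinks a
 * child MTU of, say, 1500 down to 1442 to make room for the transform
 * overhead, and the route MTU is 1400, the bundle level is given
 * min(1442, 1400) = 1400 (figures are only an example; the overhead
 * depends on the algorithms). xfrm_bundle_ok() redoes this walk whenever
 * a cached child or route MTU changes. */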
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
                   struct flowi *fl, int family, int strict)
{
        struct dst_entry *dst = &first->u.dst;
        struct xfrm_dst *last;
        u32 mtu;

        if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
            (dst->dev && !netif_running(dst->dev)))
                return 0;
#ifdef CONFIG_XFRM_SUB_POLICY
        if (fl) {
                if (first->origin && !flow_cache_uli_match(first->origin, fl))
                        return 0;
                if (first->partner &&
                    !xfrm_selector_match(first->partner, fl, family))
                        return 0;
        }
#endif

        last = NULL;

        do {
                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

                if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
                        return 0;
                if (fl && pol &&
                    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
                        return 0;
                if (dst->xfrm->km.state != XFRM_STATE_VALID)
                        return 0;
                if (xdst->genid != dst->xfrm->genid)
                        return 0;

                if (strict && fl &&
                    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
                    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
                        return 0;

                mtu = dst_mtu(dst->child);
                if (xdst->child_mtu_cached != mtu) {
                        last = xdst;
                        xdst->child_mtu_cached = mtu;
                }

                if (!dst_check(xdst->route, xdst->route_cookie))
                        return 0;
                mtu = dst_mtu(xdst->route);
                if (xdst->route_mtu_cached != mtu) {
                        last = xdst;
                        xdst->route_mtu_cached = mtu;
                }

                dst = dst->child;
        } while (dst->xfrm);

        if (likely(!last))
                return 1;

        mtu = last->child_mtu_cached;
        for (;;) {
                dst = &last->u.dst;

                mtu = xfrm_state_mtu(dst->xfrm, mtu);
                if (mtu > last->route_mtu_cached)
                        mtu = last->route_mtu_cached;
                dst->metrics[RTAX_MTU-1] = mtu;

                if (last == first)
                        break;

                last = (struct xfrm_dst *)last->u.dst.next;
                last->child_mtu_cached = mtu;
        }

        return 1;
}

EXPORT_SYMBOL(xfrm_bundle_ok);

int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_policy_afinfo_lock);
        if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else {
                struct dst_ops *dst_ops = afinfo->dst_ops;
                if (likely(dst_ops->kmem_cachep == NULL))
                        dst_ops->kmem_cachep = xfrm_dst_cache;
                if (likely(dst_ops->check == NULL))
                        dst_ops->check = xfrm_dst_check;
                if (likely(dst_ops->negative_advice == NULL))
                        dst_ops->negative_advice = xfrm_negative_advice;
                if (likely(dst_ops->link_failure == NULL))
                        dst_ops->link_failure = xfrm_link_failure;
                if (likely(afinfo->garbage_collect == NULL))
                        afinfo->garbage_collect = __xfrm_garbage_collect;
                xfrm_policy_afinfo[afinfo->family] = afinfo;
        }
        write_unlock_bh(&xfrm_policy_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
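/* Registration fills in only the callbacks an afinfo left NULL, so a
 * family may pre-set its own dst_ops->check, negative_advice,
 * link_failure or garbage_collect before registering and they will be
 * kept. */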
int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			struct dst_ops *dst_ops = afinfo->dst_ops;
			xfrm_policy_afinfo[afinfo->family] = NULL;
			dst_ops->kmem_cachep = NULL;
			dst_ops->check = NULL;
			dst_ops->negative_advice = NULL;
			dst_ops->link_failure = NULL;
			afinfo->garbage_collect = NULL;
		}
	}
	write_unlock_bh(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

/* On success the afinfo read lock is left held; the matching
 * xfrm_policy_put_afinfo() call releases it.
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_policy_afinfo_lock);
	afinfo = xfrm_policy_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_policy_afinfo_lock);
	return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	read_unlock(&xfrm_policy_afinfo_lock);
}

static int xfrm_dev_event(struct notifier_block *this, unsigned long event,
			  void *ptr)
{
	struct net_device *dev = ptr;

	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_DOWN:
		xfrm_flush_bundles();
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
	.notifier_call = xfrm_dev_event,
};

#ifdef CONFIG_XFRM_STATISTICS
static int __init xfrm_statistics_init(void)
{
	if (snmp_mib_init((void **)xfrm_statistics,
			  sizeof(struct linux_xfrm_mib)) < 0)
		return -ENOMEM;
	return 0;
}
#endif

static void __init xfrm_policy_init(void)
{
	unsigned int hmask, sz;
	int dir;

	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	xfrm_policy_byidx = xfrm_hash_alloc(sz);
	xfrm_idx_hmask = hmask;
	if (!xfrm_policy_byidx)
		panic("XFRM: failed to allocate byidx hash\n");

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);

		htab = &xfrm_policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		htab->hmask = hmask;
		if (!htab->table)
			panic("XFRM: failed to allocate bydst hash\n");
	}

	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
	register_netdevice_notifier(&xfrm_dev_notifier);
}
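/*
 * Sizing note (illustrative, not part of the original source): the
 * initial tables above use hmask = 8 - 1 = 7, i.e. hmask + 1 = 8
 * buckets, so sz = 8 * sizeof(struct hlist_head) -- 64 bytes per table
 * on a 64-bit build, 32 on a 32-bit one.  The per-direction bydst
 * tables are grown later by the hash-resize logic in this file once a
 * direction accumulates enough policies.
 */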
void __init xfrm_init(void)
{
#ifdef CONFIG_XFRM_STATISTICS
	xfrm_statistics_init();
#endif
	xfrm_state_init();
	xfrm_policy_init();
	xfrm_input_init();
#ifdef CONFIG_XFRM_STATISTICS
	xfrm_proc_init();
#endif
}

#ifdef CONFIG_AUDITSYSCALL
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
				 NIPQUAD(sel->saddr.a4));
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
				 NIPQUAD(sel->daddr.a4));
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=" NIP6_FMT,
				 NIP6(*(struct in6_addr *)sel->saddr.a6));
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=" NIP6_FMT,
				 NIP6(*(struct in6_addr *)sel->daddr.a6));
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}

void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
			   u32 auid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-add");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
			      u32 auid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-delete");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

#ifdef CONFIG_XFRM_MIGRATE
static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
				       struct xfrm_selector *sel_tgt)
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
				  sel_cmp->family) == 0 &&
		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
				  sel_cmp->family) == 0 &&
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
			return 1;
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
			return 1;
		}
	}
	return 0;
}

static struct xfrm_policy *xfrm_migrate_policy_find(struct xfrm_selector *sel,
						    u8 dir, u8 type)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type &&
		    pol->priority < priority) {
			ret = pol;
			break;
		}
	}

	if (ret)
		xfrm_pol_hold(ret);

	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}
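/*
 * Illustrative note (not part of the original source): the two passes
 * above mirror ordinary policy resolution.  Suppose the direct hash
 * chain holds a host-to-host policy with priority 100 and the inexact
 * list a catch-all policy with priority 10: the hashed entry is found
 * first, but the second pass replaces it because 10 < 100 (lower value
 * wins), so the policy migrated is the one ordinary lookup would pick.
 */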
static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
					  m->old_family) == 0 &&
			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
					  m->old_family) == 0) {
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* In transport mode the template carries no IP
			 * addresses, so comparing mode and protocol above
			 * is sufficient.
			 */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	struct dst_entry *dst;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
	if (unlikely(pol->dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
			while ((dst = pol->bundles) != NULL) {
				pol->bundles = dst->next;
				dst_free(dst);
			}
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
				   m[i].old_family) == 0) &&
		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
				   m[i].old_family) == 0))
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}
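/*
 * Illustrative example (not part of the original source): to migrate a
 * single tunnel-mode ESP SA to a new endpoint pair, a key manager would
 * pass one entry shaped roughly like
 *
 *	struct xfrm_migrate mig = {
 *		.old_family = AF_INET,
 *		.new_family = AF_INET,
 *		.mode       = XFRM_MODE_TUNNEL,
 *		.proto      = IPPROTO_ESP,
 *		.reqid      = 0,	(0 matches any reqid)
 *	};
 *
 * with old_saddr/old_daddr set to the current endpoints and
 * new_saddr/new_daddr to the replacements.  xfrm_migrate_check() above
 * rejects entries whose new addresses equal the old ones or are
 * unspecified, and duplicate (old addresses, proto, mode, reqid,
 * old_family) tuples.
 */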
int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	/* Stage 1 - find policy */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
#endif
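/*
 * Illustrative call flow (not part of the original source):
 * xfrm_migrate() is driven by a key manager, e.g. on receipt of a
 * PF_KEY MIGRATE or netlink XFRM_MSG_MIGRATE request, roughly as
 *
 *	err = xfrm_migrate(&sel, XFRM_POLICY_OUT, XFRM_POLICY_TYPE_MAIN,
 *			   m, num_migrate);
 *
 * If anything fails before Stage 3 completes, restore_state drops the
 * references taken on the old states and deletes the states already
 * cloned for the new endpoints, so a partial migration leaves the SAD
 * unchanged.
 */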