/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

int sysctl_xfrm_larval_drop __read_mostly;

#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
EXPORT_SYMBOL(xfrm_statistics);
#endif

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_RWLOCK(xfrm_policy_lock);

unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_count);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);

static inline int
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return	addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

static inline int
__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return	addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
			unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return 0;
}
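
/*
 * Editor's note (illustrative sketch, not part of the original file): a
 * selector matches a flow when all of the masked comparisons above hold;
 * zero port masks, a zero protocol or a zero ifindex act as wildcards.
 * A minimal caller-side sketch, using a made-up 10.0.0.0/24 TCP selector:
 */
#if 0	/* example only */
static int example_match(struct flowi *fl)
{
	struct xfrm_selector sel = {
		.daddr.a4	= htonl(0x0a000000),	/* 10.0.0.0 */
		.prefixlen_d	= 24,
		.proto		= IPPROTO_TCP,
		/* dport/dport_mask and sport/sport_mask left zero:
		 * any port matches */
	};

	/* non-zero iff fl falls inside the selector */
	return xfrm_selector_match(&sel, fl, AF_INET);
}
#endif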
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						int family)
{
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
		saddr = x->coaddr;
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
		daddr = x->coaddr;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(tos, saddr, daddr);
	xfrm_policy_put_afinfo(afinfo);
	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}


/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
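
/*
 * Editor's illustrative sketch (not part of the original file): a keying
 * path such as the pfkeyv2 SPD calls mentioned above would typically
 * allocate a policy, fill in the selector and lifetimes, and hand it to
 * xfrm_policy_insert().  The field values below are made up.
 */
#if 0	/* example only */
static int example_install_policy(void)
{
	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);

	if (!xp)
		return -ENOMEM;

	xp->family = AF_INET;
	xp->action = XFRM_POLICY_ALLOW;
	/* (selector and templates would be filled in here) */

	/* soft expiry makes xfrm_policy_timer() send km_policy_expired()
	 * with hard == 0; hard expiry deletes the policy */
	xp->lft.soft_add_expires_seconds = 3000;
	xp->lft.hard_add_expires_seconds = 3600;

	return xfrm_policy_insert(XFRM_POLICY_OUT, xp, 1 /* excl */);
}
#endif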
/* Destroy xfrm_policy: descendant resources must have been released by
 * this point.
 */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->dead);

	BUG_ON(policy->bundles);

	if (del_timer(&policy->timer))
		BUG();

	security_xfrm_policy_free(policy);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(struct work_struct *work)
{
	struct xfrm_policy *policy;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_policy_gc_lock);
	gc_list.first = xfrm_policy_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
		xfrm_policy_gc_kill(policy);
}

/* Rule must be locked. Release descendant resources, announce
 * the entry dead. The rule must already be unlinked from the lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->dead;
	policy->dead = 1;
	write_unlock_bh(&policy->lock);

	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	spin_lock(&xfrm_policy_gc_lock);
	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
	spin_unlock(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}

struct xfrm_policy_hash {
	struct hlist_head	*table;
	unsigned int		hmask;
};

static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
static struct hlist_head *xfrm_policy_byidx __read_mostly;
static unsigned int xfrm_idx_hmask __read_mostly;
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(u32 index)
{
	return __idx_hash(index, xfrm_idx_hmask);
}
static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&xfrm_policy_inexact[dir] :
		xfrm_policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

	return xfrm_policy_bydst[dir].table + hash;
}

static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			hlist_del(entry);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del(entry);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = entry;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	xfrm_policy_bydst[dir].table = ndst;
	xfrm_policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = xfrm_policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	xfrm_policy_byidx = nidx;
	xfrm_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}
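
/*
 * Editor's note (illustrative): xfrm_new_hash_mask() doubles the table on
 * every resize, e.g. hmask 7 (8 buckets) -> ((7 + 1) << 1) - 1 = 15
 * (16 buckets), so the mask always has the form 2^n - 1 and the tables
 * grow geometrically until capped by xfrm_policy_hashmax below.
 */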
static inline int xfrm_bydst_should_resize(int dir, int *total)
{
	unsigned int cnt = xfrm_policy_count[dir];
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = xfrm_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *__unused)
{
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(dir, &total))
			xfrm_bydst_resize(dir);
	}
	if (xfrm_byidx_should_resize(total))
		xfrm_byidx_resize(total);

	mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(u8 type, int dir)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_node *entry;
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		list = xfrm_policy_byidx + idx_hash(idx);
		found = 0;
		hlist_for_each_entry(p, entry, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}
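
/*
 * Editor's note (illustrative): indices advance in steps of 8 with the
 * direction OR'd into the low bits, which is what lets
 * xfrm_policy_id2dir() recover the direction from an index.  E.g. the
 * first two indices generated for XFRM_POLICY_OUT (dir 1) would be
 * 1 and 9.
 */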
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *entry, *newpos;
	struct dst_entry *gc_list;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	xfrm_policy_count[dir]++;
	atomic_inc(&flow_cache_genid);
	if (delpol) {
		hlist_del(&delpol->bydst);
		hlist_del(&delpol->byidx);
		xfrm_policy_count[dir]--;
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);

	read_lock_bh(&xfrm_policy_lock);
	gc_list = NULL;
	entry = &policy->bydst;
	hlist_for_each_entry_continue(policy, entry, bydst) {
		struct dst_entry *dst;

		write_lock(&policy->lock);
		dst = policy->bundles;
		if (dst) {
			struct dst_entry *tail = dst;
			while (tail->next)
				tail = tail->next;
			tail->next = gc_list;
			gc_list = dst;

			policy->bundles = NULL;
		}
		write_unlock(&policy->lock);
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;

		gc_list = dst->next;
		dst_free(dst);
	}

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
					  struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == type &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(pol);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
				     int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = xfrm_policy_byidx + idx_hash(id);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, byidx) {
		if (pol->type == type && pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(pol);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(pol);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif

int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	write_lock_bh(&xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i, killed;

		killed = 0;
	again1:
		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			hlist_del(&pol->bydst);
			hlist_del(&pol->byidx);
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->secid);

			xfrm_policy_kill(pol);
			killed++;

			write_lock_bh(&xfrm_policy_lock);
			goto again1;
		}

		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				write_unlock_bh(&xfrm_policy_lock);

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->secid);
				xfrm_policy_kill(pol);
				killed++;

				write_lock_bh(&xfrm_policy_lock);
				goto again2;
			}
		}

		xfrm_policy_count[dir] -= killed;
	}
	atomic_inc(&flow_cache_genid);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol, *last = NULL;
	struct hlist_node *entry;
	int dir, last_dir = 0, count, error;

	read_lock_bh(&xfrm_policy_lock);
	count = 0;

	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		struct hlist_head *table = xfrm_policy_bydst[dir].table;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			if (last) {
				error = func(last, last_dir % XFRM_POLICY_MAX,
					     count, data);
				if (error)
					goto out;
			}
			last = pol;
			last_dir = dir;
			count++;
		}
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry, table + i, bydst) {
				if (pol->type != type)
					continue;
				if (last) {
					error = func(last, last_dir % XFRM_POLICY_MAX,
						     count, data);
					if (error)
						goto out;
				}
				last = pol;
				last_dir = dir;
				count++;
			}
		}
	}
	if (count == 0) {
		error = -ENOENT;
		goto out;
	}
	error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
out:
	read_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
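
/*
 * Editor's illustrative sketch (not part of the original file): the walker
 * calls func() once per policy, deferring each entry by one iteration so
 * that the final call can be made with count == 0; a dump callback can use
 * that to detect the end of the walk.  A trivial counting callback:
 */
#if 0	/* example only */
static int example_count_cb(struct xfrm_policy *xp, int dir, int count,
			    void *ptr)
{
	(*(int *)ptr)++;
	return 0;		/* non-zero would abort the walk */
}

static int example_count_main_policies(void)
{
	int n = 0;
	int err = xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN,
				   example_count_cb, &n);
	return err ? err : n;
}
#endif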
/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	struct xfrm_selector *sel = &pol->selector;
	int match, ret = -ESRCH;

	if (pol->family != family ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol, fl->secid, dir);

	return ret;
}

static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	xfrm_address_t *daddr, *saddr;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			      void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;
	int err = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
	if (pol || err)
		goto end;
#endif
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
#ifdef CONFIG_XFRM_SUB_POLICY
end:
#endif
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
	return err;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		int err = 0;

		if (match) {
			err = security_xfrm_policy_lookup(pol, fl->secid,
					policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}
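
/*
 * Editor's note (illustrative): xfrm_policy_lookup_bytype() above is a
 * two-pass search.  Pass one scans the exact dst/src hash chain and
 * remembers the priority of the first match; pass two scans the "inexact"
 * list (selectors too wide to hash) and only wins with a strictly better,
 * i.e. lower, priority.  So given an exact-chain match at priority 100
 * and an inexact match at priority 10, the inexact policy is returned.
 */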
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
						     pol->family, dir);

	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
	xfrm_policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del(&pol->bydst);
	hlist_del(&pol->byidx);
	xfrm_policy_count[dir]--;

	return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		if (dir < XFRM_POLICY_MAX)
			atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old, newp)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}

static int
xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
/* Resolve list of templates for the flow, given policy.
 */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
		      struct xfrm_state **xfrm,
		      unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			family = tmpl->encap_family;
			if (xfrm_addr_any(local, family)) {
				error = xfrm_get_saddr(&tmp, remote, family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

fail:
	for (cnx--; cnx>=0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}
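
/*
 * Editor's note (illustrative): with CONFIG_XFRM_SUB_POLICY a lookup may
 * yield two policies (sub + main), so their states are first resolved
 * into the scratch array tp[] and then merged by xfrm_state_sort() into
 * the order outbound processing expects.  With a single policy the states
 * go straight into the caller's array and no sort is needed.
 */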
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
	struct dst_entry *x;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EINVAL);
	x = afinfo->find_bundle(fl, policy);
	xfrm_policy_put_afinfo(afinfo);
	return x;
}

static inline int xfrm_get_tos(struct flowi *fl, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;

	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

static inline struct xfrm_dst *xfrm_alloc_dst(int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->fill_dst(xdst, dev);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... In short, bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
					    struct flowi *fl,
					    struct dst_entry *dst)
{
	unsigned long now = jiffies;
	struct net_device *dev;
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
	int nfheader_len = 0;
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
		struct xfrm_dst *xdst = xfrm_alloc_dst(family);
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
			dst = xfrm_dst_lookup(xfrm[i], tos, family);
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
		xdst->genid = xfrm[i]->genid;

		dst1->obsolete = -1;
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
		dst1->output = xfrm[i]->outer_mode->afinfo->output;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	/* Copy neighbour for reachability confirmation */
	dst0->neighbour = neigh_clone(dst->neighbour);

	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

		err = xfrm_fill_dst(xdst, dev);
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}
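
/*
 * Editor's note (illustrative): for nx == 2 the loop above links
 *
 *	dst0 (xfrm[0]) --child--> dst1 (xfrm[1]) --child--> routing dst
 *
 * where each xdst->route is the routing dst that was current at that
 * level (it changes at tunnel-mode states, which trigger a fresh
 * xfrm_dst_lookup()), and dst0->path points at the final routing dst.
 * That path pointer is what xfrm_dst_check()/stale_bundle() revalidate.
 */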
static int inline
xfrm_dst_alloc_copy(void **target, void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}
	memcpy(*target, src, size);
	return 0;
}

static int inline
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static int inline
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		  struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols;
	int pol_dead;
	int xfrm_nr;
	int pi;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);

restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
		pols[pi] = NULL;
	npols = 0;
	pol_dead = 0;
	xfrm_nr = 0;

	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy) {
		/* To accelerate a bit... */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !xfrm_policy_count[XFRM_POLICY_OUT])
			goto nopol;

		policy = flow_cache_lookup(fl, dst_orig->ops->family,
					   dir, xfrm_policy_lookup);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy)
		goto nopol;

	family = dst_orig->ops->family;
	pols[0] = policy;
	npols ++;
	xfrm_nr += pols[0]->xfrm_nr;

	err = -ENOENT;
	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
		goto error;

	policy->curlft.use_time = get_seconds();

	switch (policy->action) {
	default:
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;

	case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
		if (policy->xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}
#endif

		/* Try to find matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = PTR_ERR(dst);
			goto error;
		}

		if (dst)
			break;

#ifdef CONFIG_XFRM_SUB_POLICY
		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
							    fl, family,
							    XFRM_POLICY_OUT);
			if (pols[1]) {
				if (IS_ERR(pols[1])) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
					err = PTR_ERR(pols[1]);
					goto error;
				}
				if (pols[1]->action == XFRM_POLICY_BLOCK) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
					err = -EPERM;
					goto error;
				}
				npols ++;
				xfrm_nr += pols[1]->xfrm_nr;
			}
		}

		/*
		 * Because neither flowi nor the bundle information knows the
		 * transformation template size, with more than one policy in
		 * use we can only tell whether all of them are bypass after
		 * they have been searched.  Note that, as above, the
		 * not-transformed bypass is likewise guarded by the non-
		 * sub-policy configuration.
		 */
		if (xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

#endif
		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
				/* EREMOTE tells the caller to generate
				 * a one-shot blackhole route.
				 */
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				xfrm_pol_put(policy);
				return -EREMOTE;
			}
			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pols_put(pols, npols);
					goto restart;
				}
				err = nx;
			}
			if (err < 0) {
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				goto error;
			}
		}
		if (nx == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
		err = PTR_ERR(dst);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
			goto error;
		}

		for (pi = 0; pi < npols; pi++) {
			read_lock_bh(&pols[pi]->lock);
			pol_dead |= pols[pi]->dead;
			read_unlock_bh(&pols[pi]->lock);
		}

		write_lock_bh(&policy->lock);
		if (unlikely(pol_dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist new bundle to dead object.
			 * We can't enlist stale bundles either.
			 */
			write_unlock_bh(&policy->lock);
			if (dst)
				dst_free(dst);

			if (pol_dead)
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
			else
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = -EHOSTUNREACH;
			goto error;
		}

		if (npols > 1)
			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
		else
			err = xfrm_dst_update_origin(dst, fl);
		if (unlikely(err)) {
			write_unlock_bh(&policy->lock);
			if (dst)
				dst_free(dst);
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			goto error;
		}

		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pols_put(pols, npols);
	return 0;

error:
	xfrm_pols_put(pols, npols);
dropdst:
	dst_release(dst_orig);
	*dst_p = NULL;
	return err;

nopol:
	err = -ENOENT;
	if (flags & XFRM_LOOKUP_ICMP)
		goto dropdst;
	return 0;
}
EXPORT_SYMBOL(__xfrm_lookup);

int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	int err = __xfrm_lookup(dst_p, fl, sk, flags);

	if (err == -EREMOTE) {
		dst_release(*dst_p);
		*dst_p = NULL;
		err = -EAGAIN;
	}

	return err;
}
EXPORT_SYMBOL(xfrm_lookup);

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}

/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we make this in maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		((tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * Returns 0 or more when validation succeeds: either a bypass because of
 * an optional transport-mode template, or the next index into the secpath
 * after the state matched against the template.
 * Returns -1 when no matching template is found.
 * Otherwise returns "-2 - errored_index".
 */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}

int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl, reverse);
	err = security_xfrm_decode_session(skb, &fl->secid);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(__xfrm_decode_session);

static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}
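
/*
 * Editor's worked example (illustrative): given a secpath of
 * [0] = transport-mode AH, [1] = tunnel-mode ESP and a single required
 * tunnel-ESP template, xfrm_policy_ok(tmpl, sp, 0, ...) skips entry 0
 * (no match, but transport mode does not stop the scan), matches entry 1
 * and returns 2, the index just past the matched state.  Were entry 1 a
 * non-matching tunnel state instead, the scan would stop there and
 * return -2 - 1 = -3, flagging index 1 for xfrm_secpath_reject().
 */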
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check the SAs used against their selectors. */
	if (skb->sp) {
		int i;

		for (i=skb->sp->len-1; i>=0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol)
		pol = flow_cache_lookup(&fl, family, fl_dir,
					xfrm_policy_lookup);

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols ++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols ++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct flowi fl;

	if (xfrm_decode_session(skb, &fl, family) < 0) {
		/* XXX: we should have something like FWDHDRERROR here. */
		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);
/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to "-1" to force all XFRM destinations to get validated by
	 * dst_ops->check on every use.  We do this because when a
	 * normal route referenced by an XFRM dst is obsoleted we do
	 * not go looking around for all parent referencing XFRM dsts
	 * so that we can invalidate them.  It is just too much work.
	 * Instead we make the checks here on every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to a
	 * positive non-zero integer.  If an XFRM dst has been pruned
	 * like this, we want to force a new route lookup.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
}
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = dev->nd_net->loopback_dev;
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before it reaches the
	 * point of failure. */
	return;
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst) {
		if (dst->obsolete) {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
{
	struct dst_entry *dst, **dstp;

	write_lock(&pol->lock);
	dstp = &pol->bundles;
	while ((dst=*dstp) != NULL) {
		if (func(dst)) {
			*dstp = dst->next;
			dst->next = *gc_list_p;
			*gc_list_p = dst;
		} else {
			dstp = &dst->next;
		}
	}
	write_unlock(&pol->lock);
}

static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
	struct dst_entry *gc_list = NULL;
	int dir;

	read_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		struct hlist_head *table;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst)
			prune_one_bundle(pol, func, &gc_list);

		table = xfrm_policy_bydst[dir].table;
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry, table + i, bydst)
				prune_one_bundle(pol, func, &gc_list);
		}
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;
		gc_list = dst->next;
		dst_free(dst);
	}
}

static int unused_bundle(struct dst_entry *dst)
{
	return !atomic_read(&dst->__refcnt);
}

static void __xfrm_garbage_collect(void)
{
	xfrm_prune_bundles(unused_bundle);
}

static int xfrm_flush_bundles(void)
{
	xfrm_prune_bundles(stale_bundle);
	return 0;
}

static void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst->metrics[RTAX_MTU-1] = pmtu;
	} while ((dst = dst->next));
}
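
/*
 * Editor's worked example (illustrative, numbers made up): if a level's
 * child MTU is 1500 and xfrm_state_mtu() reports 1438 after subtracting
 * the state's overhead, while the route below caches an MTU of 1400, the
 * level's metric becomes min(1438, 1400) = 1400.  Each level thus stores
 * min(state-adjusted child MTU, route MTU).
 */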
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
		   struct flowi *fl, int family, int strict)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (fl) {
		if (first->origin && !flow_cache_uli_match(first->origin, fl))
			return 0;
		if (first->partner &&
		    !xfrm_selector_match(first->partner, fl, family))
			return 0;
	}
#endif

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
			return 0;
		if (fl && pol &&
		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
			return 0;
		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->genid != dst->xfrm->genid)
			return 0;

		if (strict && fl &&
		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst->metrics[RTAX_MTU-1] = mtu;

		if (last == first)
			break;

		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

EXPORT_SYMBOL(xfrm_bundle_ok);

int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = __xfrm_garbage_collect;
		xfrm_policy_afinfo[afinfo->family] = afinfo;
	}
	write_unlock_bh(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
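
/*
 * Editor's illustrative skeleton (not from this file, names are
 * placeholders): a per-family module registers its hooks roughly like
 * this; any dst_ops callbacks it leaves NULL are then defaulted by
 * xfrm_policy_register_afinfo() above.
 */
#if 0	/* example only */
static struct dst_ops example_dst_ops = {
	.family =	AF_INET,
};

static struct xfrm_policy_afinfo example_policy_afinfo = {
	.family =	AF_INET,
	.dst_ops =	&example_dst_ops,
	/* .dst_lookup, .get_saddr, .find_bundle, .decode_session,
	 * .get_tos, .init_path and .fill_dst supplied by the module */
};

static int __init example_policy_init(void)
{
	return xfrm_policy_register_afinfo(&example_policy_afinfo);
}
#endif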

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			struct dst_ops *dst_ops = afinfo->dst_ops;
			xfrm_policy_afinfo[afinfo->family] = NULL;
			dst_ops->kmem_cachep = NULL;
			dst_ops->check = NULL;
			dst_ops->negative_advice = NULL;
			dst_ops->link_failure = NULL;
			afinfo->garbage_collect = NULL;
		}
	}
	write_unlock_bh(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_policy_afinfo_lock);
	afinfo = xfrm_policy_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_policy_afinfo_lock);
	return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	read_unlock(&xfrm_policy_afinfo_lock);
}

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_DOWN:
		xfrm_flush_bundles();
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
	.notifier_call	= xfrm_dev_event,
};

#ifdef CONFIG_XFRM_STATISTICS
static int __init xfrm_statistics_init(void)
{
	if (snmp_mib_init((void **)xfrm_statistics,
			  sizeof(struct linux_xfrm_mib)) < 0)
		return -ENOMEM;
	return 0;
}
#endif

static void __init xfrm_policy_init(void)
{
	unsigned int hmask, sz;
	int dir;

	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	xfrm_policy_byidx = xfrm_hash_alloc(sz);
	xfrm_idx_hmask = hmask;
	if (!xfrm_policy_byidx)
		panic("XFRM: failed to allocate byidx hash\n");

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);

		htab = &xfrm_policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		htab->hmask = hmask;
		if (!htab->table)
			panic("XFRM: failed to allocate bydst hash\n");
	}

	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
	register_netdevice_notifier(&xfrm_dev_notifier);
}

void __init xfrm_init(void)
{
#ifdef CONFIG_XFRM_STATISTICS
	xfrm_statistics_init();
#endif
	xfrm_state_init();
	xfrm_policy_init();
	xfrm_input_init();
#ifdef CONFIG_XFRM_STATISTICS
	xfrm_proc_init();
#endif
}
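
/* xfrm_policy_init() sizes the by-index hash and each per-direction
 * by-destination hash at 8 buckets, initializes the inexact-policy
 * lists, and wires up the policy GC work item plus the netdevice
 * notifier so that stale bundles are flushed on NETDEV_DOWN.  The
 * audit helpers below (built only with CONFIG_AUDITSYSCALL) append a
 * policy's security context and selector to SPD-add / SPD-delete
 * audit records.
 */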

#ifdef CONFIG_AUDITSYSCALL
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
				 NIPQUAD(sel->saddr.a4));
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
				 NIPQUAD(sel->daddr.a4));
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=" NIP6_FMT,
				 NIP6(*(struct in6_addr *)sel->saddr.a6));
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=" NIP6_FMT,
				 NIP6(*(struct in6_addr *)sel->daddr.a6));
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}

void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
			   u32 auid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-add");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
			      u32 auid, u32 secid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-delete");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

#ifdef CONFIG_XFRM_MIGRATE
static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
				       struct xfrm_selector *sel_tgt)
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
				  sel_cmp->family) == 0 &&
		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
				  sel_cmp->family) == 0 &&
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
			return 1;
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
			return 1;
		}
	}
	return 0;
}

static struct xfrm_policy *xfrm_migrate_policy_find(struct xfrm_selector *sel,
						    u8 dir, u8 type)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type &&
		    pol->priority < priority) {
			ret = pol;
			break;
		}
	}

	if (ret)
		xfrm_pol_hold(ret);

	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}
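
/* Template matching for migration: a migrate entry matches a policy
 * template when mode, protocol and (if given) reqid agree; for
 * tunnel/BEET mode the old endpoint addresses must match as well,
 * while transport-mode templates carry no addresses to compare.
 */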

static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
					  m->old_family) == 0 &&
			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
					  m->old_family) == 0) {
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* In transport mode the template does not store
			 * any IP addresses, so we only compare mode and
			 * protocol. */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	struct dst_entry *dst;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
	if (unlikely(pol->dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
			while ((dst = pol->bundles) != NULL) {
				pol->bundles = dst->next;
				dst_free(dst);
			}
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
				   m[i].old_family) == 0) &&
		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
				   m[i].old_family) == 0))
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}
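
/* xfrm_migrate() below drives a MIGRATE update in five stages:
 * validate the request, find the matching policy, clone each matching
 * state with its new endpoints (xfrm_state_migrate), rewrite the
 * policy templates and drop cached bundles, then discard the old
 * states and announce the change via km_migrate().  If state cloning
 * or the policy update fails, restore_state drops the references
 * taken so far and deletes any newly created states.
 */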

int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	/* Stage 1 - find policy */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
#endif