/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <asm/bug.h>
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <net/xfrm.h>
#include <net/ip.h>

DECLARE_MUTEX(xfrm_cfg_sem);
EXPORT_SYMBOL(xfrm_cfg_sem);

static DEFINE_RWLOCK(xfrm_policy_lock);

struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_list);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static kmem_cache_t *xfrm_dst_cache;

static struct work_struct xfrm_policy_gc_work;
static struct list_head xfrm_policy_gc_list =
	LIST_HEAD_INIT(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);

int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct xfrm_type_map *typemap;
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;
	typemap = afinfo->type_map;

	write_lock(&typemap->lock);
	if (likely(typemap->map[type->proto] == NULL))
		typemap->map[type->proto] = type;
	else
		err = -EEXIST;
	write_unlock(&typemap->lock);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_register_type);

int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct xfrm_type_map *typemap;
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;
	typemap = afinfo->type_map;

	write_lock(&typemap->lock);
	if (unlikely(typemap->map[type->proto] != type))
		err = -ENOENT;
	else
		typemap->map[type->proto] = NULL;
	write_unlock(&typemap->lock);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);

struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct xfrm_type_map *typemap;
	struct xfrm_type *type;
	int modload_attempted = 0;

retry:
	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;
	typemap = afinfo->type_map;

	read_lock(&typemap->lock);
	type = typemap->map[proto];
	if (unlikely(type && !try_module_get(type->owner)))
		type = NULL;
	read_unlock(&typemap->lock);
	if (!type && !modload_attempted) {
		xfrm_policy_put_afinfo(afinfo);
		request_module("xfrm-type-%d-%d",
			       (int) family, (int) proto);
		modload_attempted = 1;
		goto retry;
	}

	xfrm_policy_put_afinfo(afinfo);
	return type;
}
EXPORT_SYMBOL(xfrm_get_type);
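
/* Route lookup for the flow's outer addresses, delegated to the
 * registered per-family (e.g. IPv4/IPv6) dst_lookup hook.
 */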
int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
		    unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	if (likely(afinfo->dst_lookup != NULL))
		err = afinfo->dst_lookup(dst, fl);
	else
		err = -EINVAL;
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_dst_lookup);

void xfrm_put_type(struct xfrm_type *type)
{
	module_put(type->owner);
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->dead)
		goto out;

	dir = xp->index & 7;

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	km_policy_expired(xp, dir, 1);
	xfrm_policy_delete(xp, dir);
	xfrm_pol_put(xp);
}


/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(int gfp)
{
	struct xfrm_policy *policy;

	policy = kmalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		memset(policy, 0, sizeof(struct xfrm_policy));
		atomic_set(&policy->refcnt, 1);
		rwlock_init(&policy->lock);
		init_timer(&policy->timer);
		policy->timer.data = (unsigned long)policy;
		policy->timer.function = xfrm_policy_timer;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
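
/* Typical usage (illustrative sketch only; this is roughly what a key
 * manager such as af_key does for an SPD add, details omitted):
 *
 *	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);
 *	if (xp != NULL) {
 *		... fill in xp->selector, xp->lft, xp->action, xp->xfrm_vec ...
 *		err = xfrm_policy_insert(XFRM_POLICY_OUT, xp, excl);
 *	}
 */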
/* Destroy xfrm_policy: descendant resources must have been released by this
 * point.
 */

void __xfrm_policy_destroy(struct xfrm_policy *policy)
{
	if (!policy->dead)
		BUG();

	if (policy->bundles)
		BUG();

	if (del_timer(&policy->timer))
		BUG();

	kfree(policy);
}
EXPORT_SYMBOL(__xfrm_policy_destroy);

static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(void *data)
{
	struct xfrm_policy *policy;
	struct list_head *entry, *tmp;
	struct list_head gc_list = LIST_HEAD_INIT(gc_list);

	spin_lock_bh(&xfrm_policy_gc_lock);
	list_splice_init(&xfrm_policy_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	list_for_each_safe(entry, tmp, &gc_list) {
		policy = list_entry(entry, struct xfrm_policy, list);
		xfrm_policy_gc_kill(policy);
	}
}

/* Announce the entry dead and hand it to the GC worker, which releases
 * its descendant resources. The rule must already have been unlinked
 * from the lists by this point.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->dead;
	policy->dead = 1;
	write_unlock_bh(&policy->lock);

	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	spin_lock(&xfrm_policy_gc_lock);
	list_add(&policy->list, &xfrm_policy_gc_list);
	spin_unlock(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}

/* Generate a new index. KAME seems to generate them ordered by cost,
 * at the price of absolute unpredictability of rule ordering. That will
 * not pass here.
 */
static u32 xfrm_gen_index(int dir)
{
	u32 idx;
	struct xfrm_policy *p;
	static u32 idx_generator;

	for (;;) {
		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		for (p = xfrm_policy_list[dir]; p; p = p->next) {
			if (p->index == idx)
				break;
		}
		if (!p)
			return idx;
	}
}
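
/* Insert the policy into the per-direction list, which is kept sorted
 * by priority. A policy with an identical selector replaces the old
 * entry (or fails with -EEXIST when 'excl' is set).
 */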
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol, **p;
	struct xfrm_policy *delpol = NULL;
	struct xfrm_policy **newpos = NULL;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			*p = pol->next;
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			p = &pol->next;
			continue;
		}
		if (!newpos)
			newpos = p;
		if (delpol)
			break;
		p = &pol->next;
	}
	if (newpos)
		p = newpos;
	xfrm_pol_hold(policy);
	policy->next = *p;
	*p = policy;
	atomic_inc(&flow_cache_genid);
	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol) {
		xfrm_policy_kill(delpol);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
				      int delete)
{
	struct xfrm_policy *pol, **p;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
			xfrm_pol_hold(pol);
			if (delete)
				*p = pol->next;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (pol && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
	}
	return pol;
}
EXPORT_SYMBOL(xfrm_policy_bysel);

struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
{
	struct xfrm_policy *pol, **p;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
		if (pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete)
				*p = pol->next;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (pol && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
	}
	return pol;
}
EXPORT_SYMBOL(xfrm_policy_byid);

void xfrm_policy_flush(void)
{
	struct xfrm_policy *xp;
	int dir;

	write_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		while ((xp = xfrm_policy_list[dir]) != NULL) {
			xfrm_policy_list[dir] = xp->next;
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_policy_kill(xp);

			write_lock_bh(&xfrm_policy_lock);
		}
	}
	atomic_inc(&flow_cache_genid);
	write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *xp;
	int dir;
	int count = 0;
	int error = 0;

	read_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
			count++;
	}

	if (count == 0) {
		error = -ENOENT;
		goto out;
	}

	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
			if (error)
				goto out;
		}
	}

out:
	read_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

/* Find policy to apply to this flow. */
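
/* This is the resolver callback supplied to flow_cache_lookup(): the
 * matching policy is returned through *objp, with its reference count
 * through *obj_refp so the flow cache can hold the object.
 */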
static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
		struct xfrm_selector *sel = &pol->selector;
		int match;

		if (pol->family != family)
			continue;

		match = xfrm_selector_match(sel, fl, family);
		if (match) {
			xfrm_pol_hold(pol);
			break;
		}
	}
	read_unlock_bh(&xfrm_policy_lock);
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		if (match)
			xfrm_pol_hold(pol);
		else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	pol->next = xfrm_policy_list[dir];
	xfrm_policy_list[dir] = pol;
	xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	struct xfrm_policy **polp;

	for (polp = &xfrm_policy_list[dir];
	     *polp != NULL; polp = &(*polp)->next) {
		if (*polp == pol) {
			*polp = pol->next;
			return pol;
		}
	}
	return NULL;
}

void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		if (dir < XFRM_POLICY_MAX)
			atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
	}
}

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}

/* Resolve list of templates for the flow, given policy. */
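
/* The templates are walked in order. For a tunnel-mode template the
 * outer addresses come from the template itself and then become the
 * daddr/saddr used to resolve the remaining templates; optional
 * templates that cannot be resolved are simply skipped.
 */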
static int
xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
	struct dst_entry *x;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EINVAL);
	x = afinfo->find_bundle(fl, policy);
	xfrm_policy_put_afinfo(afinfo);
	return x;
}

/* Allocate a chain of dst_entry's, attach the known xfrm states and
 * calculate all the metrics. In short, build a bundle.
 */

static int
xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
		   struct flowi *fl, struct dst_entry **dst_p,
		   unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for the given flow.
 *
 * At the moment we eat a raw IP route, mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
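
/* Reference semantics: when a bundle is used, the caller's reference
 * to the original route is dropped and *dst_p is replaced with the
 * bundle; on error the original reference is dropped as well and
 * *dst_p is set to NULL. When no policy applies, *dst_p is left
 * untouched and 0 is returned.
 */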
int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family = dst_orig->ops->family;
restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	if (sk && sk->sk_policy[1])
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);

	if (!policy) {
		/* To accelerate a bit... */
		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
			return 0;

		policy = flow_cache_lookup(fl, family,
					   policy_to_flow_dir(XFRM_POLICY_OUT),
					   xfrm_policy_lookup);
	}

	if (!policy)
		return 0;

	policy->curlft.use_time = (unsigned long)xtime.tv_sec;

	switch (policy->action) {
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		xfrm_pol_put(policy);
		return -EPERM;

	case XFRM_POLICY_ALLOW:
		if (policy->xfrm_nr == 0) {
			/* Flow passes untransformed. */
			xfrm_pol_put(policy);
			return 0;
		}

		/* Try to find a matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			xfrm_pol_put(policy);
			return PTR_ERR(dst);
		}

		if (dst)
			break;

		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && flags) {
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pol_put(policy);
					goto restart;
				}
				err = nx;
			}
			if (err < 0)
				goto error;
		}
		if (nx == 0) {
			/* Flow passes untransformed. */
			xfrm_pol_put(policy);
			return 0;
		}

		dst = dst_orig;
		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);

		if (unlikely(err)) {
			int i;
			for (i=0; i<nx; i++)
				xfrm_state_put(xfrm[i]);
			goto error;
		}

		write_lock_bh(&policy->lock);
		if (unlikely(policy->dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist a new bundle to a dead
			 * object. We can't enlist stale bundles either.
			 */
			write_unlock_bh(&policy->lock);

			xfrm_pol_put(policy);
			if (dst)
				dst_free(dst);
			goto restart;
		}
		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pol_put(policy);
	return 0;

error:
	dst_release(dst_orig);
	xfrm_pol_put(policy);
	*dst_p = NULL;
	return err;
}
EXPORT_SYMBOL(xfrm_lookup);

/* When an skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have the policy cached at them.
 */
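
/* A state satisfies a template when proto, spi, reqid and mode agree
 * (zero spi/reqid in the template act as wildcards), the template's
 * auth algorithm mask permits the state's algorithm, and for tunnel
 * mode the addresses match. Kernel-created states (xfrm_state_kern)
 * may only satisfy optional templates.
 */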
static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
	return x->id.proto == tmpl->id.proto &&
	       (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
	       (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
	       x->props.mode == tmpl->mode &&
	       (tmpl->aalgos & (1<<x->props.aalgo)) &&
	       !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
}

static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (!tmpl->mode)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
			return ++idx;
		if (sp->x[idx].xvec->props.mode)
			break;
	}
	return start;
}

static int
_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl);
	xfrm_policy_put_afinfo(afinfo);
	return 0;
}

static inline int secpath_has_tunnel(struct sec_path *sp, int k)
{
	for (; k < sp->len; k++) {
		if (sp->x[k].xvec->props.mode)
			return 1;
	}

	return 0;
}
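
/* Inbound policy check, called once the packet has been decapsulated:
 * verify that the transformations recorded in the sec_path satisfy the
 * applicable policy. Returns 1 to accept the packet, 0 to drop it.
 */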
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct flowi fl;

	if (_decode_session(skb, &fl, family) < 0)
		return 0;

	/* First, check used SAs against their selectors. */
	if (skb->sp) {
		int i;

		for (i=skb->sp->len-1; i>=0; i--) {
			struct sec_decap_state *xvec = &(skb->sp->x[i]);
			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
				return 0;

			/* If there is a post_input processor, try running it */
			if (xvec->xvec->type->post_input &&
			    (xvec->xvec->type->post_input)(xvec->xvec,
							   &(xvec->decap),
							   skb) != 0)
				return 0;
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir])
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);

	if (!pol)
		pol = flow_cache_lookup(&fl, family,
					policy_to_flow_dir(dir),
					xfrm_policy_lookup);

	if (!pol)
		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);

	pol->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find the corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
			if (k < 0)
				goto reject;
		}

		if (secpath_has_tunnel(sp, k))
			goto reject;

		xfrm_pol_put(pol);
		return 1;
	}

reject:
	xfrm_pol_put(pol);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct flowi fl;

	if (_decode_session(skb, &fl, family) < 0)
		return 0;

	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	if (!stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
}

static void xfrm_dst_destroy(struct dst_entry *dst)
{
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	dst_release(xdst->route);

	if (!dst->xfrm)
		return;
	xfrm_state_put(dst->xfrm);
	dst->xfrm = NULL;
}

static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int unregister)
{
	if (!unregister)
		return;

	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = &loopback_dev;
		dev_hold(&loopback_dev);
		dev_put(dev);
	}
}

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before it reaches the point of failure. */
	return;
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst) {
		if (dst->obsolete) {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
	int i;
	struct xfrm_policy *pol;
	struct dst_entry *dst, **dstp, *gc_list = NULL;

	read_lock_bh(&xfrm_policy_lock);
	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
			write_lock(&pol->lock);
			dstp = &pol->bundles;
			while ((dst=*dstp) != NULL) {
				if (func(dst)) {
					*dstp = dst->next;
					dst->next = gc_list;
					gc_list = dst;
				} else {
					dstp = &dst->next;
				}
			}
			write_unlock(&pol->lock);
		}
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		dst = gc_list;
		gc_list = dst->next;
		dst_free(dst);
	}
}

static int unused_bundle(struct dst_entry *dst)
{
	return !atomic_read(&dst->__refcnt);
}

static void __xfrm_garbage_collect(void)
{
	xfrm_prune_bundles(unused_bundle);
}

int xfrm_flush_bundles(void)
{
	xfrm_prune_bundles(stale_bundle);
	return 0;
}
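
/* Walk each level of a freshly created bundle, caching the child and
 * route MTUs and recording the effective path MTU in the dst metrics.
 */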
void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst->metrics[RTAX_MTU-1] = pmtu;
	} while ((dst = dst->next));
}

EXPORT_SYMBOL(xfrm_init_pmtu);

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	if (!dst_check(dst->path, 0) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
			return 0;
		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, 0))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst->metrics[RTAX_MTU-1] = mtu;

		if (last == first)
			break;

		last = last->u.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

EXPORT_SYMBOL(xfrm_bundle_ok);

/* Well... that's _TASK_. We need to scan through the transformation
 * list and figure out what MSS TCP should generate so that the final
 * datagram fits the MTU. Mamma mia... :-)
 *
 * Apparently, some easy way exists, but we used to choose the most
 * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
 *
 * Consider this function as something like dark humour. :-)
 */
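
/* Concretely, the loop below starts from (mtu - header_len) as a
 * candidate payload size, adds up the worst-case expansion of every
 * transform in the bundle, and shrinks the candidate by the overshoot
 * until the resulting datagram fits the MTU (falling back to the raw
 * MTU once the candidate drops below 88 bytes).
 */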
static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
{
	int res = mtu - dst->header_len;

	for (;;) {
		struct dst_entry *d = dst;
		int m = res;

		do {
			struct xfrm_state *x = d->xfrm;
			if (x) {
				spin_lock_bh(&x->lock);
				if (x->km.state == XFRM_STATE_VALID &&
				    x->type && x->type->get_max_size)
					m = x->type->get_max_size(d->xfrm, m);
				else
					m += x->props.header_len;
				spin_unlock_bh(&x->lock);
			}
		} while ((d = d->child) != NULL);

		if (m <= mtu)
			break;
		res -= (m - mtu);
		if (res < 88)
			return mtu;
	}

	return res + dst->header_len;
}

int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->destroy == NULL))
			dst_ops->destroy = xfrm_dst_destroy;
		if (likely(dst_ops->ifdown == NULL))
			dst_ops->ifdown = xfrm_dst_ifdown;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->get_mss == NULL))
			dst_ops->get_mss = xfrm_get_mss;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = __xfrm_garbage_collect;
		xfrm_policy_afinfo[afinfo->family] = afinfo;
	}
	write_unlock(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			struct dst_ops *dst_ops = afinfo->dst_ops;
			xfrm_policy_afinfo[afinfo->family] = NULL;
			dst_ops->kmem_cachep = NULL;
			dst_ops->check = NULL;
			dst_ops->destroy = NULL;
			dst_ops->ifdown = NULL;
			dst_ops->negative_advice = NULL;
			dst_ops->link_failure = NULL;
			dst_ops->get_mss = NULL;
			afinfo->garbage_collect = NULL;
		}
	}
	write_unlock(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_policy_afinfo_lock);
	afinfo = xfrm_policy_afinfo[family];
	if (likely(afinfo != NULL))
		read_lock(&afinfo->lock);
	read_unlock(&xfrm_policy_afinfo_lock);
	return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	if (unlikely(afinfo == NULL))
		return;
	read_unlock(&afinfo->lock);
}
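
/* When a device goes down, prune stale bundles so that none of them
 * keeps referencing a route through the dead device.
 */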
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	switch (event) {
	case NETDEV_DOWN:
		xfrm_flush_bundles();
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
	.notifier_call	= xfrm_dev_event,
};

static void __init xfrm_policy_init(void)
{
	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN,
					   NULL, NULL);
	if (!xfrm_dst_cache)
		panic("XFRM: failed to allocate xfrm_dst_cache\n");

	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
	register_netdevice_notifier(&xfrm_dev_notifier);
}

void __init xfrm_init(void)
{
	xfrm_state_init();
	xfrm_policy_init();
	xfrm_input_init();
}