/*
 * ip_vs_app.c: Application module support for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
 * is that ip_vs_app module handles the reverse direction (incoming requests
 * and outgoing responses).
 *
 *		IP_MASQ_APP application masquerading module
 *
 * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
 *
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <linux/stat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>

#include <net/ip_vs.h>

EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);

/* Serializes all updates to the per-netns app list and the per-app
 * incarnation lists (registration, unregistration and the /proc walker). */
static DEFINE_MUTEX(__ip_vs_app_mutex);

/*
 *	Get an ip_vs_app object: pins the module that owns the app so it
 *	cannot be unloaded while connections are bound to it.
 *	Returns non-zero on success (mirrors try_module_get()).
 */
static inline int ip_vs_app_get(struct ip_vs_app *app)
{
	return try_module_get(app->module);
}


/*
 *	Drop the module reference taken by ip_vs_app_get().
 */
static inline void ip_vs_app_put(struct ip_vs_app *app)
{
	module_put(app->module);
}

/*
 *	Free an app incarnation and its (possibly NULL) timeout table.
 */
static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
{
	kfree(inc->timeout_table);
	kfree(inc);
}

/*
 *	RCU callback: deferred free of an incarnation after readers that
 *	may still hold a reference via the protocol app hash have drained.
 */
static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
{
	struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);

	ip_vs_app_inc_destroy(inc);
}

/*
 *	Allocate/initialize app incarnation and register it in proto apps.
 *	An incarnation is a copy of @app bound to one (proto, port) pair;
 *	it is linked into @app->incs_list on success.
 *	Called with __ip_vs_app_mutex held.
 */
static int
ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
		  __u16 port)
{
	struct ip_vs_protocol *pp;
	struct ip_vs_app *inc;
	int ret;

	if (!(pp = ip_vs_proto_get(proto)))
		return -EPROTONOSUPPORT;

	/* NOTE(review): only unregister_app is checked; register_app is
	 * called unchecked below — assumes protocols implement both hooks
	 * or neither, confirm against the ip_vs_protocol instances. */
	if (!pp->unregister_app)
		return -EOPNOTSUPP;

	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
	if (!inc)
		return -ENOMEM;
	INIT_LIST_HEAD(&inc->p_list);
	INIT_LIST_HEAD(&inc->incs_list);
	inc->app = app;
	inc->port = htons(port);
	atomic_set(&inc->usecnt, 0);

	if (app->timeouts) {
		inc->timeout_table =
			ip_vs_create_timeout_table(app->timeouts,
						   app->timeouts_size);
		if (!inc->timeout_table) {
			ret = -ENOMEM;
			goto out;
		}
	}

	ret = pp->register_app(ipvs, inc);
	if (ret)
		goto out;

	list_add(&inc->a_list, &app->incs_list);
	IP_VS_DBG(9, "%s App %s:%u registered\n",
		  pp->name, inc->name, ntohs(inc->port));

	return 0;

  out:
	ip_vs_app_inc_destroy(inc);
	return ret;
}


/*
 *	Release app incarnation: unhook it from the protocol and from the
 *	parent app, then free it after an RCU grace period.
 *	Called with __ip_vs_app_mutex held.
 */
static void
ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
	struct ip_vs_protocol *pp;

	if (!(pp = ip_vs_proto_get(inc->protocol)))
		return;

	if (pp->unregister_app)
		pp->unregister_app(ipvs, inc);

	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
		  pp->name, inc->name, ntohs(inc->port));

	list_del(&inc->a_list);

	/* deferred free: softirq readers may still hold the inc */
	call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
}


/*
 *	Get reference to app inc (only called from softirq)
 *	Takes a module ref on the owning app; usecnt is bumped only when
 *	the module ref succeeded.
 */
int ip_vs_app_inc_get(struct ip_vs_app *inc)
{
	int result;

	result = ip_vs_app_get(inc->app);
	if (result)
		atomic_inc(&inc->usecnt);
	return result;
}


/*
 *	Put the app inc (only called from timer or net softirq)
 */
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
	atomic_dec(&inc->usecnt);
	ip_vs_app_put(inc->app);
}


/*
 *	Register an application incarnation in protocol applications
 *	(mutex-protected wrapper around ip_vs_app_inc_new()).
 */
int
register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
		       __u16 port)
{
	int result;

	mutex_lock(&__ip_vs_app_mutex);

	result = ip_vs_app_inc_new(ipvs, app, proto, port);

	mutex_unlock(&__ip_vs_app_mutex);

	return result;
}


/* Register application for netns.  A private copy of @app is kept on
 * ipvs->app_list; names must be unique within the namespace.
 * Returns the copy, or ERR_PTR(-EEXIST / -ENOMEM) on failure. */
struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
	struct ip_vs_app *a;
	int err = 0;

	mutex_lock(&__ip_vs_app_mutex);

	list_for_each_entry(a, &ipvs->app_list, a_list) {
		if (!strcmp(app->name, a->name)) {
			err = -EEXIST;
			goto out_unlock;
		}
	}
	a = kmemdup(app, sizeof(*app), GFP_KERNEL);
	if (!a) {
		err = -ENOMEM;
		goto out_unlock;
	}
	INIT_LIST_HEAD(&a->incs_list);
	list_add(&a->a_list, &ipvs->app_list);
	/* increase the module use count */
	ip_vs_use_count_inc();

out_unlock:
	mutex_unlock(&__ip_vs_app_mutex);

	return err ? ERR_PTR(err) : a;
}


/*
 *	ip_vs_app unregistration routine
 *	We are sure there are no app incarnations attached to services
 *	Caller should use synchronize_rcu() or rcu_barrier()
 *	A NULL @app unregisters every application in the namespace
 *	(used by ip_vs_app_net_cleanup()).
 */
void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
	struct ip_vs_app *a, *anxt, *inc, *nxt;

	mutex_lock(&__ip_vs_app_mutex);

	list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
		/* match by name; skip non-matching entries when @app given */
		if (app && strcmp(app->name, a->name))
			continue;
		list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
			ip_vs_app_inc_release(ipvs, inc);
		}

		list_del(&a->a_list);
		kfree(a);

		/* decrease the module use count */
		ip_vs_use_count_dec();
	}

	mutex_unlock(&__ip_vs_app_mutex);
}


/*
 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 */
int ip_vs_bind_app(struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp)
{
	return pp->app_conn_bind(cp);
}


/*
 *	Unbind cp from application incarnation (called by cp destructor)
 */
void ip_vs_unbind_app(struct ip_vs_conn *cp)
{
	struct ip_vs_app *inc = cp->app;

	if (!inc)
		return;

	if (inc->unbind_conn)
		inc->unbind_conn(inc, cp);
	if (inc->done_conn)
		inc->done_conn(inc, cp);
	ip_vs_app_inc_put(inc);
	cp->app = NULL;
}


/*
 *	Fixes th->seq based on ip_vs_seq info.
 */
static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
	__u32 seq = ntohl(th->seq);

	/*
	 *	Adjust seq with delta-offset for all packets after
	 *	the most recent resized pkt seq and with previous_delta offset
	 *	for all packets	before most recent resized pkt seq.
	 */
	if (vseq->delta || vseq->previous_delta) {
		if (after(seq, vseq->init_seq)) {
			th->seq = htonl(seq + vseq->delta);
			IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
				  __func__, vseq->delta);
		} else {
			th->seq = htonl(seq + vseq->previous_delta);
			IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
				  __func__, vseq->previous_delta);
		}
	}
}


/*
 *	Fixes th->ack_seq based on ip_vs_seq info.
 */
static inline void
vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
	__u32 ack_seq = ntohl(th->ack_seq);

	/*
	 * Adjust ack_seq with delta-offset for
	 * the packets AFTER most recent resized pkt has caused a shift
	 * for packets before most recent resized pkt, use previous_delta
	 */
	if (vseq->delta || vseq->previous_delta) {
		/* since ack_seq is the number of octet that is expected
		   to receive next, so compare it with init_seq+delta */
		if (after(ack_seq, vseq->init_seq + vseq->delta)) {
			th->ack_seq = htonl(ack_seq - vseq->delta);
			IP_VS_DBG(9, "%s(): subtracted delta "
				  "(%d) from ack_seq\n", __func__, vseq->delta);

		} else {
			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
			IP_VS_DBG(9, "%s(): subtracted "
				  "previous_delta (%d) from ack_seq\n",
				  __func__, vseq->previous_delta);
		}
	}
}


/*
 *	Updates ip_vs_seq if pkt has been resized
 *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
 */
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
				 unsigned int flag, __u32 seq, int diff)
{
	/* spinlock is to keep updating cp->flags atomic */
	spin_lock_bh(&cp->lock);
	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
		/* roll the current delta into previous_delta and record the
		 * seq of the most recently resized packet */
		vseq->previous_delta = vseq->delta;
		vseq->delta += diff;
		vseq->init_seq = seq;
		cp->flags |= flag;
	}
	spin_unlock_bh(&cp->lock);
}

/*
 *	Output path for TCP: fix seq/ack_seq, run the app's pkt_out hook and
 *	record any payload size change.  Returns 0 on failure (e.g. when the
 *	header cannot be made writable), 1 otherwise.
 */
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
				  struct ip_vs_app *app)
{
	int diff;
	const unsigned int tcp_offset = ip_hdrlen(skb);
	struct tcphdr *th;
	__u32 seq;

	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_seq(&cp->out_seq, th);
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_ack_seq(&cp->in_seq, th);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	if (!app->pkt_out(app, cp, skb, &diff))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->out_seq,
			      IP_VS_CONN_F_OUT_SEQ, seq, diff);

	return 1;
}

/*
 *	Output pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL
 *	returns false if it can't handle packet (oom)
 */
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_out(cp, skb, app);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	return app->pkt_out(app, cp, skb, NULL);
}


/*
 *	Input path for TCP: mirror image of app_tcp_pkt_out() — fixes
 *	in_seq/out_seq the opposite way round and runs the pkt_in hook.
 */
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
				 struct ip_vs_app *app)
{
	int diff;
	const unsigned int tcp_offset = ip_hdrlen(skb);
	struct tcphdr *th;
	__u32 seq;

	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_seq(&cp->in_seq, th);
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_ack_seq(&cp->out_seq, th);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	if (!app->pkt_in(app, cp, skb, &diff))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->in_seq,
			      IP_VS_CONN_F_IN_SEQ, seq, diff);

	return 1;
}

/*
 *	Input pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL.
 *	returns false if can't handle packet (oom).
 */
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_in(cp, skb, app);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	return app->pkt_in(app, cp, skb, NULL);
}


#ifdef CONFIG_PROC_FS
/*
 *	/proc/net/ip_vs_app entry function
 */

/* Return the pos'th incarnation across all registered apps, or NULL.
 * Called with __ip_vs_app_mutex held (taken in ..._seq_start()). */
static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
	struct ip_vs_app *app, *inc;

	list_for_each_entry(app, &ipvs->app_list, a_list) {
		list_for_each_entry(inc, &app->incs_list, a_list) {
			if (pos-- == 0)
				return inc;
		}
	}
	return NULL;

}

/* seq_file start: holds __ip_vs_app_mutex until ..._seq_stop(). */
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_app_mutex);

	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}

/* seq_file next: advance within the current app's incarnations, then on
 * to the first incarnation of the next app that has any. */
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip_vs_app *inc, *app;
	struct list_head *e;
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_app_idx(ipvs, 0);

	inc = v;
	app = inc->app;

	if ((e = inc->a_list.next) != &app->incs_list)
		return list_entry(e, struct ip_vs_app, a_list);

	/* go on to next application */
	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
		app = list_entry(e, struct ip_vs_app, a_list);
		list_for_each_entry(inc, &app->incs_list, a_list) {
			return inc;
		}
	}
	return NULL;
}

/* seq_file stop: releases the mutex taken in ..._seq_start(). */
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
	mutex_unlock(&__ip_vs_app_mutex);
}

/* seq_file show: one header line, then one line per incarnation. */
static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "prot port    usecnt name\n");
	else {
		const struct ip_vs_app *inc = v;

		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
			   ip_vs_proto_name(inc->protocol),
			   ntohs(inc->port),
			   atomic_read(&inc->usecnt),
			   inc->name);
	}
	return 0;
}

static const struct seq_operations ip_vs_app_seq_ops = {
	.start = ip_vs_app_seq_start,
	.next  = ip_vs_app_seq_next,
	.stop  = ip_vs_app_seq_stop,
	.show  = ip_vs_app_seq_show,
};

static int ip_vs_app_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_app_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations ip_vs_app_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ip_vs_app_open,
	.read	 = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

/* Per-netns init: empty app list + /proc/net/ip_vs_app entry. */
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;

	INIT_LIST_HEAD(&ipvs->app_list);
	proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops);
	return 0;
}

/* Per-netns teardown: unregister every remaining app, remove proc entry. */
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;

	unregister_ip_vs_app(ipvs, NULL /* all */);
	remove_proc_entry("ip_vs_app", net->proc_net);
}