1 /* 2 * ip_vs_app.c: Application module support for IPVS 3 * 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference 12 * is that ip_vs_app module handles the reverse direction (incoming requests 13 * and outgoing responses). 14 * 15 * IP_MASQ_APP application masquerading module 16 * 17 * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> 18 * 19 */ 20 21 #define KMSG_COMPONENT "IPVS" 22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 23 24 #include <linux/module.h> 25 #include <linux/kernel.h> 26 #include <linux/skbuff.h> 27 #include <linux/in.h> 28 #include <linux/ip.h> 29 #include <linux/netfilter.h> 30 #include <linux/slab.h> 31 #include <net/net_namespace.h> 32 #include <net/protocol.h> 33 #include <net/tcp.h> 34 #include <linux/stat.h> 35 #include <linux/proc_fs.h> 36 #include <linux/seq_file.h> 37 #include <linux/mutex.h> 38 39 #include <net/ip_vs.h> 40 41 EXPORT_SYMBOL(register_ip_vs_app); 42 EXPORT_SYMBOL(unregister_ip_vs_app); 43 EXPORT_SYMBOL(register_ip_vs_app_inc); 44 45 static DEFINE_MUTEX(__ip_vs_app_mutex); 46 47 /* 48 * Get an ip_vs_app object 49 */ 50 static inline int ip_vs_app_get(struct ip_vs_app *app) 51 { 52 return try_module_get(app->module); 53 } 54 55 56 static inline void ip_vs_app_put(struct ip_vs_app *app) 57 { 58 module_put(app->module); 59 } 60 61 static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) 62 { 63 kfree(inc->timeout_table); 64 kfree(inc); 65 } 66 67 static void ip_vs_app_inc_rcu_free(struct rcu_head *head) 68 { 69 struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); 70 71 ip_vs_app_inc_destroy(inc); 72 } 73 74 /* 75 * Allocate/initialize app incarnation and register it in proto apps. 76 */ 77 static int 78 ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, 79 __u16 port) 80 { 81 struct ip_vs_protocol *pp; 82 struct ip_vs_app *inc; 83 int ret; 84 85 if (!(pp = ip_vs_proto_get(proto))) 86 return -EPROTONOSUPPORT; 87 88 if (!pp->unregister_app) 89 return -EOPNOTSUPP; 90 91 inc = kmemdup(app, sizeof(*inc), GFP_KERNEL); 92 if (!inc) 93 return -ENOMEM; 94 INIT_LIST_HEAD(&inc->p_list); 95 INIT_LIST_HEAD(&inc->incs_list); 96 inc->app = app; 97 inc->port = htons(port); 98 atomic_set(&inc->usecnt, 0); 99 100 if (app->timeouts) { 101 inc->timeout_table = 102 ip_vs_create_timeout_table(app->timeouts, 103 app->timeouts_size); 104 if (!inc->timeout_table) { 105 ret = -ENOMEM; 106 goto out; 107 } 108 } 109 110 ret = pp->register_app(ipvs, inc); 111 if (ret) 112 goto out; 113 114 list_add(&inc->a_list, &app->incs_list); 115 IP_VS_DBG(9, "%s App %s:%u registered\n", 116 pp->name, inc->name, ntohs(inc->port)); 117 118 return 0; 119 120 out: 121 ip_vs_app_inc_destroy(inc); 122 return ret; 123 } 124 125 126 /* 127 * Release app incarnation 128 */ 129 static void 130 ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc) 131 { 132 struct ip_vs_protocol *pp; 133 134 if (!(pp = ip_vs_proto_get(inc->protocol))) 135 return; 136 137 if (pp->unregister_app) 138 pp->unregister_app(ipvs, inc); 139 140 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 141 pp->name, inc->name, ntohs(inc->port)); 142 143 list_del(&inc->a_list); 144 145 call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); 146 } 147 148 149 /* 150 * Get reference to app inc (only called from softirq) 151 * 152 */ 153 int ip_vs_app_inc_get(struct ip_vs_app *inc) 154 { 155 int result; 156 157 result = ip_vs_app_get(inc->app); 158 if (result) 159 atomic_inc(&inc->usecnt); 160 return result; 161 } 162 163 164 /* 165 * Put the app inc (only called from timer or net softirq) 166 */ 167 void ip_vs_app_inc_put(struct ip_vs_app *inc) 168 { 169 atomic_dec(&inc->usecnt); 170 ip_vs_app_put(inc->app); 171 } 172 173 174 /* 175 * Register an application incarnation in protocol applications 176 */ 177 int 178 register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, 179 __u16 port) 180 { 181 int result; 182 183 mutex_lock(&__ip_vs_app_mutex); 184 185 result = ip_vs_app_inc_new(ipvs, app, proto, port); 186 187 mutex_unlock(&__ip_vs_app_mutex); 188 189 return result; 190 } 191 192 193 /* Register application for netns */ 194 struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) 195 { 196 struct ip_vs_app *a; 197 int err = 0; 198 199 mutex_lock(&__ip_vs_app_mutex); 200 201 list_for_each_entry(a, &ipvs->app_list, a_list) { 202 if (!strcmp(app->name, a->name)) { 203 err = -EEXIST; 204 goto out_unlock; 205 } 206 } 207 a = kmemdup(app, sizeof(*app), GFP_KERNEL); 208 if (!a) { 209 err = -ENOMEM; 210 goto out_unlock; 211 } 212 INIT_LIST_HEAD(&a->incs_list); 213 list_add(&a->a_list, &ipvs->app_list); 214 /* increase the module use count */ 215 ip_vs_use_count_inc(); 216 217 out_unlock: 218 mutex_unlock(&__ip_vs_app_mutex); 219 220 return err ? ERR_PTR(err) : a; 221 } 222 223 224 /* 225 * ip_vs_app unregistration routine 226 * We are sure there are no app incarnations attached to services 227 * Caller should use synchronize_rcu() or rcu_barrier() 228 */ 229 void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) 230 { 231 struct ip_vs_app *a, *anxt, *inc, *nxt; 232 233 mutex_lock(&__ip_vs_app_mutex); 234 235 list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { 236 if (app && strcmp(app->name, a->name)) 237 continue; 238 list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { 239 ip_vs_app_inc_release(ipvs, inc); 240 } 241 242 list_del(&a->a_list); 243 kfree(a); 244 245 /* decrease the module use count */ 246 ip_vs_use_count_dec(); 247 } 248 249 mutex_unlock(&__ip_vs_app_mutex); 250 } 251 252 253 /* 254 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) 255 */ 256 int ip_vs_bind_app(struct ip_vs_conn *cp, 257 struct ip_vs_protocol *pp) 258 { 259 return pp->app_conn_bind(cp); 260 } 261 262 263 /* 264 * Unbind cp from application incarnation (called by cp destructor) 265 */ 266 void ip_vs_unbind_app(struct ip_vs_conn *cp) 267 { 268 struct ip_vs_app *inc = cp->app; 269 270 if (!inc) 271 return; 272 273 if (inc->unbind_conn) 274 inc->unbind_conn(inc, cp); 275 if (inc->done_conn) 276 inc->done_conn(inc, cp); 277 ip_vs_app_inc_put(inc); 278 cp->app = NULL; 279 } 280 281 282 /* 283 * Fixes th->seq based on ip_vs_seq info. 284 */ 285 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) 286 { 287 __u32 seq = ntohl(th->seq); 288 289 /* 290 * Adjust seq with delta-offset for all packets after 291 * the most recent resized pkt seq and with previous_delta offset 292 * for all packets before most recent resized pkt seq. 293 */ 294 if (vseq->delta || vseq->previous_delta) { 295 if(after(seq, vseq->init_seq)) { 296 th->seq = htonl(seq + vseq->delta); 297 IP_VS_DBG(9, "%s(): added delta (%d) to seq\n", 298 __func__, vseq->delta); 299 } else { 300 th->seq = htonl(seq + vseq->previous_delta); 301 IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n", 302 __func__, vseq->previous_delta); 303 } 304 } 305 } 306 307 308 /* 309 * Fixes th->ack_seq based on ip_vs_seq info. 310 */ 311 static inline void 312 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) 313 { 314 __u32 ack_seq = ntohl(th->ack_seq); 315 316 /* 317 * Adjust ack_seq with delta-offset for 318 * the packets AFTER most recent resized pkt has caused a shift 319 * for packets before most recent resized pkt, use previous_delta 320 */ 321 if (vseq->delta || vseq->previous_delta) { 322 /* since ack_seq is the number of octet that is expected 323 to receive next, so compare it with init_seq+delta */ 324 if(after(ack_seq, vseq->init_seq+vseq->delta)) { 325 th->ack_seq = htonl(ack_seq - vseq->delta); 326 IP_VS_DBG(9, "%s(): subtracted delta " 327 "(%d) from ack_seq\n", __func__, vseq->delta); 328 329 } else { 330 th->ack_seq = htonl(ack_seq - vseq->previous_delta); 331 IP_VS_DBG(9, "%s(): subtracted " 332 "previous_delta (%d) from ack_seq\n", 333 __func__, vseq->previous_delta); 334 } 335 } 336 } 337 338 339 /* 340 * Updates ip_vs_seq if pkt has been resized 341 * Assumes already checked proto==IPPROTO_TCP and diff!=0. 342 */ 343 static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, 344 unsigned int flag, __u32 seq, int diff) 345 { 346 /* spinlock is to keep updating cp->flags atomic */ 347 spin_lock_bh(&cp->lock); 348 if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { 349 vseq->previous_delta = vseq->delta; 350 vseq->delta += diff; 351 vseq->init_seq = seq; 352 cp->flags |= flag; 353 } 354 spin_unlock_bh(&cp->lock); 355 } 356 357 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, 358 struct ip_vs_app *app, 359 struct ip_vs_iphdr *ipvsh) 360 { 361 int diff; 362 const unsigned int tcp_offset = ip_hdrlen(skb); 363 struct tcphdr *th; 364 __u32 seq; 365 366 if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) 367 return 0; 368 369 th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); 370 371 /* 372 * Remember seq number in case this pkt gets resized 373 */ 374 seq = ntohl(th->seq); 375 376 /* 377 * Fix seq stuff if flagged as so. 378 */ 379 if (cp->flags & IP_VS_CONN_F_OUT_SEQ) 380 vs_fix_seq(&cp->out_seq, th); 381 if (cp->flags & IP_VS_CONN_F_IN_SEQ) 382 vs_fix_ack_seq(&cp->in_seq, th); 383 384 /* 385 * Call private output hook function 386 */ 387 if (app->pkt_out == NULL) 388 return 1; 389 390 if (!app->pkt_out(app, cp, skb, &diff, ipvsh)) 391 return 0; 392 393 /* 394 * Update ip_vs seq stuff if len has changed. 395 */ 396 if (diff != 0) 397 vs_seq_update(cp, &cp->out_seq, 398 IP_VS_CONN_F_OUT_SEQ, seq, diff); 399 400 return 1; 401 } 402 403 /* 404 * Output pkt hook. Will call bound ip_vs_app specific function 405 * called by ipvs packet handler, assumes previously checked cp!=NULL 406 * returns false if it can't handle packet (oom) 407 */ 408 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, 409 struct ip_vs_iphdr *ipvsh) 410 { 411 struct ip_vs_app *app; 412 413 /* 414 * check if application module is bound to 415 * this ip_vs_conn. 416 */ 417 if ((app = cp->app) == NULL) 418 return 1; 419 420 /* TCP is complicated */ 421 if (cp->protocol == IPPROTO_TCP) 422 return app_tcp_pkt_out(cp, skb, app, ipvsh); 423 424 /* 425 * Call private output hook function 426 */ 427 if (app->pkt_out == NULL) 428 return 1; 429 430 return app->pkt_out(app, cp, skb, NULL, ipvsh); 431 } 432 433 434 static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, 435 struct ip_vs_app *app, 436 struct ip_vs_iphdr *ipvsh) 437 { 438 int diff; 439 const unsigned int tcp_offset = ip_hdrlen(skb); 440 struct tcphdr *th; 441 __u32 seq; 442 443 if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) 444 return 0; 445 446 th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); 447 448 /* 449 * Remember seq number in case this pkt gets resized 450 */ 451 seq = ntohl(th->seq); 452 453 /* 454 * Fix seq stuff if flagged as so. 455 */ 456 if (cp->flags & IP_VS_CONN_F_IN_SEQ) 457 vs_fix_seq(&cp->in_seq, th); 458 if (cp->flags & IP_VS_CONN_F_OUT_SEQ) 459 vs_fix_ack_seq(&cp->out_seq, th); 460 461 /* 462 * Call private input hook function 463 */ 464 if (app->pkt_in == NULL) 465 return 1; 466 467 if (!app->pkt_in(app, cp, skb, &diff, ipvsh)) 468 return 0; 469 470 /* 471 * Update ip_vs seq stuff if len has changed. 472 */ 473 if (diff != 0) 474 vs_seq_update(cp, &cp->in_seq, 475 IP_VS_CONN_F_IN_SEQ, seq, diff); 476 477 return 1; 478 } 479 480 /* 481 * Input pkt hook. Will call bound ip_vs_app specific function 482 * called by ipvs packet handler, assumes previously checked cp!=NULL. 483 * returns false if can't handle packet (oom). 484 */ 485 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, 486 struct ip_vs_iphdr *ipvsh) 487 { 488 struct ip_vs_app *app; 489 490 /* 491 * check if application module is bound to 492 * this ip_vs_conn. 493 */ 494 if ((app = cp->app) == NULL) 495 return 1; 496 497 /* TCP is complicated */ 498 if (cp->protocol == IPPROTO_TCP) 499 return app_tcp_pkt_in(cp, skb, app, ipvsh); 500 501 /* 502 * Call private input hook function 503 */ 504 if (app->pkt_in == NULL) 505 return 1; 506 507 return app->pkt_in(app, cp, skb, NULL, ipvsh); 508 } 509 510 511 #ifdef CONFIG_PROC_FS 512 /* 513 * /proc/net/ip_vs_app entry function 514 */ 515 516 static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) 517 { 518 struct ip_vs_app *app, *inc; 519 520 list_for_each_entry(app, &ipvs->app_list, a_list) { 521 list_for_each_entry(inc, &app->incs_list, a_list) { 522 if (pos-- == 0) 523 return inc; 524 } 525 } 526 return NULL; 527 528 } 529 530 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) 531 { 532 struct net *net = seq_file_net(seq); 533 struct netns_ipvs *ipvs = net_ipvs(net); 534 535 mutex_lock(&__ip_vs_app_mutex); 536 537 return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; 538 } 539 540 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) 541 { 542 struct ip_vs_app *inc, *app; 543 struct list_head *e; 544 struct net *net = seq_file_net(seq); 545 struct netns_ipvs *ipvs = net_ipvs(net); 546 547 ++*pos; 548 if (v == SEQ_START_TOKEN) 549 return ip_vs_app_idx(ipvs, 0); 550 551 inc = v; 552 app = inc->app; 553 554 if ((e = inc->a_list.next) != &app->incs_list) 555 return list_entry(e, struct ip_vs_app, a_list); 556 557 /* go on to next application */ 558 for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { 559 app = list_entry(e, struct ip_vs_app, a_list); 560 list_for_each_entry(inc, &app->incs_list, a_list) { 561 return inc; 562 } 563 } 564 return NULL; 565 } 566 567 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) 568 { 569 mutex_unlock(&__ip_vs_app_mutex); 570 } 571 572 static int ip_vs_app_seq_show(struct seq_file *seq, void *v) 573 { 574 if (v == SEQ_START_TOKEN) 575 seq_puts(seq, "prot port usecnt name\n"); 576 else { 577 const struct ip_vs_app *inc = v; 578 579 seq_printf(seq, "%-3s %-7u %-6d %-17s\n", 580 ip_vs_proto_name(inc->protocol), 581 ntohs(inc->port), 582 atomic_read(&inc->usecnt), 583 inc->name); 584 } 585 return 0; 586 } 587 588 static const struct seq_operations ip_vs_app_seq_ops = { 589 .start = ip_vs_app_seq_start, 590 .next = ip_vs_app_seq_next, 591 .stop = ip_vs_app_seq_stop, 592 .show = ip_vs_app_seq_show, 593 }; 594 #endif 595 596 int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) 597 { 598 INIT_LIST_HEAD(&ipvs->app_list); 599 proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, 600 sizeof(struct seq_net_private)); 601 return 0; 602 } 603 604 void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) 605 { 606 unregister_ip_vs_app(ipvs, NULL /* all */); 607 remove_proc_entry("ip_vs_app", ipvs->net->proc_net); 608 } 609