1 /* 2 * ip_vs_proto.c: transport protocol load balancing support for IPVS 3 * 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5 * Julian Anastasov <ja@ssi.bg> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Changes: 13 * 14 */ 15 16 #define KMSG_COMPONENT "IPVS" 17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 18 19 #include <linux/module.h> 20 #include <linux/kernel.h> 21 #include <linux/skbuff.h> 22 #include <linux/gfp.h> 23 #include <linux/in.h> 24 #include <linux/ip.h> 25 #include <net/protocol.h> 26 #include <net/tcp.h> 27 #include <net/udp.h> 28 #include <linux/stat.h> 29 #include <linux/proc_fs.h> 30 31 #include <net/ip_vs.h> 32 33 34 /* 35 * IPVS protocols can only be registered/unregistered when the ipvs 36 * module is loaded/unloaded, so no lock is needed in accessing the 37 * ipvs protocol table. 38 */ 39 40 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ 41 #define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1)) 42 43 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; 44 45 /* States for conn templates: NONE or words separated with ",", max 15 chars */ 46 static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = { 47 [IP_VS_CTPL_S_NONE] = "NONE", 48 [IP_VS_CTPL_S_ASSURED] = "ASSURED", 49 }; 50 51 /* 52 * register an ipvs protocol 53 */ 54 static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) 55 { 56 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 57 58 pp->next = ip_vs_proto_table[hash]; 59 ip_vs_proto_table[hash] = pp; 60 61 if (pp->init != NULL) 62 pp->init(pp); 63 64 return 0; 65 } 66 67 /* 68 * register an ipvs protocols netns related data 69 */ 70 static int 71 register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) 72 { 73 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 74 struct ip_vs_proto_data *pd = 75 kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); 76 77 if (!pd) 78 return -ENOMEM; 79 80 pd->pp = pp; /* For speed issues */ 81 pd->next = ipvs->proto_data_table[hash]; 82 ipvs->proto_data_table[hash] = pd; 83 atomic_set(&pd->appcnt, 0); /* Init app counter */ 84 85 if (pp->init_netns != NULL) { 86 int ret = pp->init_netns(ipvs, pd); 87 if (ret) { 88 /* unlink an free proto data */ 89 ipvs->proto_data_table[hash] = pd->next; 90 kfree(pd); 91 return ret; 92 } 93 } 94 95 return 0; 96 } 97 98 /* 99 * unregister an ipvs protocol 100 */ 101 static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) 102 { 103 struct ip_vs_protocol **pp_p; 104 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 105 106 pp_p = &ip_vs_proto_table[hash]; 107 for (; *pp_p; pp_p = &(*pp_p)->next) { 108 if (*pp_p == pp) { 109 *pp_p = pp->next; 110 if (pp->exit != NULL) 111 pp->exit(pp); 112 return 0; 113 } 114 } 115 116 return -ESRCH; 117 } 118 119 /* 120 * unregister an ipvs protocols netns data 121 */ 122 static int 123 unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) 124 { 125 struct ip_vs_proto_data **pd_p; 126 unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); 127 128 pd_p = &ipvs->proto_data_table[hash]; 129 for (; *pd_p; pd_p = &(*pd_p)->next) { 130 if (*pd_p == pd) { 131 *pd_p = pd->next; 132 if (pd->pp->exit_netns != NULL) 133 pd->pp->exit_netns(ipvs, pd); 134 kfree(pd); 135 return 0; 136 } 137 } 138 139 return -ESRCH; 140 } 141 142 /* 143 * get ip_vs_protocol object by its proto. 144 */ 145 struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) 146 { 147 struct ip_vs_protocol *pp; 148 unsigned int hash = IP_VS_PROTO_HASH(proto); 149 150 for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) { 151 if (pp->protocol == proto) 152 return pp; 153 } 154 155 return NULL; 156 } 157 EXPORT_SYMBOL(ip_vs_proto_get); 158 159 /* 160 * get ip_vs_protocol object data by netns and proto 161 */ 162 struct ip_vs_proto_data * 163 ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) 164 { 165 struct ip_vs_proto_data *pd; 166 unsigned int hash = IP_VS_PROTO_HASH(proto); 167 168 for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { 169 if (pd->pp->protocol == proto) 170 return pd; 171 } 172 173 return NULL; 174 } 175 EXPORT_SYMBOL(ip_vs_proto_data_get); 176 177 /* 178 * Propagate event for state change to all protocols 179 */ 180 void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) 181 { 182 struct ip_vs_proto_data *pd; 183 int i; 184 185 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 186 for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { 187 if (pd->pp->timeout_change) 188 pd->pp->timeout_change(pd, flags); 189 } 190 } 191 } 192 193 194 int * 195 ip_vs_create_timeout_table(int *table, int size) 196 { 197 return kmemdup(table, size, GFP_KERNEL); 198 } 199 200 201 const char *ip_vs_state_name(const struct ip_vs_conn *cp) 202 { 203 unsigned int state = cp->state; 204 struct ip_vs_protocol *pp; 205 206 if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 207 208 if (state >= IP_VS_CTPL_S_LAST) 209 return "ERR!"; 210 return ip_vs_ctpl_state_name_table[state] ? : "?"; 211 } 212 pp = ip_vs_proto_get(cp->protocol); 213 if (pp == NULL || pp->state_name == NULL) 214 return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!"; 215 return pp->state_name(state); 216 } 217 218 219 static void 220 ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, 221 const struct sk_buff *skb, 222 int offset, 223 const char *msg) 224 { 225 char buf[128]; 226 struct iphdr _iph, *ih; 227 228 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 229 if (ih == NULL) 230 sprintf(buf, "TRUNCATED"); 231 else if (ih->frag_off & htons(IP_OFFSET)) 232 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); 233 else { 234 __be16 _ports[2], *pptr; 235 236 pptr = skb_header_pointer(skb, offset + ih->ihl*4, 237 sizeof(_ports), _ports); 238 if (pptr == NULL) 239 sprintf(buf, "TRUNCATED %pI4->%pI4", 240 &ih->saddr, &ih->daddr); 241 else 242 sprintf(buf, "%pI4:%u->%pI4:%u", 243 &ih->saddr, ntohs(pptr[0]), 244 &ih->daddr, ntohs(pptr[1])); 245 } 246 247 pr_debug("%s: %s %s\n", msg, pp->name, buf); 248 } 249 250 #ifdef CONFIG_IP_VS_IPV6 251 static void 252 ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, 253 const struct sk_buff *skb, 254 int offset, 255 const char *msg) 256 { 257 char buf[192]; 258 struct ipv6hdr _iph, *ih; 259 260 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 261 if (ih == NULL) 262 sprintf(buf, "TRUNCATED"); 263 else if (ih->nexthdr == IPPROTO_FRAGMENT) 264 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr); 265 else { 266 __be16 _ports[2], *pptr; 267 268 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 269 sizeof(_ports), _ports); 270 if (pptr == NULL) 271 sprintf(buf, "TRUNCATED %pI6c->%pI6c", 272 &ih->saddr, &ih->daddr); 273 else 274 sprintf(buf, "%pI6c:%u->%pI6c:%u", 275 &ih->saddr, ntohs(pptr[0]), 276 &ih->daddr, ntohs(pptr[1])); 277 } 278 279 pr_debug("%s: %s %s\n", msg, pp->name, buf); 280 } 281 #endif 282 283 284 void 285 ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, 286 const struct sk_buff *skb, 287 int offset, 288 const char *msg) 289 { 290 #ifdef CONFIG_IP_VS_IPV6 291 if (af == AF_INET6) 292 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); 293 else 294 #endif 295 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); 296 } 297 298 /* 299 * per network name-space init 300 */ 301 int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) 302 { 303 int i, ret; 304 static struct ip_vs_protocol *protos[] = { 305 #ifdef CONFIG_IP_VS_PROTO_TCP 306 &ip_vs_protocol_tcp, 307 #endif 308 #ifdef CONFIG_IP_VS_PROTO_UDP 309 &ip_vs_protocol_udp, 310 #endif 311 #ifdef CONFIG_IP_VS_PROTO_SCTP 312 &ip_vs_protocol_sctp, 313 #endif 314 #ifdef CONFIG_IP_VS_PROTO_AH 315 &ip_vs_protocol_ah, 316 #endif 317 #ifdef CONFIG_IP_VS_PROTO_ESP 318 &ip_vs_protocol_esp, 319 #endif 320 }; 321 322 for (i = 0; i < ARRAY_SIZE(protos); i++) { 323 ret = register_ip_vs_proto_netns(ipvs, protos[i]); 324 if (ret < 0) 325 goto cleanup; 326 } 327 return 0; 328 329 cleanup: 330 ip_vs_protocol_net_cleanup(ipvs); 331 return ret; 332 } 333 334 void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) 335 { 336 struct ip_vs_proto_data *pd; 337 int i; 338 339 /* unregister all the ipvs proto data for this netns */ 340 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 341 while ((pd = ipvs->proto_data_table[i]) != NULL) 342 unregister_ip_vs_proto_netns(ipvs, pd); 343 } 344 } 345 346 int __init ip_vs_protocol_init(void) 347 { 348 char protocols[64]; 349 #define REGISTER_PROTOCOL(p) \ 350 do { \ 351 register_ip_vs_protocol(p); \ 352 strcat(protocols, ", "); \ 353 strcat(protocols, (p)->name); \ 354 } while (0) 355 356 protocols[0] = '\0'; 357 protocols[2] = '\0'; 358 #ifdef CONFIG_IP_VS_PROTO_TCP 359 REGISTER_PROTOCOL(&ip_vs_protocol_tcp); 360 #endif 361 #ifdef CONFIG_IP_VS_PROTO_UDP 362 REGISTER_PROTOCOL(&ip_vs_protocol_udp); 363 #endif 364 #ifdef CONFIG_IP_VS_PROTO_SCTP 365 REGISTER_PROTOCOL(&ip_vs_protocol_sctp); 366 #endif 367 #ifdef CONFIG_IP_VS_PROTO_AH 368 REGISTER_PROTOCOL(&ip_vs_protocol_ah); 369 #endif 370 #ifdef CONFIG_IP_VS_PROTO_ESP 371 REGISTER_PROTOCOL(&ip_vs_protocol_esp); 372 #endif 373 pr_info("Registered protocols (%s)\n", &protocols[2]); 374 375 return 0; 376 } 377 378 379 void ip_vs_protocol_cleanup(void) 380 { 381 struct ip_vs_protocol *pp; 382 int i; 383 384 /* unregister all the ipvs protocols */ 385 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 386 while ((pp = ip_vs_proto_table[i]) != NULL) 387 unregister_ip_vs_protocol(pp); 388 } 389 } 390