1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * ip_vs_proto.c: transport protocol load balancing support for IPVS 4 * 5 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 6 * Julian Anastasov <ja@ssi.bg> 7 * 8 * Changes: 9 */ 10 11 #define KMSG_COMPONENT "IPVS" 12 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 13 14 #include <linux/module.h> 15 #include <linux/kernel.h> 16 #include <linux/skbuff.h> 17 #include <linux/gfp.h> 18 #include <linux/in.h> 19 #include <linux/ip.h> 20 #include <net/protocol.h> 21 #include <net/tcp.h> 22 #include <net/udp.h> 23 #include <linux/stat.h> 24 #include <linux/proc_fs.h> 25 26 #include <net/ip_vs.h> 27 28 29 /* 30 * IPVS protocols can only be registered/unregistered when the ipvs 31 * module is loaded/unloaded, so no lock is needed in accessing the 32 * ipvs protocol table. 33 */ 34 35 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ 36 #define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1)) 37 38 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; 39 40 /* States for conn templates: NONE or words separated with ",", max 15 chars */ 41 static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = { 42 [IP_VS_CTPL_S_NONE] = "NONE", 43 [IP_VS_CTPL_S_ASSURED] = "ASSURED", 44 }; 45 46 /* 47 * register an ipvs protocol 48 */ 49 static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) 50 { 51 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 52 53 pp->next = ip_vs_proto_table[hash]; 54 ip_vs_proto_table[hash] = pp; 55 56 if (pp->init != NULL) 57 pp->init(pp); 58 59 return 0; 60 } 61 62 /* 63 * register an ipvs protocols netns related data 64 */ 65 static int 66 register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) 67 { 68 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 69 struct ip_vs_proto_data *pd = 70 kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); 71 72 if (!pd) 73 return -ENOMEM; 74 75 pd->pp = pp; /* For speed issues */ 76 pd->next = ipvs->proto_data_table[hash]; 77 ipvs->proto_data_table[hash] = pd; 78 atomic_set(&pd->appcnt, 0); /* Init app counter */ 79 80 if (pp->init_netns != NULL) { 81 int ret = pp->init_netns(ipvs, pd); 82 if (ret) { 83 /* unlink an free proto data */ 84 ipvs->proto_data_table[hash] = pd->next; 85 kfree(pd); 86 return ret; 87 } 88 } 89 90 return 0; 91 } 92 93 /* 94 * unregister an ipvs protocol 95 */ 96 static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) 97 { 98 struct ip_vs_protocol **pp_p; 99 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 100 101 pp_p = &ip_vs_proto_table[hash]; 102 for (; *pp_p; pp_p = &(*pp_p)->next) { 103 if (*pp_p == pp) { 104 *pp_p = pp->next; 105 if (pp->exit != NULL) 106 pp->exit(pp); 107 return 0; 108 } 109 } 110 111 return -ESRCH; 112 } 113 114 /* 115 * unregister an ipvs protocols netns data 116 */ 117 static int 118 unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) 119 { 120 struct ip_vs_proto_data **pd_p; 121 unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); 122 123 pd_p = &ipvs->proto_data_table[hash]; 124 for (; *pd_p; pd_p = &(*pd_p)->next) { 125 if (*pd_p == pd) { 126 *pd_p = pd->next; 127 if (pd->pp->exit_netns != NULL) 128 pd->pp->exit_netns(ipvs, pd); 129 kfree(pd); 130 return 0; 131 } 132 } 133 134 return -ESRCH; 135 } 136 137 /* 138 * get ip_vs_protocol object by its proto. 139 */ 140 struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) 141 { 142 struct ip_vs_protocol *pp; 143 unsigned int hash = IP_VS_PROTO_HASH(proto); 144 145 for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) { 146 if (pp->protocol == proto) 147 return pp; 148 } 149 150 return NULL; 151 } 152 EXPORT_SYMBOL(ip_vs_proto_get); 153 154 /* 155 * get ip_vs_protocol object data by netns and proto 156 */ 157 struct ip_vs_proto_data * 158 ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) 159 { 160 struct ip_vs_proto_data *pd; 161 unsigned int hash = IP_VS_PROTO_HASH(proto); 162 163 for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { 164 if (pd->pp->protocol == proto) 165 return pd; 166 } 167 168 return NULL; 169 } 170 EXPORT_SYMBOL(ip_vs_proto_data_get); 171 172 /* 173 * Propagate event for state change to all protocols 174 */ 175 void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) 176 { 177 struct ip_vs_proto_data *pd; 178 int i; 179 180 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 181 for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { 182 if (pd->pp->timeout_change) 183 pd->pp->timeout_change(pd, flags); 184 } 185 } 186 } 187 188 189 int * 190 ip_vs_create_timeout_table(int *table, int size) 191 { 192 return kmemdup(table, size, GFP_KERNEL); 193 } 194 195 196 const char *ip_vs_state_name(const struct ip_vs_conn *cp) 197 { 198 unsigned int state = cp->state; 199 struct ip_vs_protocol *pp; 200 201 if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 202 203 if (state >= IP_VS_CTPL_S_LAST) 204 return "ERR!"; 205 return ip_vs_ctpl_state_name_table[state] ? : "?"; 206 } 207 pp = ip_vs_proto_get(cp->protocol); 208 if (pp == NULL || pp->state_name == NULL) 209 return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!"; 210 return pp->state_name(state); 211 } 212 213 214 static void 215 ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, 216 const struct sk_buff *skb, 217 int offset, 218 const char *msg) 219 { 220 char buf[128]; 221 struct iphdr _iph, *ih; 222 223 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 224 if (ih == NULL) 225 sprintf(buf, "TRUNCATED"); 226 else if (ih->frag_off & htons(IP_OFFSET)) 227 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); 228 else { 229 __be16 _ports[2], *pptr; 230 231 pptr = skb_header_pointer(skb, offset + ih->ihl*4, 232 sizeof(_ports), _ports); 233 if (pptr == NULL) 234 sprintf(buf, "TRUNCATED %pI4->%pI4", 235 &ih->saddr, &ih->daddr); 236 else 237 sprintf(buf, "%pI4:%u->%pI4:%u", 238 &ih->saddr, ntohs(pptr[0]), 239 &ih->daddr, ntohs(pptr[1])); 240 } 241 242 pr_debug("%s: %s %s\n", msg, pp->name, buf); 243 } 244 245 #ifdef CONFIG_IP_VS_IPV6 246 static void 247 ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, 248 const struct sk_buff *skb, 249 int offset, 250 const char *msg) 251 { 252 char buf[192]; 253 struct ipv6hdr _iph, *ih; 254 255 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 256 if (ih == NULL) 257 sprintf(buf, "TRUNCATED"); 258 else if (ih->nexthdr == IPPROTO_FRAGMENT) 259 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr); 260 else { 261 __be16 _ports[2], *pptr; 262 263 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 264 sizeof(_ports), _ports); 265 if (pptr == NULL) 266 sprintf(buf, "TRUNCATED %pI6c->%pI6c", 267 &ih->saddr, &ih->daddr); 268 else 269 sprintf(buf, "%pI6c:%u->%pI6c:%u", 270 &ih->saddr, ntohs(pptr[0]), 271 &ih->daddr, ntohs(pptr[1])); 272 } 273 274 pr_debug("%s: %s %s\n", msg, pp->name, buf); 275 } 276 #endif 277 278 279 void 280 ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, 281 const struct sk_buff *skb, 282 int offset, 283 const char *msg) 284 { 285 #ifdef CONFIG_IP_VS_IPV6 286 if (af == AF_INET6) 287 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); 288 else 289 #endif 290 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); 291 } 292 293 /* 294 * per network name-space init 295 */ 296 int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) 297 { 298 int i, ret; 299 static struct ip_vs_protocol *protos[] = { 300 #ifdef CONFIG_IP_VS_PROTO_TCP 301 &ip_vs_protocol_tcp, 302 #endif 303 #ifdef CONFIG_IP_VS_PROTO_UDP 304 &ip_vs_protocol_udp, 305 #endif 306 #ifdef CONFIG_IP_VS_PROTO_SCTP 307 &ip_vs_protocol_sctp, 308 #endif 309 #ifdef CONFIG_IP_VS_PROTO_AH 310 &ip_vs_protocol_ah, 311 #endif 312 #ifdef CONFIG_IP_VS_PROTO_ESP 313 &ip_vs_protocol_esp, 314 #endif 315 }; 316 317 for (i = 0; i < ARRAY_SIZE(protos); i++) { 318 ret = register_ip_vs_proto_netns(ipvs, protos[i]); 319 if (ret < 0) 320 goto cleanup; 321 } 322 return 0; 323 324 cleanup: 325 ip_vs_protocol_net_cleanup(ipvs); 326 return ret; 327 } 328 329 void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) 330 { 331 struct ip_vs_proto_data *pd; 332 int i; 333 334 /* unregister all the ipvs proto data for this netns */ 335 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 336 while ((pd = ipvs->proto_data_table[i]) != NULL) 337 unregister_ip_vs_proto_netns(ipvs, pd); 338 } 339 } 340 341 int __init ip_vs_protocol_init(void) 342 { 343 char protocols[64]; 344 #define REGISTER_PROTOCOL(p) \ 345 do { \ 346 register_ip_vs_protocol(p); \ 347 strcat(protocols, ", "); \ 348 strcat(protocols, (p)->name); \ 349 } while (0) 350 351 protocols[0] = '\0'; 352 protocols[2] = '\0'; 353 #ifdef CONFIG_IP_VS_PROTO_TCP 354 REGISTER_PROTOCOL(&ip_vs_protocol_tcp); 355 #endif 356 #ifdef CONFIG_IP_VS_PROTO_UDP 357 REGISTER_PROTOCOL(&ip_vs_protocol_udp); 358 #endif 359 #ifdef CONFIG_IP_VS_PROTO_SCTP 360 REGISTER_PROTOCOL(&ip_vs_protocol_sctp); 361 #endif 362 #ifdef CONFIG_IP_VS_PROTO_AH 363 REGISTER_PROTOCOL(&ip_vs_protocol_ah); 364 #endif 365 #ifdef CONFIG_IP_VS_PROTO_ESP 366 REGISTER_PROTOCOL(&ip_vs_protocol_esp); 367 #endif 368 pr_info("Registered protocols (%s)\n", &protocols[2]); 369 370 return 0; 371 } 372 373 374 void ip_vs_protocol_cleanup(void) 375 { 376 struct ip_vs_protocol *pp; 377 int i; 378 379 /* unregister all the ipvs protocols */ 380 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 381 while ((pp = ip_vs_proto_table[i]) != NULL) 382 unregister_ip_vs_protocol(pp); 383 } 384 } 385