1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * IPVS: Destination Hashing scheduling module 4 * 5 * Authors: Wensong Zhang <wensong@gnuchina.org> 6 * 7 * Inspired by the consistent hashing scheduler patch from 8 * Thomas Proell <proellt@gmx.de> 9 * 10 * Changes: 11 */ 12 13 /* 14 * The dh algorithm is to select server by the hash key of destination IP 15 * address. The pseudo code is as follows: 16 * 17 * n <- servernode[dest_ip]; 18 * if (n is dead) OR 19 * (n is overloaded) OR (n.weight <= 0) then 20 * return NULL; 21 * 22 * return n; 23 * 24 * Notes that servernode is a 256-bucket hash table that maps the hash 25 * index derived from packet destination IP address to the current server 26 * array. If the dh scheduler is used in cache cluster, it is good to 27 * combine it with cache_bypass feature. When the statically assigned 28 * server is dead or overloaded, the load balancer can bypass the cache 29 * server and send requests to the original server directly. 30 * 31 */ 32 33 #define KMSG_COMPONENT "IPVS" 34 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 35 36 #include <linux/ip.h> 37 #include <linux/slab.h> 38 #include <linux/module.h> 39 #include <linux/kernel.h> 40 #include <linux/skbuff.h> 41 #include <linux/hash.h> 42 43 #include <net/ip_vs.h> 44 45 46 /* 47 * IPVS DH bucket 48 */ 49 struct ip_vs_dh_bucket { 50 struct ip_vs_dest __rcu *dest; /* real server (cache) */ 51 }; 52 53 /* 54 * for IPVS DH entry hash table 55 */ 56 #ifndef CONFIG_IP_VS_DH_TAB_BITS 57 #define CONFIG_IP_VS_DH_TAB_BITS 8 58 #endif 59 #define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS 60 #define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) 61 #define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) 62 63 struct ip_vs_dh_state { 64 struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE]; 65 struct rcu_head rcu_head; 66 }; 67 68 /* 69 * Returns hash value for IPVS DH entry 70 */ 71 static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr) 72 { 73 __be32 addr_fold = addr->ip; 74 75 #ifdef CONFIG_IP_VS_IPV6 76 if (af == AF_INET6) 77 addr_fold = addr->ip6[0]^addr->ip6[1]^ 78 addr->ip6[2]^addr->ip6[3]; 79 #endif 80 return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS); 81 } 82 83 84 /* 85 * Get ip_vs_dest associated with supplied parameters. 86 */ 87 static inline struct ip_vs_dest * 88 ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr) 89 { 90 return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest); 91 } 92 93 94 /* 95 * Assign all the hash buckets of the specified table with the service. 96 */ 97 static int 98 ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc) 99 { 100 int i; 101 struct ip_vs_dh_bucket *b; 102 struct list_head *p; 103 struct ip_vs_dest *dest; 104 bool empty; 105 106 b = &s->buckets[0]; 107 p = &svc->destinations; 108 empty = list_empty(p); 109 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { 110 dest = rcu_dereference_protected(b->dest, 1); 111 if (dest) 112 ip_vs_dest_put(dest); 113 if (empty) 114 RCU_INIT_POINTER(b->dest, NULL); 115 else { 116 if (p == &svc->destinations) 117 p = p->next; 118 119 dest = list_entry(p, struct ip_vs_dest, n_list); 120 ip_vs_dest_hold(dest); 121 RCU_INIT_POINTER(b->dest, dest); 122 123 p = p->next; 124 } 125 b++; 126 } 127 return 0; 128 } 129 130 131 /* 132 * Flush all the hash buckets of the specified table. 133 */ 134 static void ip_vs_dh_flush(struct ip_vs_dh_state *s) 135 { 136 int i; 137 struct ip_vs_dh_bucket *b; 138 struct ip_vs_dest *dest; 139 140 b = &s->buckets[0]; 141 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { 142 dest = rcu_dereference_protected(b->dest, 1); 143 if (dest) { 144 ip_vs_dest_put(dest); 145 RCU_INIT_POINTER(b->dest, NULL); 146 } 147 b++; 148 } 149 } 150 151 152 static int ip_vs_dh_init_svc(struct ip_vs_service *svc) 153 { 154 struct ip_vs_dh_state *s; 155 156 /* allocate the DH table for this service */ 157 s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL); 158 if (s == NULL) 159 return -ENOMEM; 160 161 svc->sched_data = s; 162 IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for " 163 "current service\n", 164 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); 165 166 /* assign the hash buckets with current dests */ 167 ip_vs_dh_reassign(s, svc); 168 169 return 0; 170 } 171 172 173 static void ip_vs_dh_done_svc(struct ip_vs_service *svc) 174 { 175 struct ip_vs_dh_state *s = svc->sched_data; 176 177 /* got to clean up hash buckets here */ 178 ip_vs_dh_flush(s); 179 180 /* release the table itself */ 181 kfree_rcu(s, rcu_head); 182 IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n", 183 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); 184 } 185 186 187 static int ip_vs_dh_dest_changed(struct ip_vs_service *svc, 188 struct ip_vs_dest *dest) 189 { 190 struct ip_vs_dh_state *s = svc->sched_data; 191 192 /* assign the hash buckets with the updated service */ 193 ip_vs_dh_reassign(s, svc); 194 195 return 0; 196 } 197 198 199 /* 200 * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, 201 * consider that the server is overloaded here. 202 */ 203 static inline int is_overloaded(struct ip_vs_dest *dest) 204 { 205 return dest->flags & IP_VS_DEST_F_OVERLOAD; 206 } 207 208 209 /* 210 * Destination hashing scheduling 211 */ 212 static struct ip_vs_dest * 213 ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, 214 struct ip_vs_iphdr *iph) 215 { 216 struct ip_vs_dest *dest; 217 struct ip_vs_dh_state *s; 218 219 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 220 221 s = (struct ip_vs_dh_state *) svc->sched_data; 222 dest = ip_vs_dh_get(svc->af, s, &iph->daddr); 223 if (!dest 224 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 225 || atomic_read(&dest->weight) <= 0 226 || is_overloaded(dest)) { 227 ip_vs_scheduler_err(svc, "no destination available"); 228 return NULL; 229 } 230 231 IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", 232 IP_VS_DBG_ADDR(svc->af, &iph->daddr), 233 IP_VS_DBG_ADDR(dest->af, &dest->addr), 234 ntohs(dest->port)); 235 236 return dest; 237 } 238 239 240 /* 241 * IPVS DH Scheduler structure 242 */ 243 static struct ip_vs_scheduler ip_vs_dh_scheduler = 244 { 245 .name = "dh", 246 .refcnt = ATOMIC_INIT(0), 247 .module = THIS_MODULE, 248 .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), 249 .init_service = ip_vs_dh_init_svc, 250 .done_service = ip_vs_dh_done_svc, 251 .add_dest = ip_vs_dh_dest_changed, 252 .del_dest = ip_vs_dh_dest_changed, 253 .schedule = ip_vs_dh_schedule, 254 }; 255 256 257 static int __init ip_vs_dh_init(void) 258 { 259 return register_ip_vs_scheduler(&ip_vs_dh_scheduler); 260 } 261 262 263 static void __exit ip_vs_dh_cleanup(void) 264 { 265 unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); 266 synchronize_rcu(); 267 } 268 269 270 module_init(ip_vs_dh_init); 271 module_exit(ip_vs_dh_cleanup); 272 MODULE_LICENSE("GPL"); 273