xref: /openbmc/linux/net/netfilter/ipvs/ip_vs_dh.c (revision 2874c5fd)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * IPVS:        Destination Hashing scheduling module
4  *
5  * Authors:     Wensong Zhang <wensong@gnuchina.org>
6  *
7  *              Inspired by the consistent hashing scheduler patch from
8  *              Thomas Proell <proellt@gmx.de>
9  *
10  * Changes:
11  */
12 
/*
 * The dh algorithm selects a server by the hash key of the destination IP
 * address. The pseudo code is as follows:
 *
 *       n <- servernode[dest_ip];
 *       if (n is dead) OR
 *          (n is overloaded) OR (n.weight <= 0) then
 *                 return NULL;
 *
 *       return n;
 *
 * Note that servernode is a hash table (256 buckets by default) that maps
 * the hash index derived from the packet destination IP address to the
 * current server array. If the dh scheduler is used in a cache cluster, it
 * is good to combine it with the cache_bypass feature. When the statically
 * assigned server is dead or overloaded, the load balancer can bypass the
 * cache server and send requests to the original server directly.
 *
 */
32 
33 #define KMSG_COMPONENT "IPVS"
34 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
35 
36 #include <linux/ip.h>
37 #include <linux/slab.h>
38 #include <linux/module.h>
39 #include <linux/kernel.h>
40 #include <linux/skbuff.h>
41 #include <linux/hash.h>
42 
43 #include <net/ip_vs.h>
44 
45 
46 /*
47  *      IPVS DH bucket
48  */
49 struct ip_vs_dh_bucket {
50 	struct ip_vs_dest __rcu	*dest;	/* real server (cache) */
51 };
52 
/*
 *      Geometry of the IPVS DH hash table.  The number of index bits can be
 *      supplied through CONFIG_IP_VS_DH_TAB_BITS and falls back to 8
 *      (i.e. 256 buckets) when that symbol is not defined.
 */
#ifndef CONFIG_IP_VS_DH_TAB_BITS
#define CONFIG_IP_VS_DH_TAB_BITS	8
#endif
#define IP_VS_DH_TAB_BITS	CONFIG_IP_VS_DH_TAB_BITS
#define IP_VS_DH_TAB_SIZE	(1 << IP_VS_DH_TAB_BITS)
#define IP_VS_DH_TAB_MASK	(IP_VS_DH_TAB_SIZE - 1)
62 
63 struct ip_vs_dh_state {
64 	struct ip_vs_dh_bucket		buckets[IP_VS_DH_TAB_SIZE];
65 	struct rcu_head			rcu_head;
66 };
67 
68 /*
69  *	Returns hash value for IPVS DH entry
70  */
ip_vs_dh_hashkey(int af,const union nf_inet_addr * addr)71 static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
72 {
73 	__be32 addr_fold = addr->ip;
74 
75 #ifdef CONFIG_IP_VS_IPV6
76 	if (af == AF_INET6)
77 		addr_fold = addr->ip6[0]^addr->ip6[1]^
78 			    addr->ip6[2]^addr->ip6[3];
79 #endif
80 	return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
81 }
82 
83 
84 /*
85  *      Get ip_vs_dest associated with supplied parameters.
86  */
87 static inline struct ip_vs_dest *
ip_vs_dh_get(int af,struct ip_vs_dh_state * s,const union nf_inet_addr * addr)88 ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
89 {
90 	return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
91 }
92 
93 
94 /*
95  *      Assign all the hash buckets of the specified table with the service.
96  */
97 static int
ip_vs_dh_reassign(struct ip_vs_dh_state * s,struct ip_vs_service * svc)98 ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
99 {
100 	int i;
101 	struct ip_vs_dh_bucket *b;
102 	struct list_head *p;
103 	struct ip_vs_dest *dest;
104 	bool empty;
105 
106 	b = &s->buckets[0];
107 	p = &svc->destinations;
108 	empty = list_empty(p);
109 	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
110 		dest = rcu_dereference_protected(b->dest, 1);
111 		if (dest)
112 			ip_vs_dest_put(dest);
113 		if (empty)
114 			RCU_INIT_POINTER(b->dest, NULL);
115 		else {
116 			if (p == &svc->destinations)
117 				p = p->next;
118 
119 			dest = list_entry(p, struct ip_vs_dest, n_list);
120 			ip_vs_dest_hold(dest);
121 			RCU_INIT_POINTER(b->dest, dest);
122 
123 			p = p->next;
124 		}
125 		b++;
126 	}
127 	return 0;
128 }
129 
130 
131 /*
132  *      Flush all the hash buckets of the specified table.
133  */
ip_vs_dh_flush(struct ip_vs_dh_state * s)134 static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
135 {
136 	int i;
137 	struct ip_vs_dh_bucket *b;
138 	struct ip_vs_dest *dest;
139 
140 	b = &s->buckets[0];
141 	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
142 		dest = rcu_dereference_protected(b->dest, 1);
143 		if (dest) {
144 			ip_vs_dest_put(dest);
145 			RCU_INIT_POINTER(b->dest, NULL);
146 		}
147 		b++;
148 	}
149 }
150 
151 
ip_vs_dh_init_svc(struct ip_vs_service * svc)152 static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
153 {
154 	struct ip_vs_dh_state *s;
155 
156 	/* allocate the DH table for this service */
157 	s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
158 	if (s == NULL)
159 		return -ENOMEM;
160 
161 	svc->sched_data = s;
162 	IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for "
163 		  "current service\n",
164 		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
165 
166 	/* assign the hash buckets with current dests */
167 	ip_vs_dh_reassign(s, svc);
168 
169 	return 0;
170 }
171 
172 
ip_vs_dh_done_svc(struct ip_vs_service * svc)173 static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
174 {
175 	struct ip_vs_dh_state *s = svc->sched_data;
176 
177 	/* got to clean up hash buckets here */
178 	ip_vs_dh_flush(s);
179 
180 	/* release the table itself */
181 	kfree_rcu(s, rcu_head);
182 	IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n",
183 		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
184 }
185 
186 
ip_vs_dh_dest_changed(struct ip_vs_service * svc,struct ip_vs_dest * dest)187 static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
188 				 struct ip_vs_dest *dest)
189 {
190 	struct ip_vs_dh_state *s = svc->sched_data;
191 
192 	/* assign the hash buckets with the updated service */
193 	ip_vs_dh_reassign(s, svc);
194 
195 	return 0;
196 }
197 
198 
199 /*
200  *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
201  *      consider that the server is overloaded here.
202  */
is_overloaded(struct ip_vs_dest * dest)203 static inline int is_overloaded(struct ip_vs_dest *dest)
204 {
205 	return dest->flags & IP_VS_DEST_F_OVERLOAD;
206 }
207 
208 
209 /*
210  *      Destination hashing scheduling
211  */
212 static struct ip_vs_dest *
ip_vs_dh_schedule(struct ip_vs_service * svc,const struct sk_buff * skb,struct ip_vs_iphdr * iph)213 ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
214 		  struct ip_vs_iphdr *iph)
215 {
216 	struct ip_vs_dest *dest;
217 	struct ip_vs_dh_state *s;
218 
219 	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
220 
221 	s = (struct ip_vs_dh_state *) svc->sched_data;
222 	dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
223 	if (!dest
224 	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
225 	    || atomic_read(&dest->weight) <= 0
226 	    || is_overloaded(dest)) {
227 		ip_vs_scheduler_err(svc, "no destination available");
228 		return NULL;
229 	}
230 
231 	IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
232 		      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
233 		      IP_VS_DBG_ADDR(dest->af, &dest->addr),
234 		      ntohs(dest->port));
235 
236 	return dest;
237 }
238 
239 
240 /*
241  *      IPVS DH Scheduler structure
242  */
243 static struct ip_vs_scheduler ip_vs_dh_scheduler =
244 {
245 	.name =			"dh",
246 	.refcnt =		ATOMIC_INIT(0),
247 	.module =		THIS_MODULE,
248 	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
249 	.init_service =		ip_vs_dh_init_svc,
250 	.done_service =		ip_vs_dh_done_svc,
251 	.add_dest =		ip_vs_dh_dest_changed,
252 	.del_dest =		ip_vs_dh_dest_changed,
253 	.schedule =		ip_vs_dh_schedule,
254 };
255 
256 
ip_vs_dh_init(void)257 static int __init ip_vs_dh_init(void)
258 {
259 	return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
260 }
261 
262 
ip_vs_dh_cleanup(void)263 static void __exit ip_vs_dh_cleanup(void)
264 {
265 	unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
266 	synchronize_rcu();
267 }
268 
269 
270 module_init(ip_vs_dh_init);
271 module_exit(ip_vs_dh_cleanup);
272 MODULE_LICENSE("GPL");
273