1 /* 2 * To speed up listener socket lookup, create an array to store all sockets 3 * listening on the same port. This allows a decision to be made after finding 4 * the first socket. 5 */ 6 7 #include <net/sock_reuseport.h> 8 #include <linux/rcupdate.h> 9 10 #define INIT_SOCKS 128 11 12 static DEFINE_SPINLOCK(reuseport_lock); 13 14 static struct sock_reuseport *__reuseport_alloc(u16 max_socks) 15 { 16 size_t size = sizeof(struct sock_reuseport) + 17 sizeof(struct sock *) * max_socks; 18 struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); 19 20 if (!reuse) 21 return NULL; 22 23 reuse->max_socks = max_socks; 24 25 return reuse; 26 } 27 28 int reuseport_alloc(struct sock *sk) 29 { 30 struct sock_reuseport *reuse; 31 32 /* bh lock used since this function call may precede hlist lock in 33 * soft irq of receive path or setsockopt from process context 34 */ 35 spin_lock_bh(&reuseport_lock); 36 WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, 37 lockdep_is_held(&reuseport_lock)), 38 "multiple allocations for the same socket"); 39 reuse = __reuseport_alloc(INIT_SOCKS); 40 if (!reuse) { 41 spin_unlock_bh(&reuseport_lock); 42 return -ENOMEM; 43 } 44 45 reuse->socks[0] = sk; 46 reuse->num_socks = 1; 47 rcu_assign_pointer(sk->sk_reuseport_cb, reuse); 48 49 spin_unlock_bh(&reuseport_lock); 50 51 return 0; 52 } 53 EXPORT_SYMBOL(reuseport_alloc); 54 55 static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) 56 { 57 struct sock_reuseport *more_reuse; 58 u32 more_socks_size, i; 59 60 more_socks_size = reuse->max_socks * 2U; 61 if (more_socks_size > U16_MAX) 62 return NULL; 63 64 more_reuse = __reuseport_alloc(more_socks_size); 65 if (!more_reuse) 66 return NULL; 67 68 more_reuse->max_socks = more_socks_size; 69 more_reuse->num_socks = reuse->num_socks; 70 71 memcpy(more_reuse->socks, reuse->socks, 72 reuse->num_socks * sizeof(struct sock *)); 73 74 for (i = 0; i < reuse->num_socks; ++i) 75 rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, 76 more_reuse); 77 78 kfree_rcu(reuse, rcu); 79 return more_reuse; 80 } 81 82 /** 83 * reuseport_add_sock - Add a socket to the reuseport group of another. 84 * @sk: New socket to add to the group. 85 * @sk2: Socket belonging to the existing reuseport group. 86 * May return ENOMEM and not add socket to group under memory pressure. 87 */ 88 int reuseport_add_sock(struct sock *sk, const struct sock *sk2) 89 { 90 struct sock_reuseport *reuse; 91 92 spin_lock_bh(&reuseport_lock); 93 reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, 94 lockdep_is_held(&reuseport_lock)), 95 WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, 96 lockdep_is_held(&reuseport_lock)), 97 "socket already in reuseport group"); 98 99 if (reuse->num_socks == reuse->max_socks) { 100 reuse = reuseport_grow(reuse); 101 if (!reuse) { 102 spin_unlock_bh(&reuseport_lock); 103 return -ENOMEM; 104 } 105 } 106 107 reuse->socks[reuse->num_socks] = sk; 108 /* paired with smp_rmb() in reuseport_select_sock() */ 109 smp_wmb(); 110 reuse->num_socks++; 111 rcu_assign_pointer(sk->sk_reuseport_cb, reuse); 112 113 spin_unlock_bh(&reuseport_lock); 114 115 return 0; 116 } 117 EXPORT_SYMBOL(reuseport_add_sock); 118 119 void reuseport_detach_sock(struct sock *sk) 120 { 121 struct sock_reuseport *reuse; 122 int i; 123 124 spin_lock_bh(&reuseport_lock); 125 reuse = rcu_dereference_protected(sk->sk_reuseport_cb, 126 lockdep_is_held(&reuseport_lock)); 127 rcu_assign_pointer(sk->sk_reuseport_cb, NULL); 128 129 for (i = 0; i < reuse->num_socks; i++) { 130 if (reuse->socks[i] == sk) { 131 reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; 132 reuse->num_socks--; 133 if (reuse->num_socks == 0) 134 kfree_rcu(reuse, rcu); 135 break; 136 } 137 } 138 spin_unlock_bh(&reuseport_lock); 139 } 140 EXPORT_SYMBOL(reuseport_detach_sock); 141 142 /** 143 * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. 144 * @sk: First socket in the group. 145 * @hash: Use this hash to select. 146 * Returns a socket that should receive the packet (or NULL on error). 147 */ 148 struct sock *reuseport_select_sock(struct sock *sk, u32 hash) 149 { 150 struct sock_reuseport *reuse; 151 struct sock *sk2 = NULL; 152 u16 socks; 153 154 rcu_read_lock(); 155 reuse = rcu_dereference(sk->sk_reuseport_cb); 156 157 /* if memory allocation failed or add call is not yet complete */ 158 if (!reuse) 159 goto out; 160 161 socks = READ_ONCE(reuse->num_socks); 162 if (likely(socks)) { 163 /* paired with smp_wmb() in reuseport_add_sock() */ 164 smp_rmb(); 165 166 sk2 = reuse->socks[reciprocal_scale(hash, socks)]; 167 } 168 169 out: 170 rcu_read_unlock(); 171 return sk2; 172 } 173 EXPORT_SYMBOL(reuseport_select_sock); 174