/*
 * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
33639b321bSAndy Grover #include <linux/kernel.h>
34639b321bSAndy Grover #include <net/sock.h>
35639b321bSAndy Grover #include <linux/in.h>
36eee2fa6aSKa-Cheong Poon #include <linux/ipv6.h>
37639b321bSAndy Grover #include <linux/if_arp.h>
3838a4e5e6SChris Mason #include <linux/jhash.h>
39cb0a6056SManuel Zerpies #include <linux/ratelimit.h>
40639b321bSAndy Grover #include "rds.h"
41639b321bSAndy Grover
427b565434Ssantosh.shilimkar@oracle.com static struct rhashtable bind_hash_table;
437b565434Ssantosh.shilimkar@oracle.com
448209432aSBhumika Goyal static const struct rhashtable_params ht_parms = {
457b565434Ssantosh.shilimkar@oracle.com .nelem_hint = 768,
46eee2fa6aSKa-Cheong Poon .key_len = RDS_BOUND_KEY_LEN,
477b565434Ssantosh.shilimkar@oracle.com .key_offset = offsetof(struct rds_sock, rs_bound_key),
487b565434Ssantosh.shilimkar@oracle.com .head_offset = offsetof(struct rds_sock, rs_bound_node),
497b565434Ssantosh.shilimkar@oracle.com .max_size = 16384,
507b565434Ssantosh.shilimkar@oracle.com .min_size = 1024,
519b9acde7SSantosh Shilimkar };
52639b321bSAndy Grover
53eee2fa6aSKa-Cheong Poon /* Create a key for the bind hash table manipulation. Port is in network byte
54eee2fa6aSKa-Cheong Poon * order.
55eee2fa6aSKa-Cheong Poon */
__rds_create_bind_key(u8 * key,const struct in6_addr * addr,__be16 port,__u32 scope_id)56eee2fa6aSKa-Cheong Poon static inline void __rds_create_bind_key(u8 *key, const struct in6_addr *addr,
57eee2fa6aSKa-Cheong Poon __be16 port, __u32 scope_id)
58eee2fa6aSKa-Cheong Poon {
59eee2fa6aSKa-Cheong Poon memcpy(key, addr, sizeof(*addr));
60eee2fa6aSKa-Cheong Poon key += sizeof(*addr);
61eee2fa6aSKa-Cheong Poon memcpy(key, &port, sizeof(port));
62eee2fa6aSKa-Cheong Poon key += sizeof(port);
63eee2fa6aSKa-Cheong Poon memcpy(key, &scope_id, sizeof(scope_id));
64eee2fa6aSKa-Cheong Poon }
65eee2fa6aSKa-Cheong Poon
66639b321bSAndy Grover /*
67639b321bSAndy Grover * Return the rds_sock bound at the given local address.
68639b321bSAndy Grover *
69639b321bSAndy Grover * The rx path can race with rds_release. We notice if rds_release() has
70639b321bSAndy Grover * marked this socket and don't return a rs ref to the rx path.
71639b321bSAndy Grover */
rds_find_bound(const struct in6_addr * addr,__be16 port,__u32 scope_id)72eee2fa6aSKa-Cheong Poon struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
73eee2fa6aSKa-Cheong Poon __u32 scope_id)
74639b321bSAndy Grover {
75eee2fa6aSKa-Cheong Poon u8 key[RDS_BOUND_KEY_LEN];
76639b321bSAndy Grover struct rds_sock *rs;
77639b321bSAndy Grover
78eee2fa6aSKa-Cheong Poon __rds_create_bind_key(key, addr, port, scope_id);
79cc4dfb7fSCong Wang rcu_read_lock();
80cc4dfb7fSCong Wang rs = rhashtable_lookup(&bind_hash_table, key, ht_parms);
816fa19f56SEric Dumazet if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) ||
826fa19f56SEric Dumazet !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt)))
83639b321bSAndy Grover rs = NULL;
846fa19f56SEric Dumazet
85cc4dfb7fSCong Wang rcu_read_unlock();
86639b321bSAndy Grover
87eee2fa6aSKa-Cheong Poon rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
88639b321bSAndy Grover ntohs(port));
898b0a6b46SSantosh Shilimkar
90639b321bSAndy Grover return rs;
91639b321bSAndy Grover }
92639b321bSAndy Grover
93639b321bSAndy Grover /* returns -ve errno or +ve port */
rds_add_bound(struct rds_sock * rs,const struct in6_addr * addr,__be16 * port,__u32 scope_id)94eee2fa6aSKa-Cheong Poon static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
95eee2fa6aSKa-Cheong Poon __be16 *port, __u32 scope_id)
96639b321bSAndy Grover {
97639b321bSAndy Grover int ret = -EADDRINUSE;
98639b321bSAndy Grover u16 rover, last;
99eee2fa6aSKa-Cheong Poon u8 key[RDS_BOUND_KEY_LEN];
100639b321bSAndy Grover
101639b321bSAndy Grover if (*port != 0) {
102639b321bSAndy Grover rover = be16_to_cpu(*port);
1035916e2c1SSowmini Varadhan if (rover == RDS_FLAG_PROBE_PORT)
1045916e2c1SSowmini Varadhan return -EINVAL;
105639b321bSAndy Grover last = rover;
106639b321bSAndy Grover } else {
107*f743f16cSJason A. Donenfeld rover = max_t(u16, get_random_u16(), 2);
108639b321bSAndy Grover last = rover - 1;
109639b321bSAndy Grover }
110639b321bSAndy Grover
111639b321bSAndy Grover do {
112639b321bSAndy Grover if (rover == 0)
113639b321bSAndy Grover rover++;
1149b9acde7SSantosh Shilimkar
1155916e2c1SSowmini Varadhan if (rover == RDS_FLAG_PROBE_PORT)
1165916e2c1SSowmini Varadhan continue;
117eee2fa6aSKa-Cheong Poon __rds_create_bind_key(key, addr, cpu_to_be16(rover),
118eee2fa6aSKa-Cheong Poon scope_id);
119eee2fa6aSKa-Cheong Poon if (rhashtable_lookup_fast(&bind_hash_table, key, ht_parms))
1207b565434Ssantosh.shilimkar@oracle.com continue;
1217b565434Ssantosh.shilimkar@oracle.com
122eee2fa6aSKa-Cheong Poon memcpy(rs->rs_bound_key, key, sizeof(rs->rs_bound_key));
123eee2fa6aSKa-Cheong Poon rs->rs_bound_addr = *addr;
1245916e2c1SSowmini Varadhan net_get_random_once(&rs->rs_hash_initval,
1255916e2c1SSowmini Varadhan sizeof(rs->rs_hash_initval));
1267b565434Ssantosh.shilimkar@oracle.com rs->rs_bound_port = cpu_to_be16(rover);
1277b565434Ssantosh.shilimkar@oracle.com rs->rs_bound_node.next = NULL;
1287b565434Ssantosh.shilimkar@oracle.com rds_sock_addref(rs);
1297b565434Ssantosh.shilimkar@oracle.com if (!rhashtable_insert_fast(&bind_hash_table,
1307b565434Ssantosh.shilimkar@oracle.com &rs->rs_bound_node, ht_parms)) {
13138a4e5e6SChris Mason *port = rs->rs_bound_port;
1321e2b44e7SKa-Cheong Poon rs->rs_bound_scope_id = scope_id;
133639b321bSAndy Grover ret = 0;
1341e2b44e7SKa-Cheong Poon rdsdebug("rs %p binding to %pI6c:%d\n",
1351e2b44e7SKa-Cheong Poon rs, addr, (int)ntohs(*port));
136639b321bSAndy Grover break;
13728126959SSantosh Shilimkar } else {
138eee2fa6aSKa-Cheong Poon rs->rs_bound_addr = in6addr_any;
1397b565434Ssantosh.shilimkar@oracle.com rds_sock_put(rs);
1407b565434Ssantosh.shilimkar@oracle.com ret = -ENOMEM;
1417b565434Ssantosh.shilimkar@oracle.com break;
142639b321bSAndy Grover }
143639b321bSAndy Grover } while (rover++ != last);
144639b321bSAndy Grover
145639b321bSAndy Grover return ret;
146639b321bSAndy Grover }
147639b321bSAndy Grover
rds_remove_bound(struct rds_sock * rs)148639b321bSAndy Grover void rds_remove_bound(struct rds_sock *rs)
149639b321bSAndy Grover {
150639b321bSAndy Grover
151eee2fa6aSKa-Cheong Poon if (ipv6_addr_any(&rs->rs_bound_addr))
1527b565434Ssantosh.shilimkar@oracle.com return;
153639b321bSAndy Grover
154eee2fa6aSKa-Cheong Poon rdsdebug("rs %p unbinding from %pI6c:%d\n",
155639b321bSAndy Grover rs, &rs->rs_bound_addr,
156639b321bSAndy Grover ntohs(rs->rs_bound_port));
157639b321bSAndy Grover
1587b565434Ssantosh.shilimkar@oracle.com rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
159639b321bSAndy Grover rds_sock_put(rs);
160eee2fa6aSKa-Cheong Poon rs->rs_bound_addr = in6addr_any;
161639b321bSAndy Grover }
162639b321bSAndy Grover
rds_bind(struct socket * sock,struct sockaddr * uaddr,int addr_len)163639b321bSAndy Grover int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
164639b321bSAndy Grover {
165639b321bSAndy Grover struct sock *sk = sock->sk;
166639b321bSAndy Grover struct rds_sock *rs = rds_sk_to_rs(sk);
167eee2fa6aSKa-Cheong Poon struct in6_addr v6addr, *binding_addr;
168639b321bSAndy Grover struct rds_transport *trans;
169eee2fa6aSKa-Cheong Poon __u32 scope_id = 0;
170639b321bSAndy Grover int ret = 0;
171eee2fa6aSKa-Cheong Poon __be16 port;
172639b321bSAndy Grover
1731e2b44e7SKa-Cheong Poon /* We allow an RDS socket to be bound to either IPv4 or IPv6
1741e2b44e7SKa-Cheong Poon * address.
175eee2fa6aSKa-Cheong Poon */
176dd3ac9a6STetsuo Handa if (addr_len < offsetofend(struct sockaddr, sa_family))
177dd3ac9a6STetsuo Handa return -EINVAL;
1781e2b44e7SKa-Cheong Poon if (uaddr->sa_family == AF_INET) {
179eee2fa6aSKa-Cheong Poon struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
180eee2fa6aSKa-Cheong Poon
1811e2b44e7SKa-Cheong Poon if (addr_len < sizeof(struct sockaddr_in) ||
1821e2b44e7SKa-Cheong Poon sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
1831e2b44e7SKa-Cheong Poon sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
184842841ecSDave Taht ipv4_is_multicast(sin->sin_addr.s_addr))
185eee2fa6aSKa-Cheong Poon return -EINVAL;
186eee2fa6aSKa-Cheong Poon ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
187eee2fa6aSKa-Cheong Poon binding_addr = &v6addr;
188eee2fa6aSKa-Cheong Poon port = sin->sin_port;
189e65d4d96SKa-Cheong Poon #if IS_ENABLED(CONFIG_IPV6)
1901e2b44e7SKa-Cheong Poon } else if (uaddr->sa_family == AF_INET6) {
1911e2b44e7SKa-Cheong Poon struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;
192e65d4d96SKa-Cheong Poon int addr_type;
1931e2b44e7SKa-Cheong Poon
1941e2b44e7SKa-Cheong Poon if (addr_len < sizeof(struct sockaddr_in6))
1951e2b44e7SKa-Cheong Poon return -EINVAL;
1961e2b44e7SKa-Cheong Poon addr_type = ipv6_addr_type(&sin6->sin6_addr);
1971e2b44e7SKa-Cheong Poon if (!(addr_type & IPV6_ADDR_UNICAST)) {
1981e2b44e7SKa-Cheong Poon __be32 addr4;
1991e2b44e7SKa-Cheong Poon
2001e2b44e7SKa-Cheong Poon if (!(addr_type & IPV6_ADDR_MAPPED))
2011e2b44e7SKa-Cheong Poon return -EINVAL;
2021e2b44e7SKa-Cheong Poon
2031e2b44e7SKa-Cheong Poon /* It is a mapped address. Need to do some sanity
2041e2b44e7SKa-Cheong Poon * checks.
2051e2b44e7SKa-Cheong Poon */
2061e2b44e7SKa-Cheong Poon addr4 = sin6->sin6_addr.s6_addr32[3];
2071e2b44e7SKa-Cheong Poon if (addr4 == htonl(INADDR_ANY) ||
2081e2b44e7SKa-Cheong Poon addr4 == htonl(INADDR_BROADCAST) ||
209842841ecSDave Taht ipv4_is_multicast(addr4))
2101e2b44e7SKa-Cheong Poon return -EINVAL;
2111e2b44e7SKa-Cheong Poon }
2121e2b44e7SKa-Cheong Poon /* The scope ID must be specified for link local address. */
2131e2b44e7SKa-Cheong Poon if (addr_type & IPV6_ADDR_LINKLOCAL) {
2141e2b44e7SKa-Cheong Poon if (sin6->sin6_scope_id == 0)
2151e2b44e7SKa-Cheong Poon return -EINVAL;
2161e2b44e7SKa-Cheong Poon scope_id = sin6->sin6_scope_id;
2171e2b44e7SKa-Cheong Poon }
2181e2b44e7SKa-Cheong Poon binding_addr = &sin6->sin6_addr;
2191e2b44e7SKa-Cheong Poon port = sin6->sin6_port;
220e65d4d96SKa-Cheong Poon #endif
221eee2fa6aSKa-Cheong Poon } else {
222eee2fa6aSKa-Cheong Poon return -EINVAL;
223eee2fa6aSKa-Cheong Poon }
224639b321bSAndy Grover lock_sock(sk);
225639b321bSAndy Grover
226eee2fa6aSKa-Cheong Poon /* RDS socket does not allow re-binding. */
227eee2fa6aSKa-Cheong Poon if (!ipv6_addr_any(&rs->rs_bound_addr)) {
228639b321bSAndy Grover ret = -EINVAL;
229639b321bSAndy Grover goto out;
230639b321bSAndy Grover }
2311e2b44e7SKa-Cheong Poon /* Socket is connected. The binding address should have the same
2321e2b44e7SKa-Cheong Poon * scope ID as the connected address, except the case when one is
2331e2b44e7SKa-Cheong Poon * non-link local address (scope_id is 0).
2341e2b44e7SKa-Cheong Poon */
2351e2b44e7SKa-Cheong Poon if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id &&
2361e2b44e7SKa-Cheong Poon rs->rs_bound_scope_id &&
2371e2b44e7SKa-Cheong Poon scope_id != rs->rs_bound_scope_id) {
2381e2b44e7SKa-Cheong Poon ret = -EINVAL;
2391e2b44e7SKa-Cheong Poon goto out;
2401e2b44e7SKa-Cheong Poon }
241639b321bSAndy Grover
242c5c1a030SKa-Cheong Poon /* The transport can be set using SO_RDS_TRANSPORT option before the
243c5c1a030SKa-Cheong Poon * socket is bound.
244c5c1a030SKa-Cheong Poon */
245c5c1a030SKa-Cheong Poon if (rs->rs_transport) {
24648679800SSowmini Varadhan trans = rs->rs_transport;
24705733434SKa-Cheong Poon if (!trans->laddr_check ||
24805733434SKa-Cheong Poon trans->laddr_check(sock_net(sock->sk),
249eee2fa6aSKa-Cheong Poon binding_addr, scope_id) != 0) {
25048679800SSowmini Varadhan ret = -ENOPROTOOPT;
251d97dac54SSowmini Varadhan goto out;
252d97dac54SSowmini Varadhan }
253c5c1a030SKa-Cheong Poon } else {
254c5c1a030SKa-Cheong Poon trans = rds_trans_get_preferred(sock_net(sock->sk),
255c5c1a030SKa-Cheong Poon binding_addr, scope_id);
2568690bfa1SAndy Grover if (!trans) {
257639b321bSAndy Grover ret = -EADDRNOTAVAIL;
258eee2fa6aSKa-Cheong Poon pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n",
259eee2fa6aSKa-Cheong Poon __func__, binding_addr);
260639b321bSAndy Grover goto out;
261639b321bSAndy Grover }
262639b321bSAndy Grover rs->rs_transport = trans;
263c5c1a030SKa-Cheong Poon }
264c5c1a030SKa-Cheong Poon
265c5c1a030SKa-Cheong Poon sock_set_flag(sk, SOCK_RCU_FREE);
266c5c1a030SKa-Cheong Poon ret = rds_add_bound(rs, binding_addr, &port, scope_id);
26705733434SKa-Cheong Poon if (ret)
26805733434SKa-Cheong Poon rs->rs_transport = NULL;
269639b321bSAndy Grover
270639b321bSAndy Grover out:
271639b321bSAndy Grover release_sock(sk);
272639b321bSAndy Grover return ret;
273639b321bSAndy Grover }
2749b9acde7SSantosh Shilimkar
rds_bind_lock_destroy(void)2757b565434Ssantosh.shilimkar@oracle.com void rds_bind_lock_destroy(void)
2769b9acde7SSantosh Shilimkar {
2777b565434Ssantosh.shilimkar@oracle.com rhashtable_destroy(&bind_hash_table);
2787b565434Ssantosh.shilimkar@oracle.com }
2799b9acde7SSantosh Shilimkar
rds_bind_lock_init(void)2807b565434Ssantosh.shilimkar@oracle.com int rds_bind_lock_init(void)
2817b565434Ssantosh.shilimkar@oracle.com {
2827b565434Ssantosh.shilimkar@oracle.com return rhashtable_init(&bind_hash_table, &ht_parms);
2839b9acde7SSantosh Shilimkar }
284