1b2cbae2cSRoland Dreier /*
28bc67414SLeon Romanovsky * Copyright (c) 2017 Mellanox Technologies Inc. All rights reserved.
3b2cbae2cSRoland Dreier * Copyright (c) 2010 Voltaire Inc. All rights reserved.
4b2cbae2cSRoland Dreier *
5b2cbae2cSRoland Dreier * This software is available to you under a choice of one of two
6b2cbae2cSRoland Dreier * licenses. You may choose to be licensed under the terms of the GNU
7b2cbae2cSRoland Dreier * General Public License (GPL) Version 2, available from the file
8b2cbae2cSRoland Dreier * COPYING in the main directory of this source tree, or the
9b2cbae2cSRoland Dreier * OpenIB.org BSD license below:
10b2cbae2cSRoland Dreier *
11b2cbae2cSRoland Dreier * Redistribution and use in source and binary forms, with or
12b2cbae2cSRoland Dreier * without modification, are permitted provided that the following
13b2cbae2cSRoland Dreier * conditions are met:
14b2cbae2cSRoland Dreier *
15b2cbae2cSRoland Dreier * - Redistributions of source code must retain the above
16b2cbae2cSRoland Dreier * copyright notice, this list of conditions and the following
17b2cbae2cSRoland Dreier * disclaimer.
18b2cbae2cSRoland Dreier *
19b2cbae2cSRoland Dreier * - Redistributions in binary form must reproduce the above
20b2cbae2cSRoland Dreier * copyright notice, this list of conditions and the following
21b2cbae2cSRoland Dreier * disclaimer in the documentation and/or other materials
22b2cbae2cSRoland Dreier * provided with the distribution.
23b2cbae2cSRoland Dreier *
24b2cbae2cSRoland Dreier * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25b2cbae2cSRoland Dreier * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26b2cbae2cSRoland Dreier * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27b2cbae2cSRoland Dreier * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28b2cbae2cSRoland Dreier * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29b2cbae2cSRoland Dreier * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30b2cbae2cSRoland Dreier * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31b2cbae2cSRoland Dreier * SOFTWARE.
32b2cbae2cSRoland Dreier */
33b2cbae2cSRoland Dreier
34b2cbae2cSRoland Dreier #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
35b2cbae2cSRoland Dreier
36b108d976SPaul Gortmaker #include <linux/export.h>
37b2cbae2cSRoland Dreier #include <net/netlink.h>
38b2cbae2cSRoland Dreier #include <net/net_namespace.h>
391d2fedd8SParav Pandit #include <net/netns/generic.h>
40b2cbae2cSRoland Dreier #include <net/sock.h>
41b2cbae2cSRoland Dreier #include <rdma/rdma_netlink.h>
421eb5be0eSJason Gunthorpe #include <linux/module.h>
43233c1955SLeon Romanovsky #include "core_priv.h"
44b2cbae2cSRoland Dreier
/*
 * Per-client registration state, indexed by the RDMA netlink client id
 * (RDMA_NL_* values, bounded by RDMA_NL_NUM_CLIENTS).
 */
static struct {
	/* Op dispatch table installed by rdma_nl_register(); NULL when the
	 * client is not registered. Published with smp_store_release() and
	 * read with READ_ONCE() by the receive path.
	 */
	const struct rdma_nl_cbs *cb_table;
	/* Synchronizes between ongoing netlink commands and netlink client
	 * unregistration.
	 */
	struct rw_semaphore sem;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
52b2cbae2cSRoland Dreier
rdma_nl_chk_listeners(unsigned int group)5338716732SLeon Romanovsky bool rdma_nl_chk_listeners(unsigned int group)
54bc10ed7dSKaike Wan {
551d2fedd8SParav Pandit struct rdma_dev_net *rnet = rdma_net_to_dev_net(&init_net);
561d2fedd8SParav Pandit
571d2fedd8SParav Pandit return netlink_has_listeners(rnet->nl_sock, group);
58bc10ed7dSKaike Wan }
59ff61c425SLeon Romanovsky EXPORT_SYMBOL(rdma_nl_chk_listeners);
60bc10ed7dSKaike Wan
is_nl_msg_valid(unsigned int type,unsigned int op)61c9901724SLeon Romanovsky static bool is_nl_msg_valid(unsigned int type, unsigned int op)
62c9901724SLeon Romanovsky {
638b2c7e7aSLeon Romanovsky static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS] = {
64015a9e66SLinus Torvalds [RDMA_NL_IWCM] = RDMA_NL_IWPM_NUM_OPS,
65015a9e66SLinus Torvalds [RDMA_NL_LS] = RDMA_NL_LS_NUM_OPS,
66015a9e66SLinus Torvalds [RDMA_NL_NLDEV] = RDMA_NLDEV_NUM_OPS,
67015a9e66SLinus Torvalds };
68c9901724SLeon Romanovsky
69c9901724SLeon Romanovsky /*
70c9901724SLeon Romanovsky * This BUILD_BUG_ON is intended to catch addition of new
71c9901724SLeon Romanovsky * RDMA netlink protocol without updating the array above.
72c9901724SLeon Romanovsky */
73c9901724SLeon Romanovsky BUILD_BUG_ON(RDMA_NL_NUM_CLIENTS != 6);
74c9901724SLeon Romanovsky
758b2c7e7aSLeon Romanovsky if (type >= RDMA_NL_NUM_CLIENTS)
76c9901724SLeon Romanovsky return false;
77c9901724SLeon Romanovsky
78*272bba19SRuan Jinjie return op < max_num_ops[type];
79c9901724SLeon Romanovsky }
80c9901724SLeon Romanovsky
/*
 * Look up the callback table for @type and verify that @op has at least
 * one handler (doit or dump). Returns NULL when the client is unavailable
 * or the op has no handler.
 *
 * Caller must hold rdma_nl_types[type].sem for read. NOTE: when no table
 * is registered yet, this function temporarily DROPS that read lock to
 * attempt a module load, then reacquires it and re-reads the table — the
 * caller must not rely on state cached across this call.
 */
static const struct rdma_nl_cbs *
get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
{
	const struct rdma_nl_cbs *cb_table;

	/*
	 * Currently only NLDEV client is supporting netlink commands in
	 * non init_net net namespace.
	 */
	if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
		return NULL;

	/* Pairs with the smp_store_release() in rdma_nl_register() */
	cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
	if (!cb_table) {
		/*
		 * Didn't get valid reference of the table, attempt module
		 * load once.
		 */
		up_read(&rdma_nl_types[type].sem);

		request_module("rdma-netlink-subsys-%u", type);

		down_read(&rdma_nl_types[type].sem);
		cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
	}
	if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
		return NULL;
	return cb_table;
}
110c9901724SLeon Romanovsky
rdma_nl_register(unsigned int index,const struct rdma_nl_cbs cb_table[])111c9901724SLeon Romanovsky void rdma_nl_register(unsigned int index,
1123250b4dbSLeon Romanovsky const struct rdma_nl_cbs cb_table[])
113b2cbae2cSRoland Dreier {
114549af008SParav Pandit if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
115549af008SParav Pandit WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
116c9901724SLeon Romanovsky return;
117b2cbae2cSRoland Dreier
118549af008SParav Pandit /* Pairs with the READ_ONCE in is_nl_valid() */
119549af008SParav Pandit smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
120c9901724SLeon Romanovsky }
121c9901724SLeon Romanovsky EXPORT_SYMBOL(rdma_nl_register);
122c9901724SLeon Romanovsky
rdma_nl_unregister(unsigned int index)123c9901724SLeon Romanovsky void rdma_nl_unregister(unsigned int index)
124b2cbae2cSRoland Dreier {
125549af008SParav Pandit down_write(&rdma_nl_types[index].sem);
126c9901724SLeon Romanovsky rdma_nl_types[index].cb_table = NULL;
127549af008SParav Pandit up_write(&rdma_nl_types[index].sem);
128b2cbae2cSRoland Dreier }
129c9901724SLeon Romanovsky EXPORT_SYMBOL(rdma_nl_unregister);
130b2cbae2cSRoland Dreier
ibnl_put_msg(struct sk_buff * skb,struct nlmsghdr ** nlh,int seq,int len,int client,int op,int flags)131b2cbae2cSRoland Dreier void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
13230dc5e63STatyana Nikolova int len, int client, int op, int flags)
133b2cbae2cSRoland Dreier {
1341a1c116fSLeon Romanovsky *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), len, flags);
135e0527334SDavid S. Miller if (!*nlh)
136b2cbae2cSRoland Dreier return NULL;
1371a1c116fSLeon Romanovsky return nlmsg_data(*nlh);
138b2cbae2cSRoland Dreier }
139b2cbae2cSRoland Dreier EXPORT_SYMBOL(ibnl_put_msg);
140b2cbae2cSRoland Dreier
ibnl_put_attr(struct sk_buff * skb,struct nlmsghdr * nlh,int len,void * data,int type)141b2cbae2cSRoland Dreier int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
142b2cbae2cSRoland Dreier int len, void *data, int type)
143b2cbae2cSRoland Dreier {
1441a1c116fSLeon Romanovsky if (nla_put(skb, type, len, data)) {
1451a1c116fSLeon Romanovsky nlmsg_cancel(skb, nlh);
146b2cbae2cSRoland Dreier return -EMSGSIZE;
147b2cbae2cSRoland Dreier }
1481a1c116fSLeon Romanovsky return 0;
1491a1c116fSLeon Romanovsky }
150b2cbae2cSRoland Dreier EXPORT_SYMBOL(ibnl_put_attr);
151b2cbae2cSRoland Dreier
/*
 * Dispatch a single RDMA netlink message to the registered client
 * callback. Holds the per-client read semaphore across the callback so
 * rdma_nl_unregister() cannot pull the table out from under a running
 * command. Returns 0 on success or a negative errno.
 */
static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack)
{
	int type = nlh->nlmsg_type;
	unsigned int index = RDMA_NL_GET_CLIENT(type);
	unsigned int op = RDMA_NL_GET_OP(type);
	const struct rdma_nl_cbs *cb_table;
	int err = -EINVAL;

	if (!is_nl_msg_valid(index, op))
		return -EINVAL;

	down_read(&rdma_nl_types[index].sem);
	/* May drop and retake the semaphore to load the client module */
	cb_table = get_cb_table(skb, index, op);
	if (!cb_table)
		goto done;

	if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
	    !netlink_capable(skb, CAP_NET_ADMIN)) {
		err = -EPERM;
		goto done;
	}

	/*
	 * LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
	 * mistakenly call the .dump() function.
	 */
	if (index == RDMA_NL_LS) {
		if (cb_table[op].doit)
			err = cb_table[op].doit(skb, nlh, extack);
		goto done;
	}
	/* FIXME: Convert IWCM to properly handle doit callbacks */
	if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
		struct netlink_dump_control c = {
			.dump = cb_table[op].dump,
		};
		if (c.dump)
			err = netlink_dump_start(skb->sk, skb, nlh, &c);
		goto done;
	}

	/* Plain (non-dump) request: hand it to the doit handler if any */
	if (cb_table[op].doit)
		err = cb_table[op].doit(skb, nlh, extack);
done:
	up_read(&rdma_nl_types[index].sem);
	return err;
}
200b2cbae2cSRoland Dreier
/*
 * This function is similar to netlink_rcv_skb with one exception:
 * It calls to the callback for the netlink messages without NLM_F_REQUEST
 * flag. These messages are intended for RDMA_NL_LS consumer, so it is allowed
 * for that consumer only.
 */
static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
						   struct nlmsghdr *,
						   struct netlink_ext_ack *))
{
	struct netlink_ext_ack extack = {};
	struct nlmsghdr *nlh;
	int err;

	/* Walk every complete netlink message packed into this skb */
	while (skb->len >= nlmsg_total_size(0)) {
		int msglen;

		nlh = nlmsg_hdr(skb);
		err = 0;

		/* Truncated header or payload: stop processing the skb */
		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
			return 0;

		/*
		 * Generally speaking, the only requests are handled
		 * by the kernel, but RDMA_NL_LS is different, because it
		 * runs backward netlink scheme. Kernel initiates messages
		 * and waits for reply with data to keep pathrecord cache
		 * in sync.
		 */
		if (!(nlh->nlmsg_flags & NLM_F_REQUEST) &&
		    (RDMA_NL_GET_CLIENT(nlh->nlmsg_type) != RDMA_NL_LS))
			goto ack;

		/* Skip control messages */
		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
			goto ack;

		err = cb(skb, nlh, &extack);
		/* -EINTR means a dump was started; the ack comes later */
		if (err == -EINTR)
			goto skip;

ack:
		/* Ack when explicitly requested, or to report an error */
		if (nlh->nlmsg_flags & NLM_F_ACK || err)
			netlink_ack(skb, nlh, err, &extack);

skip:
		/* Advance to the next aligned message in the skb */
		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (msglen > skb->len)
			msglen = skb->len;
		skb_pull(skb, msglen);
	}

	return 0;
}
256bc10ed7dSKaike Wan
rdma_nl_rcv(struct sk_buff * skb)2573c3e75d5SLeon Romanovsky static void rdma_nl_rcv(struct sk_buff *skb)
258b2cbae2cSRoland Dreier {
2593c3e75d5SLeon Romanovsky rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
260b2cbae2cSRoland Dreier }
261b2cbae2cSRoland Dreier
/*
 * Send @skb to the single socket @pid on @net's RDMA netlink socket
 * without blocking. Returns 0 on success or a negative errno.
 */
int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
{
	int ret;

	ret = netlink_unicast(rdma_net_to_dev_net(net)->nl_sock, skb, pid,
			      MSG_DONTWAIT);
	return ret < 0 ? ret : 0;
}
EXPORT_SYMBOL(rdma_nl_unicast);
27130dc5e63STatyana Nikolova
/*
 * Send @skb to the single socket @pid on @net's RDMA netlink socket,
 * blocking if needed. Returns 0 on success or a negative errno.
 */
int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid)
{
	int ret;

	ret = netlink_unicast(rdma_net_to_dev_net(net)->nl_sock, skb, pid, 0);
	return ret < 0 ? ret : 0;
}
EXPORT_SYMBOL(rdma_nl_unicast_wait);
2819047811bSIsmail, Mustafa
/*
 * Multicast @skb to all members of @group on @net's RDMA netlink socket,
 * allocating with @flags.
 */
int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
		      unsigned int group, gfp_t flags)
{
	struct sock *nlsk = rdma_net_to_dev_net(net)->nl_sock;

	return nlmsg_multicast(nlsk, skb, 0, group, flags);
}
EXPORT_SYMBOL(rdma_nl_multicast);
29030dc5e63STatyana Nikolova
rdma_nl_init(void)291549af008SParav Pandit void rdma_nl_init(void)
292549af008SParav Pandit {
293549af008SParav Pandit int idx;
294549af008SParav Pandit
295549af008SParav Pandit for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
296549af008SParav Pandit init_rwsem(&rdma_nl_types[idx].sem);
297549af008SParav Pandit }
298549af008SParav Pandit
rdma_nl_exit(void)299c9901724SLeon Romanovsky void rdma_nl_exit(void)
300b2cbae2cSRoland Dreier {
301c9901724SLeon Romanovsky int idx;
302b2cbae2cSRoland Dreier
303c9901724SLeon Romanovsky for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
3041d2fedd8SParav Pandit WARN(rdma_nl_types[idx].cb_table,
3051dc55892SColin Ian King "Netlink client %d wasn't released prior to unloading %s\n",
3061d2fedd8SParav Pandit idx, KBUILD_MODNAME);
3071d2fedd8SParav Pandit }
308b2cbae2cSRoland Dreier
rdma_nl_net_init(struct rdma_dev_net * rnet)3091d2fedd8SParav Pandit int rdma_nl_net_init(struct rdma_dev_net *rnet)
3101d2fedd8SParav Pandit {
3111d2fedd8SParav Pandit struct net *net = read_pnet(&rnet->net);
3121d2fedd8SParav Pandit struct netlink_kernel_cfg cfg = {
3131d2fedd8SParav Pandit .input = rdma_nl_rcv,
3141d2fedd8SParav Pandit };
3151d2fedd8SParav Pandit struct sock *nls;
3161d2fedd8SParav Pandit
3171d2fedd8SParav Pandit nls = netlink_kernel_create(net, NETLINK_RDMA, &cfg);
3181d2fedd8SParav Pandit if (!nls)
3191d2fedd8SParav Pandit return -ENOMEM;
3201d2fedd8SParav Pandit
3211d2fedd8SParav Pandit nls->sk_sndtimeo = 10 * HZ;
3221d2fedd8SParav Pandit rnet->nl_sock = nls;
3231d2fedd8SParav Pandit return 0;
3241d2fedd8SParav Pandit }
3251d2fedd8SParav Pandit
/* Tear down the per-namespace NETLINK_RDMA socket created in
 * rdma_nl_net_init().
 */
void rdma_nl_net_exit(struct rdma_dev_net *rnet)
{
	netlink_kernel_release(rnet->nl_sock);
}
3301eb5be0eSJason Gunthorpe
3311eb5be0eSJason Gunthorpe MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
332