1 /*
2  * (C) 2015 Red Hat GmbH
3  * Author: Florian Westphal <fw@strlen.de>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.
8  */
9 
10 #include <linux/module.h>
11 #include <linux/static_key.h>
12 #include <linux/hash.h>
13 #include <linux/jhash.h>
14 #include <linux/if_vlan.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/netlink.h>
18 #include <linux/netfilter.h>
19 #include <linux/netfilter/nfnetlink.h>
20 #include <linux/netfilter/nf_tables.h>
21 #include <net/netfilter/nf_tables_core.h>
22 #include <net/netfilter/nf_tables.h>
23 
/* Upper bounds, in bytes, on how much of the link-layer, network and
 * transport headers is copied into a trace message.  The same values
 * are used when sizing the netlink message in nft_trace_notify().
 */
24 #define NFT_TRACETYPE_LL_HSIZE		20
25 #define NFT_TRACETYPE_NETWORK_HSIZE	40
26 #define NFT_TRACETYPE_TRANSPORT_HSIZE	20
27 
/* Static branch key, initially false, exported to GPL modules.
 * NOTE(review): presumably flipped by whatever turns packet tracing
 * on; the users of this key are not visible in this file.
 */
28 DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
29 EXPORT_SYMBOL_GPL(nft_trace_enabled);
30 
31 static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb)
32 {
33 	__be32 id;
34 
35 	/* using skb address as ID results in a limited number of
36 	 * values (and quick reuse).
37 	 *
38 	 * So we attempt to use as many skb members that will not
39 	 * change while skb is with netfilter.
40 	 */
41 	id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb),
42 				  skb->skb_iif);
43 
44 	return nla_put_be32(nlskb, NFTA_TRACE_ID, id);
45 }
46 
47 static int trace_fill_header(struct sk_buff *nlskb, u16 type,
48 			     const struct sk_buff *skb,
49 			     int off, unsigned int len)
50 {
51 	struct nlattr *nla;
52 
53 	if (len == 0)
54 		return 0;
55 
56 	nla = nla_reserve(nlskb, type, len);
57 	if (!nla || skb_copy_bits(skb, off, nla_data(nla), len))
58 		return -1;
59 
60 	return 0;
61 }
62 
63 static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
64 				   const struct sk_buff *skb)
65 {
66 	struct vlan_ethhdr veth;
67 	int off;
68 
69 	BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);
70 
71 	off = skb_mac_header(skb) - skb->data;
72 	if (off != -ETH_HLEN)
73 		return -1;
74 
75 	if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
76 		return -1;
77 
78 	veth.h_vlan_proto = skb->vlan_proto;
79 	veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
80 	veth.h_vlan_encapsulated_proto = skb->protocol;
81 
82 	return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
83 }
84 
85 static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
86 				  const struct net_device *indev,
87 				  const struct net_device *outdev)
88 {
89 	if (indev) {
90 		if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
91 				 htonl(indev->ifindex)))
92 			return -1;
93 
94 		if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
95 				 htons(indev->type)))
96 			return -1;
97 	}
98 
99 	if (outdev) {
100 		if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
101 				 htonl(outdev->ifindex)))
102 			return -1;
103 
104 		if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
105 				 htons(outdev->type)))
106 			return -1;
107 	}
108 
109 	return 0;
110 }
111 
112 static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
113 				  const struct nft_pktinfo *pkt)
114 {
115 	const struct sk_buff *skb = pkt->skb;
116 	unsigned int len = min_t(unsigned int,
117 				 pkt->xt.thoff - skb_network_offset(skb),
118 				 NFT_TRACETYPE_NETWORK_HSIZE);
119 	int off = skb_network_offset(skb);
120 
121 	if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
122 		return -1;
123 
124 	len = min_t(unsigned int, skb->len - pkt->xt.thoff,
125 		    NFT_TRACETYPE_TRANSPORT_HSIZE);
126 
127 	if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
128 			      pkt->xt.thoff, len))
129 		return -1;
130 
131 	if (!skb_mac_header_was_set(skb))
132 		return 0;
133 
134 	if (skb_vlan_tag_get(skb))
135 		return nf_trace_fill_ll_header(nlskb, skb);
136 
137 	off = skb_mac_header(skb) - skb->data;
138 	len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
139 	return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
140 				 skb, off, len);
141 }
142 
143 static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
144 				   const struct nft_traceinfo *info)
145 {
146 	if (!info->rule)
147 		return 0;
148 
149 	/* a continue verdict with ->type == RETURN means that this is
150 	 * an implicit return (end of chain reached).
151 	 *
152 	 * Since no rule matched, the ->rule pointer is invalid.
153 	 */
154 	if (info->type == NFT_TRACETYPE_RETURN &&
155 	    info->verdict->code == NFT_CONTINUE)
156 		return 0;
157 
158 	return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
159 			    cpu_to_be64(info->rule->handle));
160 }
161 
162 void nft_trace_notify(struct nft_traceinfo *info)
163 {
164 	const struct nft_pktinfo *pkt = info->pkt;
165 	struct nfgenmsg *nfmsg;
166 	struct nlmsghdr *nlh;
167 	struct sk_buff *skb;
168 	unsigned int size;
169 	int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
170 
171 	if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE))
172 		return;
173 
174 	size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
175 		nla_total_size(NFT_TABLE_MAXNAMELEN) +
176 		nla_total_size(NFT_CHAIN_MAXNAMELEN) +
177 		nla_total_size(sizeof(__be64)) +	/* rule handle */
178 		nla_total_size(sizeof(__be32)) +	/* trace type */
179 		nla_total_size(0) +			/* VERDICT, nested */
180 			nla_total_size(sizeof(u32)) +	/* verdict code */
181 			nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
182 		nla_total_size(sizeof(u32)) +		/* id */
183 		nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
184 		nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
185 		nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
186 		nla_total_size(sizeof(u32)) +		/* iif */
187 		nla_total_size(sizeof(__be16)) +	/* iiftype */
188 		nla_total_size(sizeof(u32)) +		/* oif */
189 		nla_total_size(sizeof(__be16)) +	/* oiftype */
190 		nla_total_size(sizeof(u32)) +		/* mark */
191 		nla_total_size(sizeof(u32)) +		/* nfproto */
192 		nla_total_size(sizeof(u32));		/* policy */
193 
194 	skb = nlmsg_new(size, GFP_ATOMIC);
195 	if (!skb)
196 		return;
197 
198 	nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
199 	if (!nlh)
200 		goto nla_put_failure;
201 
202 	nfmsg = nlmsg_data(nlh);
203 	nfmsg->nfgen_family	= info->basechain->type->family;
204 	nfmsg->version		= NFNETLINK_V0;
205 	nfmsg->res_id		= 0;
206 
207 	if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf)))
208 		goto nla_put_failure;
209 
210 	if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
211 		goto nla_put_failure;
212 
213 	if (trace_fill_id(skb, pkt->skb))
214 		goto nla_put_failure;
215 
216 	if (info->chain) {
217 		if (nla_put_string(skb, NFTA_TRACE_CHAIN,
218 				   info->chain->name))
219 			goto nla_put_failure;
220 		if (nla_put_string(skb, NFTA_TRACE_TABLE,
221 				   info->chain->table->name))
222 			goto nla_put_failure;
223 	}
224 
225 	if (nf_trace_fill_rule_info(skb, info))
226 		goto nla_put_failure;
227 
228 	switch (info->type) {
229 	case NFT_TRACETYPE_UNSPEC:
230 	case __NFT_TRACETYPE_MAX:
231 		break;
232 	case NFT_TRACETYPE_RETURN:
233 	case NFT_TRACETYPE_RULE:
234 		if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
235 			goto nla_put_failure;
236 		break;
237 	case NFT_TRACETYPE_POLICY:
238 		if (nla_put_be32(skb, NFTA_TRACE_POLICY,
239 				 info->basechain->policy))
240 			goto nla_put_failure;
241 		break;
242 	}
243 
244 	if (pkt->skb->mark &&
245 	    nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
246 		goto nla_put_failure;
247 
248 	if (!info->packet_dumped) {
249 		if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out))
250 			goto nla_put_failure;
251 
252 		if (nf_trace_fill_pkt_info(skb, pkt))
253 			goto nla_put_failure;
254 		info->packet_dumped = true;
255 	}
256 
257 	nlmsg_end(skb, nlh);
258 	nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
259 	return;
260 
261  nla_put_failure:
262 	WARN_ON_ONCE(1);
263 	kfree_skb(skb);
264 }
265 
266 void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
267 		    const struct nft_verdict *verdict,
268 		    const struct nft_chain *chain)
269 {
270 	info->basechain = nft_base_chain(chain);
271 	info->trace = true;
272 	info->packet_dumped = false;
273 	info->pkt = pkt;
274 	info->verdict = verdict;
275 }
276