1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * (C) 2015 Red Hat GmbH
4  * Author: Florian Westphal <fw@strlen.de>
5  */
6 
7 #include <linux/module.h>
8 #include <linux/static_key.h>
9 #include <linux/hash.h>
10 #include <linux/siphash.h>
11 #include <linux/if_vlan.h>
12 #include <linux/init.h>
13 #include <linux/skbuff.h>
14 #include <linux/netlink.h>
15 #include <linux/netfilter.h>
16 #include <linux/netfilter/nfnetlink.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables_core.h>
19 #include <net/netfilter/nf_tables.h>
20 
/* Upper bounds, in bytes, on how much of the link-layer, network and
 * transport headers is copied into a single trace message.  The same
 * values are used when estimating the size of the netlink message.
 */
#define NFT_TRACETYPE_LL_HSIZE		20
#define NFT_TRACETYPE_NETWORK_HSIZE	40
#define NFT_TRACETYPE_TRANSPORT_HSIZE	20

/* Static branch key, defined false by default and exported (GPL-only)
 * so that code outside this file can reference it.
 */
DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);
27 
/* Append an attribute of @type to @nlskb holding @len bytes of packet
 * data read from @skb, starting at offset @off.
 *
 * A zero @len is not an error; no attribute is emitted in that case.
 *
 * Returns 0 on success, -1 if the attribute could not be reserved or the
 * packet bytes could not be copied.
 */
static int trace_fill_header(struct sk_buff *nlskb, u16 type,
			     const struct sk_buff *skb,
			     int off, unsigned int len)
{
	struct nlattr *attr;

	if (!len)
		return 0;

	attr = nla_reserve(nlskb, type, len);
	if (!attr)
		return -1;

	/* copy straight into the reserved attribute payload */
	if (skb_copy_bits(skb, off, nla_data(attr), len))
		return -1;

	return 0;
}
43 
nf_trace_fill_ll_header(struct sk_buff * nlskb,const struct sk_buff * skb)44 static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
45 				   const struct sk_buff *skb)
46 {
47 	struct vlan_ethhdr veth;
48 	int off;
49 
50 	BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);
51 
52 	off = skb_mac_header(skb) - skb->data;
53 	if (off != -ETH_HLEN)
54 		return -1;
55 
56 	if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
57 		return -1;
58 
59 	veth.h_vlan_proto = skb->vlan_proto;
60 	veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
61 	veth.h_vlan_encapsulated_proto = skb->protocol;
62 
63 	return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
64 }
65 
nf_trace_fill_dev_info(struct sk_buff * nlskb,const struct net_device * indev,const struct net_device * outdev)66 static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
67 				  const struct net_device *indev,
68 				  const struct net_device *outdev)
69 {
70 	if (indev) {
71 		if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
72 				 htonl(indev->ifindex)))
73 			return -1;
74 
75 		if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
76 				 htons(indev->type)))
77 			return -1;
78 	}
79 
80 	if (outdev) {
81 		if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
82 				 htonl(outdev->ifindex)))
83 			return -1;
84 
85 		if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
86 				 htons(outdev->type)))
87 			return -1;
88 	}
89 
90 	return 0;
91 }
92 
nf_trace_fill_pkt_info(struct sk_buff * nlskb,const struct nft_pktinfo * pkt)93 static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
94 				  const struct nft_pktinfo *pkt)
95 {
96 	const struct sk_buff *skb = pkt->skb;
97 	int off = skb_network_offset(skb);
98 	unsigned int len, nh_end;
99 
100 	nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len;
101 	len = min_t(unsigned int, nh_end - skb_network_offset(skb),
102 		    NFT_TRACETYPE_NETWORK_HSIZE);
103 	if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
104 		return -1;
105 
106 	if (pkt->flags & NFT_PKTINFO_L4PROTO) {
107 		len = min_t(unsigned int, skb->len - nft_thoff(pkt),
108 			    NFT_TRACETYPE_TRANSPORT_HSIZE);
109 		if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
110 				      nft_thoff(pkt), len))
111 			return -1;
112 	}
113 
114 	if (!skb_mac_header_was_set(skb))
115 		return 0;
116 
117 	if (skb_vlan_tag_get(skb))
118 		return nf_trace_fill_ll_header(nlskb, skb);
119 
120 	off = skb_mac_header(skb) - skb->data;
121 	len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
122 	return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
123 				 skb, off, len);
124 }
125 
nf_trace_fill_rule_info(struct sk_buff * nlskb,const struct nft_verdict * verdict,const struct nft_rule_dp * rule,const struct nft_traceinfo * info)126 static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
127 				   const struct nft_verdict *verdict,
128 				   const struct nft_rule_dp *rule,
129 				   const struct nft_traceinfo *info)
130 {
131 	if (!rule || rule->is_last)
132 		return 0;
133 
134 	/* a continue verdict with ->type == RETURN means that this is
135 	 * an implicit return (end of chain reached).
136 	 *
137 	 * Since no rule matched, the ->rule pointer is invalid.
138 	 */
139 	if (info->type == NFT_TRACETYPE_RETURN &&
140 	    verdict->code == NFT_CONTINUE)
141 		return 0;
142 
143 	return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
144 			    cpu_to_be64(rule->handle),
145 			    NFTA_TRACE_PAD);
146 }
147 
nft_trace_have_verdict_chain(const struct nft_verdict * verdict,struct nft_traceinfo * info)148 static bool nft_trace_have_verdict_chain(const struct nft_verdict *verdict,
149 					 struct nft_traceinfo *info)
150 {
151 	switch (info->type) {
152 	case NFT_TRACETYPE_RETURN:
153 	case NFT_TRACETYPE_RULE:
154 		break;
155 	default:
156 		return false;
157 	}
158 
159 	switch (verdict->code) {
160 	case NFT_JUMP:
161 	case NFT_GOTO:
162 		break;
163 	default:
164 		return false;
165 	}
166 
167 	return true;
168 }
169 
nft_trace_get_chain(const struct nft_rule_dp * rule,const struct nft_traceinfo * info)170 static const struct nft_chain *nft_trace_get_chain(const struct nft_rule_dp *rule,
171 						   const struct nft_traceinfo *info)
172 {
173 	const struct nft_rule_dp_last *last;
174 
175 	if (!rule)
176 		return &info->basechain->chain;
177 
178 	while (!rule->is_last)
179 		rule = nft_rule_next(rule);
180 
181 	last = (const struct nft_rule_dp_last *)rule;
182 
183 	if (WARN_ON_ONCE(!last->chain))
184 		return &info->basechain->chain;
185 
186 	return last->chain;
187 }
188 
nft_trace_notify(const struct nft_pktinfo * pkt,const struct nft_verdict * verdict,const struct nft_rule_dp * rule,struct nft_traceinfo * info)189 void nft_trace_notify(const struct nft_pktinfo *pkt,
190 		      const struct nft_verdict *verdict,
191 		      const struct nft_rule_dp *rule,
192 		      struct nft_traceinfo *info)
193 {
194 	const struct nft_chain *chain;
195 	struct nlmsghdr *nlh;
196 	struct sk_buff *skb;
197 	unsigned int size;
198 	u32 mark = 0;
199 	u16 event;
200 
201 	if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
202 		return;
203 
204 	chain = nft_trace_get_chain(rule, info);
205 
206 	size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
207 		nla_total_size(strlen(chain->table->name)) +
208 		nla_total_size(strlen(chain->name)) +
209 		nla_total_size_64bit(sizeof(__be64)) +	/* rule handle */
210 		nla_total_size(sizeof(__be32)) +	/* trace type */
211 		nla_total_size(0) +			/* VERDICT, nested */
212 			nla_total_size(sizeof(u32)) +	/* verdict code */
213 		nla_total_size(sizeof(u32)) +		/* id */
214 		nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
215 		nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
216 		nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
217 		nla_total_size(sizeof(u32)) +		/* iif */
218 		nla_total_size(sizeof(__be16)) +	/* iiftype */
219 		nla_total_size(sizeof(u32)) +		/* oif */
220 		nla_total_size(sizeof(__be16)) +	/* oiftype */
221 		nla_total_size(sizeof(u32)) +		/* mark */
222 		nla_total_size(sizeof(u32)) +		/* nfproto */
223 		nla_total_size(sizeof(u32));		/* policy */
224 
225 	if (nft_trace_have_verdict_chain(verdict, info))
226 		size += nla_total_size(strlen(verdict->chain->name)); /* jump target */
227 
228 	skb = nlmsg_new(size, GFP_ATOMIC);
229 	if (!skb)
230 		return;
231 
232 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE);
233 	nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family,
234 			   NFNETLINK_V0, 0);
235 	if (!nlh)
236 		goto nla_put_failure;
237 
238 	if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
239 		goto nla_put_failure;
240 
241 	if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
242 		goto nla_put_failure;
243 
244 	if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid))
245 		goto nla_put_failure;
246 
247 	if (nla_put_string(skb, NFTA_TRACE_CHAIN, chain->name))
248 		goto nla_put_failure;
249 
250 	if (nla_put_string(skb, NFTA_TRACE_TABLE, chain->table->name))
251 		goto nla_put_failure;
252 
253 	if (nf_trace_fill_rule_info(skb, verdict, rule, info))
254 		goto nla_put_failure;
255 
256 	switch (info->type) {
257 	case NFT_TRACETYPE_UNSPEC:
258 	case __NFT_TRACETYPE_MAX:
259 		break;
260 	case NFT_TRACETYPE_RETURN:
261 	case NFT_TRACETYPE_RULE: {
262 		unsigned int v;
263 
264 		if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict))
265 			goto nla_put_failure;
266 
267 		/* pkt->skb undefined iff NF_STOLEN, disable dump */
268 		v = verdict->code & NF_VERDICT_MASK;
269 		if (v == NF_STOLEN)
270 			info->packet_dumped = true;
271 		else
272 			mark = pkt->skb->mark;
273 
274 		break;
275 	}
276 	case NFT_TRACETYPE_POLICY:
277 		mark = pkt->skb->mark;
278 
279 		if (nla_put_be32(skb, NFTA_TRACE_POLICY,
280 				 htonl(info->basechain->policy)))
281 			goto nla_put_failure;
282 		break;
283 	}
284 
285 	if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark)))
286 		goto nla_put_failure;
287 
288 	if (!info->packet_dumped) {
289 		if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt)))
290 			goto nla_put_failure;
291 
292 		if (nf_trace_fill_pkt_info(skb, pkt))
293 			goto nla_put_failure;
294 		info->packet_dumped = true;
295 	}
296 
297 	nlmsg_end(skb, nlh);
298 	nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
299 	return;
300 
301  nla_put_failure:
302 	WARN_ON_ONCE(1);
303 	kfree_skb(skb);
304 }
305 
nft_trace_init(struct nft_traceinfo * info,const struct nft_pktinfo * pkt,const struct nft_chain * chain)306 void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
307 		    const struct nft_chain *chain)
308 {
309 	static siphash_key_t trace_key __read_mostly;
310 	struct sk_buff *skb = pkt->skb;
311 
312 	info->basechain = nft_base_chain(chain);
313 	info->trace = true;
314 	info->nf_trace = pkt->skb->nf_trace;
315 	info->packet_dumped = false;
316 
317 	net_get_random_once(&trace_key, sizeof(trace_key));
318 
319 	info->skbid = (u32)siphash_3u32(hash32_ptr(skb),
320 					skb_get_hash_net(nft_net(pkt), skb),
321 					skb->skb_iif,
322 					&trace_key);
323 }
324