// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

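/* Abort offload when a TCP FIN or RST is seen, so the connection teardown
 * goes back through conntrack on the classic path.
 */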
static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (unlikely(tcph->fin || tcph->rst)) {
		flow_offload_teardown(flow);
		return -1;
	}

	return 0;
}

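/* Fix up the TCP checksum after an IPv4 address rewrite; the address is part
 * of the pseudo-header, so only the checksum needs adjusting here.
 */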
static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
}

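/* Same as the TCP variant, but a zero UDP checksum means "no checksum", so it
 * is only updated when set, and a zero result is mapped to CSUM_MANGLED_0.
 */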
static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

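/* Dispatch the layer 4 checksum fixup by IP protocol. */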
static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				   unsigned int thoff, __be32 addr,
				   __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
		break;
	}
}

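/* Apply source NAT: rewrite the source address (original direction) or the
 * destination address (reply direction) from the opposite tuple, then fix up
 * the IP and layer 4 checksums.
 */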
static void nf_flow_snat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

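/* Apply destination NAT: the mirror image of nf_flow_snat_ip(). */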
static void nf_flow_dnat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

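/* Perform any SNAT/DNAT configured on the flow, ports first, then addresses. */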
static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			  unsigned int thoff, enum flow_offload_tuple_dir dir,
			  struct iphdr *iph)
{
	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
	}
}

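/* Packets carrying IP options are not offloaded; a transport header offset
 * larger than the basic IPv4 header implies options are present.
 */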
static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

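/* Record VLAN and PPPoE encapsulation (hardware-accelerated VLAN tag plus the
 * outer protocol seen on the wire) in the lookup tuple.
 */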
static void nf_flow_tuple_encap(struct sk_buff *skb,
				struct flow_offload_tuple *tuple)
{
	struct vlan_ethhdr *veth;
	struct pppoe_hdr *phdr;
	int i = 0;

	if (skb_vlan_tag_present(skb)) {
		tuple->encap[i].id = skb_vlan_tag_get(skb);
		tuple->encap[i].proto = skb->vlan_proto;
		i++;
	}
	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
		tuple->encap[i].proto = skb->protocol;
		break;
	case htons(ETH_P_PPP_SES):
		phdr = (struct pppoe_hdr *)skb_network_header(skb);
		tuple->encap[i].id = ntohs(phdr->sid);
		tuple->encap[i].proto = skb->protocol;
		break;
	}
}

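/* Per-packet lookup context: ingress device, offset to the IP header behind
 * any encapsulation, and the expected layer 4 header size.
 */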
struct nf_flowtable_ctx {
	const struct net_device	*in;
	u32			offset;
	u32			hdrsize;
};

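/* Build the IPv4 lookup tuple from the packet. Returns -1 if the packet
 * cannot take the fast path: fragments, IP options, unsupported protocol,
 * expired TTL or truncated headers.
 */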
static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			    struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;
	u8 ipproto;

	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
		return -1;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	thoff = (iph->ihl * 4);

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	thoff += ctx->offset;

	ipproto = iph->protocol;
	switch (ipproto) {
	case IPPROTO_TCP:
		ctx->hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		ctx->hdrsize = sizeof(struct udphdr);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		ctx->hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return -1;
	}

	if (iph->ttl <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
		return -1;

	switch (ipproto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port		= ports->source;
		tuple->dst_port		= ports->dest;
		break;
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return -1;
		break;
	}
	}

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);

	tuple->src_v4.s_addr	= iph->saddr;
	tuple->dst_v4.s_addr	= iph->daddr;
	tuple->l3proto		= AF_INET;
	tuple->l4proto		= ipproto;
	tuple->iifidx		= ctx->in->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

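/* Validate the cached route before use; callers tear the flow down when the
 * dst has gone stale so packets fall back to the classic forwarding path.
 */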
static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
{
	if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
	    tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
		return true;

	return dst_check(tuple->dst_cache, tuple->dst_cookie);
}

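/* Hand the packet to the dst output path of the cached xfrm route; the skb is
 * consumed, hence NF_STOLEN.
 */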
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}

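/* Return true if the packet carries the given protocol behind a VLAN or PPPoE
 * header, and advance *offset past the encapsulation.
 */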
static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
				       u32 *offset)
{
	struct vlan_ethhdr *veth;
	__be16 inner_proto;

	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth)))
			return false;

		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		if (veth->h_vlan_encapsulated_proto == proto) {
			*offset += VLAN_HLEN;
			return true;
		}
		break;
	case htons(ETH_P_PPP_SES):
		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
		    inner_proto == proto) {
			*offset += PPPOE_SES_HLEN;
			return true;
		}
		break;
	}

	return false;
}

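/* Strip the encapsulation headers recorded in the tuple so the packet can be
 * forwarded as plain IP.
 */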
static void nf_flow_encap_pop(struct sk_buff *skb,
			      struct flow_offload_tuple_rhash *tuplehash)
{
	struct vlan_hdr *vlan_hdr;
	int i;

	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
		if (skb_vlan_tag_present(skb)) {
			__vlan_hwaccel_clear_tag(skb);
			continue;
		}
		switch (skb->protocol) {
		case htons(ETH_P_8021Q):
			vlan_hdr = (struct vlan_hdr *)skb->data;
			__skb_pull(skb, VLAN_HLEN);
			vlan_set_encap_proto(skb, vlan_hdr);
			skb_reset_network_header(skb);
			break;
		case htons(ETH_P_PPP_SES):
			skb->protocol = __nf_flow_pppoe_proto(skb);
			skb_pull(skb, PPPOE_SES_HLEN);
			skb_reset_network_header(skb);
			break;
		}
	}
}

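/* Direct transmission: build the Ethernet header from the cached addresses
 * and queue the packet on the cached output device.
 */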
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
				       const struct flow_offload_tuple_rhash *tuplehash,
				       unsigned short type)
{
	struct net_device *outdev;

	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
	if (!outdev)
		return NF_DROP;

	skb->dev = outdev;
	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
			tuplehash->tuple.out.h_source, skb->len);
	dev_queue_xmit(skb);

	return NF_STOLEN;
}

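/* Look up an offloaded flow matching this IPv4 packet, taking VLAN/PPPoE
 * encapsulation into account.
 */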
static struct flow_offload_tuple_rhash *
nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
		       struct nf_flowtable *flow_table, struct sk_buff *skb)
{
	struct flow_offload_tuple tuple = {};

	if (skb->protocol != htons(ETH_P_IP) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
		return NULL;

	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
		return NULL;

	return flow_offload_lookup(flow_table, &tuple);
}

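/* Fast-path forwarding for IPv4: validate state, route and MTU, apply NAT,
 * decrement the TTL and update counters. Returns 1 to transmit, 0 to fall
 * back to the classic path, -1 to drop.
 */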
static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
				   struct nf_flowtable *flow_table,
				   struct flow_offload_tuple_rhash *tuplehash,
				   struct sk_buff *skb)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	unsigned int thoff, mtu;
	struct iphdr *iph;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	thoff = (iph->ihl * 4) + ctx->offset;
	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
		return 0;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return 0;
	}

	if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
		return -1;

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(skb, tuplehash);
	thoff -= ctx->offset;

	iph = ip_hdr(skb);
	nf_flow_nat_ip(flow, skb, thoff, dir, iph);

	ip_decrease_ttl(iph);
	skb_clear_tstamp(skb);

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	return 1;
}

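/* Netfilter ingress hook for IPv4 flowtable offload: look up the flow,
 * forward on the fast path and transmit via the cached route, neighbour or
 * direct output; otherwise let the packet continue through the stack.
 */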
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	enum flow_offload_tuple_dir dir;
	struct nf_flowtable_ctx ctx = {
		.in	= state->in,
	};
	struct flow_offload *flow;
	struct net_device *outdev;
	struct rtable *rt;
	__be32 nexthop;
	int ret;

	tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
	if (!tuplehash)
		return NF_ACCEPT;

	ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
		return NF_ACCEPT;

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		rt = dst_rtable(tuplehash->tuple.dst_cache);
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		rt = dst_rtable(tuplehash->tuple.dst_cache);
		outdev = rt->dst.dev;
		skb->dev = outdev;
		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
		skb_dst_set_noref(skb, &rt->dst);
		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
		ret = NF_STOLEN;
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
		if (ret == NF_DROP)
			flow_offload_teardown(flow);
		break;
	default:
		WARN_ON_ONCE(1);
		ret = NF_DROP;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

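/* IPv6 counterparts of the NAT helpers above; IPv6 has no header checksum,
 * so only the layer 4 checksum needs fixing after an address rewrite.
 */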
static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr,
				 struct ipv6hdr *ip6h)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);
}

static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				     unsigned int thoff, struct in6_addr *addr,
				     struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
		break;
	}
}

static void nf_flow_snat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

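/* Perform any SNAT/DNAT configured on the flow for IPv6, ports first. */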
static void nf_flow_nat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb,
			     enum flow_offload_tuple_dir dir,
			     struct ipv6hdr *ip6h)
{
	unsigned int thoff = sizeof(*ip6h);

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
	}
}

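/* Build the IPv6 lookup tuple. Extension headers are not parsed, so only
 * packets with TCP, UDP or (optionally) GRE directly after the IPv6 header
 * qualify for the fast path.
 */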
static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			      struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;
	u8 nexthdr;

	thoff = sizeof(*ip6h) + ctx->offset;
	if (!pskb_may_pull(skb, thoff))
		return -1;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);

	nexthdr = ip6h->nexthdr;
	switch (nexthdr) {
	case IPPROTO_TCP:
		ctx->hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		ctx->hdrsize = sizeof(struct udphdr);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		ctx->hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return -1;
	}

	if (ip6h->hop_limit <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
		return -1;

	switch (nexthdr) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port		= ports->source;
		tuple->dst_port		= ports->dest;
		break;
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return -1;
		break;
	}
	}

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);

	tuple->src_v6		= ip6h->saddr;
	tuple->dst_v6		= ip6h->daddr;
	tuple->l3proto		= AF_INET6;
	tuple->l4proto		= nexthdr;
	tuple->iifidx		= ctx->in->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

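/* Fast-path forwarding for IPv6: the counterpart of nf_flow_offload_forward(),
 * decrementing the hop limit instead of the TTL.
 */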
static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
					struct nf_flowtable *flow_table,
					struct flow_offload_tuple_rhash *tuplehash,
					struct sk_buff *skb)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	unsigned int thoff, mtu;
	struct ipv6hdr *ip6h;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
	thoff = sizeof(*ip6h) + ctx->offset;
	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
		return 0;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return 0;
	}

	if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
		return -1;

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(skb, tuplehash);

	ip6h = ipv6_hdr(skb);
	nf_flow_nat_ipv6(flow, skb, dir, ip6h);

	ip6h->hop_limit--;
	skb_clear_tstamp(skb);

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	return 1;
}

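/* Look up an offloaded flow matching this IPv6 packet, taking VLAN/PPPoE
 * encapsulation into account.
 */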
static struct flow_offload_tuple_rhash *
nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
			    struct nf_flowtable *flow_table,
			    struct sk_buff *skb)
{
	struct flow_offload_tuple tuple = {};

	if (skb->protocol != htons(ETH_P_IPV6) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
		return NULL;

	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
		return NULL;

	return flow_offload_lookup(flow_table, &tuple);
}

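/* Netfilter ingress hook for IPv6 flowtable offload; mirrors
 * nf_flow_offload_ip_hook() using the IPv6 route and neighbour tables.
 */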
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	enum flow_offload_tuple_dir dir;
	struct nf_flowtable_ctx ctx = {
		.in	= state->in,
	};
	const struct in6_addr *nexthop;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct rt6_info *rt;
	int ret;

	tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
		return NF_ACCEPT;

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		rt = dst_rt6_info(tuplehash->tuple.dst_cache);
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		rt = dst_rt6_info(tuplehash->tuple.dst_cache);
		outdev = rt->dst.dev;
		skb->dev = outdev;
		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
		skb_dst_set_noref(skb, &rt->dst);
		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
		ret = NF_STOLEN;
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
		if (ret == NF_DROP)
			flow_offload_teardown(flow);
		break;
	default:
		WARN_ON_ONCE(1);
		ret = NF_DROP;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);