1#!/bin/bash
2#
# This tests that nf_queue:
# 1. can process packets from all hooks
# 2. supports running nfqueue from more than one base chain
6#
7# shellcheck disable=SC2162,SC2317
8
# lib.sh supplies the helpers this script calls but does not define
# (setup_ns, cleanup_all_ns, checktool, busywait) and presumably also
# $ksft_skip and $BUSYWAIT_TIMEOUT -- verify against lib.sh.
source lib.sh

# Value handed to the nf_queue helper via its -t option.
timeout=5
# Exit status of the whole script; test functions set it to 1 on failure.
ret=0
12
# EXIT trap handler: stop leftover helper processes, tear down the
# namespaces and remove every temporary file created by this script.
# Globals (read): ns1, ns2, ns3, nsrouter, TMPINPUT, TMPFILE0..TMPFILE3
cleanup()
{
	local netns

	# ns3 is covered as well: test_udp_ct_race starts a socat listener there.
	# stderr is dropped on purpose, a namespace without processes makes
	# kill complain about missing arguments.
	for netns in "$ns1" "$ns2" "$ns3" "$nsrouter"; do
		ip netns pids "$netns" | xargs kill 2>/dev/null
	done

	cleanup_all_ns

	rm -f "$TMPINPUT" "$TMPFILE0" "$TMPFILE1" "$TMPFILE2" "$TMPFILE3"
}
26
# Required tools; checktool comes from lib.sh (presumably skips the test
# with the given message when the command fails -- confirm in lib.sh).
checktool "nft --version" "test without nft tool"
checktool "socat -h" "run test without socat"

modprobe -q sctp

# From here on every exit path goes through cleanup().
trap cleanup EXIT

setup_ns ns1 ns2 ns3 nsrouter

# Scratch files for helper/listener output.
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)

# Payload for the tcp/sctp transfers: COUNT MiB of zeroes.
TMPINPUT=$(mktemp)
if [ "$KSFT_MACHINE_SLOW" = "yes" ]; then
	COUNT=25
else
	COUNT=200
fi
dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"

# The first veth pair doubles as the probe for veth support.
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
    echo "SKIP: No virtual ethernet pair device support in kernel"
    exit $ksft_skip
fi
ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"

# Router side: veth0..veth2 own 10.0.{1,2,3}.1/24 and dead:{1,2,3}::1/64.
for net_id in 1 2 3; do
	rdev="veth$((net_id - 1))"
	ip -net "$nsrouter" link set "$rdev" up
	ip -net "$nsrouter" addr add "10.0.${net_id}.1/24" dev "$rdev"
	ip -net "$nsrouter" addr add "dead:${net_id}::1/64" dev "$rdev" nodad
done

for client_ns in "$ns1" "$ns2" "$ns3"; do
	ip -net "$client_ns" link set eth0 up
done

# Client side: .99 / ::99 in the matching subnet, default routes via router.
net_id=1
for client_ns in "$ns1" "$ns2" "$ns3"; do
	ip -net "$client_ns" addr add "10.0.${net_id}.99/24" dev eth0
	ip -net "$client_ns" addr add "dead:${net_id}::99/64" dev eth0 nodad
	ip -net "$client_ns" route add default via "10.0.${net_id}.1"
	ip -net "$client_ns" route add default via "dead:${net_id}::1"
	net_id=$((net_id + 1))
done
unset net_id rdev client_ns
83
# Load an inet table into $nsrouter that hooks all five netfilter hooks.
# Every base chain jumps to "nfq", which queues ICMP (default queue, bypass)
# and ICMPv6 echo traffic (queue 1, bypass); forward/output additionally
# queue the tcp test ports to queues 2 and 3.
# Arguments: $1 - table name, $2 - priority used for all base chains
load_ruleset() {
	local table=$1
	local priority=$2

	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
table inet $table {
	chain nfq {
		ip protocol icmp queue bypass
		icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
	}
	chain pre {
		type filter hook prerouting priority $priority; policy accept;
		jump nfq
	}
	chain input {
		type filter hook input priority $priority; policy accept;
		jump nfq
	}
	chain forward {
		type filter hook forward priority $priority; policy accept;
		tcp dport 12345 queue num 2
		jump nfq
	}
	chain output {
		type filter hook output priority $priority; policy accept;
		tcp dport 12345 queue num 3
		tcp sport 23456 queue num 3
		jump nfq
	}
	chain post {
		type filter hook postrouting priority $priority; policy accept;
		jump nfq
	}
}
EOF
}
120
# Load table "inet countrules" into $nsrouter: one base chain per hook,
# each holding nothing but a counter.
# Arguments: $1 - priority used for all five base chains
load_counter_ruleset() {
	local priority=$1
	local pair

	# Each pair is <chain name>:<hook name>.
	{
		printf 'table inet countrules {\n'
		for pair in pre:prerouting input:input forward:forward \
			    output:output post:postrouting; do
			printf '\tchain %s {\n' "${pair%%:*}"
			printf '\t\ttype filter hook %s priority %s; policy accept;\n' \
				"${pair#*:}" "$priority"
			printf '\t\tcounter\n'
			printf '\t}\n'
		done
		printf '}\n'
	} | ip netns exec "$nsrouter" nft -f /dev/stdin
}
149
# Ping ns2 from ns1 (through the router) once over IPv4 and once over IPv6.
# Returns: 0 if both succeed, 1 if IPv4 fails, 2 if IPv6 fails
test_ping() {
	ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null || return 1
	ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null || return 2

	return 0
}
161
# Ping the router's veth1 addresses from ns1, IPv4 first, then IPv6.
# Returns: 0 if both succeed, 3 if IPv4 fails, 4 if IPv6 fails
test_ping_router() {
	ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null || return 3
	ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null || return 4

	return 0
}
173
# Check that a queue rule without "bypass" drops forwarded packets when no
# program listens on the queue.  Installs table "blackh" for the given
# family in $nsrouter, pings ns2 from ns1 and expects the ping to fail.
# Arguments: $1 - "ip" or "ip6"
# Exits the script with status 1 if the ping succeeds or the table cannot
# be removed again.
test_queue_blackhole() {
	local family=$1

	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
table $family blackh {
	chain forward {
	type filter hook forward priority 0; policy accept;
		queue num 600
	}
}
EOF
	case "$family" in
	ip)
		ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
		lret=$?
		;;
	ip6)
		ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null
		lret=$?
		;;
	*)
		# unknown family: any non-zero value skips the failure branch below
		lret=111
		;;
	esac

	# Nothing listens on queue 600, so the ping must not have succeeded.
	[ "$lret" -ne 0 ] || {
		echo "FAIL: $family expected failure, got $lret" 1>&2
		exit 1
	}

	ip netns exec "$nsrouter" nft delete table "$family" blackh || {
		echo "FAIL: $family: Could not delete blackh table"
		exit 1
	}

	echo "PASS: $family: statement with no listener results in packet drop"
}
208
# Poll helper for busywait: succeeds once queue number $2 shows up in the
# nfnetlink_queue proc file as seen from namespace $1.
# Arguments: $1 - network namespace, $2 - queue number
nf_queue_wait()
{
	local netns="$1" id="$2"
	local procfile="/proc/self/net/netfilter/nfnetlink_queue"

	# A missing file means the nfnetlink_queue module is not loaded yet.
	# It is not loaded here on purpose: the nf_queue helper running in the
	# background asks for it, so autoload is expected to finish shortly.
	if [ ! -f "$procfile" ]; then
		return 1
	fi

	# The queue number is the first column, optionally space-padded.
	ip netns exec "$netns" cat "$procfile" |
		grep -q "^ *$id "
}
224
# Run two counting nf_queue listeners (queues 0 and 1) in $nsrouter, ping
# through and to the router, then compare the listeners' packet totals.
# Arguments: $1 - number of packets each listener must report
# Globals:   nsrouter, timeout, TMPFILE0, TMPFILE1, BUSYWAIT_TIMEOUT (read)
# Exits the script with the failing ping's status (1-4) if a ping fails and
# with 1 if a listener reports a different total.
test_queue()
{
	local expected="$1"
	local last=""
	local rc file

	# spawn nf_queue listeners
	ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t $timeout > "$TMPFILE0" &
	ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t $timeout > "$TMPFILE1" &

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1

	# Capture the status explicitly: the global $ret is still 0 here, so
	# "exit $ret" would report success to the caller of the script.
	test_ping
	rc=$?
	if [ "$rc" -ne 0 ]; then
		echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $rc" 1>&2
		exit "$rc"
	fi

	test_ping_router
	rc=$?
	if [ "$rc" -ne 0 ]; then
		echo "FAIL: netns router unreachable listener on queue 0 and 1: $rc" 1>&2
		exit "$rc"
	fi

	# Let both listeners finish so their output files are complete.
	# The status is not stored: a bare "wait" always returns 0 and would
	# only overwrite a failure already recorded in $ret.
	wait

	for file in "$TMPFILE0" "$TMPFILE1"; do
		last=$(tail -n1 "$file")
		if [ "$last" != "$expected packets total" ]; then
			echo "FAIL: Expected $expected packets total, but got $last" 1>&2
			ip netns exec "$nsrouter" nft list ruleset
			exit 1
		fi
	done

	echo "PASS: Expected and received $last"
}
261
# Poll helper for busywait: succeeds once ss lists a listening TCP socket
# on port 12345 in the given namespace.
# Arguments: $1 - network namespace
listener_ready()
{
	local netns="$1"

	ss -N "$netns" -lnt -o "sport = :12345" |
		grep -q 12345
}
266
# Send $TMPINPUT over TCP from ns1 to a socat listener in ns2 while an
# nf_queue listener in $nsrouter serves queue 2.
# Globals: ret is set to 1 when the receiver does not exit successfully
#          (this includes being killed by its "timeout 5" wrapper).
test_tcp_forward()
{
	ip netns exec "$nsrouter" ./nf_queue -q 2 &
	local nfqpid=$!

	timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
	local rpid=$!

	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2

	ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	# A failed receiver must be reported; staying silent would let the
	# script finish with status 0.
	if wait "$rpid"; then
		echo "PASS: tcp and nfqueue in forward chain"
	else
		echo "FAIL: tcp and nfqueue in forward chain" 1>&2
		ret=1
	fi
	kill "$nfqpid"
}
283
# Send $TMPINPUT over loopback TCP inside $nsrouter while an nf_queue
# listener in the same namespace serves queue 3.
# Globals: ret is set to 1 when the receiver does not exit successfully
#          (this includes being killed by its "timeout 5" wrapper).
test_tcp_localhost()
{
	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
	local rpid=$!

	ip netns exec "$nsrouter" ./nf_queue -q 3 &
	local nfqpid=$!

	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3

	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null

	# A failed receiver must be reported; staying silent would let the
	# script finish with status 0.
	if wait "$rpid"; then
		echo "PASS: tcp via loopback"
	else
		echo "FAIL: tcp via loopback" 1>&2
		ret=1
	fi
	kill "$nfqpid"
}
300
# Run the connect_close helper (port 23456) in $nsrouter while an nf_queue
# listener serves queue 3, then stop the listener.
# NOTE(review): the PASS line does not depend on connect_close's exit
# status -- a bare "wait" returns 0 -- confirm whether that is intended.
test_tcp_localhost_connectclose()
{
	local queue_pid

	ip netns exec "$nsrouter" ./nf_queue -q 3 &
	queue_pid=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3

	timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3

	kill "$queue_pid"

	# reap the listener before reporting
	wait &&
		echo "PASS: tcp via loopback with connect/close"
}
313
# Loopback TCP transfer in $nsrouter where packets are queued to queue 0
# from both the output and the postrouting hook, and the queue 0 listener
# hands them on to a second listener on queue 1.  Both listeners run with
# -c and their outputs must be identical.
# Globals: ret is set to 1 when the two outputs differ.
test_tcp_localhost_requeue()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
	chain output {
		type filter hook output priority 0; policy accept;
		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
	}
	chain post {
		type filter hook postrouting priority 0; policy accept;
		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
	}
}
EOF
	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
	local rpid=$!

	# Listener on queue 1: only sees packets handed over by the listener below.
	ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" &

	# nfqueue 0 is what the ruleset queues to.  This listener does not
	# accept the packet itself but (-Q 1, presumably "requeue to") passes
	# it on to the nfqueue program on queue 1.
	ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" &

	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null

	wait

	if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
		echo "FAIL: lost packets during requeue?!" 1>&2
		# record the failure, otherwise the script still exits with 0
		ret=1
		return
	fi

	echo "PASS: tcp via loopback and re-queueing"
}
350
# Put ns1's eth0 into a VRF, queue outgoing ICMP echo requests (queue 1)
# from the output and postrouting hooks for both the vrf device and eth0,
# send one ping through the VRF and verify that each of the four rules
# counted a packet.
# Globals: ret is set to 1 on a counter mismatch.
# Prints SKIP and returns when the vrf device cannot be created.
test_icmp_vrf() {
	local queue_pid
	local mismatch=0

	ip -net "$ns1" link add tvrf type vrf table 9876 || {
		echo "SKIP: Could not add vrf device"
		return
	}

	ip -net "$ns1" li set eth0 master tvrf
	ip -net "$ns1" li set tvrf up

	ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
	ip netns exec "$ns1" nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
	chain output {
		type filter hook output priority 0; policy accept;
		meta oifname "tvrf" icmp type echo-request counter queue num 1
		meta oifname "eth0" icmp type echo-request counter queue num 1
	}
	chain post {
		type filter hook postrouting priority 0; policy accept;
		meta oifname "tvrf" icmp type echo-request counter queue num 1
		meta oifname "eth0" icmp type echo-request counter queue num 1
	}
}
EOF
	ip netns exec "$ns1" ./nf_queue -q 1 &
	queue_pid=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1

	ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null

	# Stop at the first rule whose counter does not show the packet;
	# n and d then name the offending chain and device.
	for n in output post; do
		for d in tvrf eth0; do
			ip netns exec "$ns1" nft list chain inet filter "$n" |
				grep -q "oifname \"$d\" icmp type echo-request counter packets 1" &&
				continue
			mismatch=1
			break 2
		done
	done

	# the listener is stopped before any result is printed
	kill "$queue_pid"

	if [ "$mismatch" -ne 0 ]; then
		echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
		ip netns exec "$ns1" nft list ruleset
		ret=1
		return
	fi

	echo "PASS: icmp+nfqueue via vrf"
}
398
# Poll helper for busywait: succeeds once ss (-S) lists a listening socket
# on port 12345 in the given namespace.
# Arguments: $1 - network namespace
sctp_listener_ready()
{
	local netns="$1"

	ss -S -N "$netns" -ln -o "sport = :12345" |
		grep -q 12345
}
403
# Compare a sent file with the received copy and record a failure if they
# differ.
# Arguments: $1 - input file, $2 - output file, $3 - label for the message
# Globals:   ret is set to 1 when the files differ
# Outputs:   FAIL line plus "ls -l" of both files on stderr
check_output_files()
{
	local sent="$1" received="$2" label="$3"

	# identical content: nothing to report
	cmp "$sent" "$received" && return

	{
		echo "FAIL: $label: input and output file differ"
		echo -n " Input file"
		ls -l "$sent"
		echo -n "Output file"
		ls -l "$received"
	} 1>&2
	ret=1
}
419
# Send $TMPINPUT over SCTP from ns1 to a socat listener in ns2 while the
# router queues forwarded sctp traffic (port 12345) to queue 10; afterwards
# the received data ($TMPFILE1) is compared with the input.
# Exits the script with 1 if the sctpq table cannot be deleted.
test_sctp_forward()
{
	local listener_pid queue_pid

	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet sctpq {
        chain forward {
        type filter hook forward priority 0; policy accept;
                sctp dport 12345 queue num 10
        }
}
EOF
	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
	listener_pid=$!

	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"

	ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
	queue_pid=$!

	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	ip netns exec "$nsrouter" nft delete table inet sctpq || {
		echo "FAIL:  Could not delete sctpq table"
		exit 1
	}

	# The receiver has to be done before its output file is inspected.
	if wait "$listener_pid"; then
		echo "PASS: sctp and nfqueue in forward chain"
	fi
	kill "$queue_pid"

	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
}
451
# Like test_sctp_forward, but the queue rule (queue 11) sits in ns1's own
# output hook.  $TMPINPUT is rewritten at half its original size first.
# Exits the script with 1 if the sctpq table cannot be deleted.
test_sctp_output()
{
	local listener_pid queue_pid

	ip netns exec "$ns1" nft -f /dev/stdin <<EOF
table inet sctpq {
        chain output {
        type filter hook output priority 0; policy accept;
                sctp dport 12345 queue num 11
        }
}
EOF
	# reduce test file size, software segmentation causes sk wmem increase.
	dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"

	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
	listener_pid=$!

	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"

	ip netns exec "$ns1" ./nf_queue -q 11 &
	queue_pid=$!

	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null

	ip netns exec "$ns1" nft delete table inet sctpq || {
		echo "FAIL:  Could not delete sctpq table"
		exit 1
	}

	# must wait before checking completeness of output file.
	if wait "$listener_pid"; then
		echo "PASS: sctp and nfqueue in output chain with GSO"
	fi
	kill "$queue_pid"

	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
}
486
# Poll helper for busywait: succeeds once ss (-S -uln) lists a listening
# socket on port 12345 in the given namespace.
# Arguments: $1 - network namespace
udp_listener_ready()
{
	local netns="$1"

	ss -S -N "$netns" -uln -o "sport = :12345" |
		grep -q 12345
}
491
# Poll helper for busywait: succeeds once both files exist and are
# non-empty.
# Arguments: $1, $2 - files to check
output_files_written()
{
	local f

	for f in "$1" "$2"; do
		[ -s "$f" ] || return 1
	done
}
496
# Conntrack confirmation race with nfqueue: two UDP packets with the same
# tuple are DNATed round-robin to ns2 and ns3 and queued (queue 12) in
# postrouting, where the listener delays them (-d 1000).
# Expected result: exactly one conntrack entry and one packet per receiver.
# Globals: ret is set to 1 on any failed check.
test_udp_ct_race()
{
	local nfqpid numlines1 numlines2

        ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
table inet udpq {
	chain prerouting {
		type nat hook prerouting priority dstnat - 5; policy accept;
		ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 }
	}
        chain postrouting {
		type filter hook postrouting priority srcnat - 5; policy accept;
		udp dport 12345 counter queue num 12
        }
}
EOF
	:> "$TMPFILE1"
	:> "$TMPFILE2"

	# Receivers in ns2 and ns3; their own "timeout 10" ends them.
	timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE1",trunc &
	timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE2",trunc &

	ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
	nfqpid=$!

	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12

	# Send two packets, one should end up in ns2, other in ns3.
	# This is because nfqueue will delay packet for long enough so that
	# second packet will not find existing conntrack entry.
	echo "Packet 1" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221
	echo "Packet 2" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221

	busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"

	kill "$nfqpid"

	# Anchor the count on both sides: "^1" alone also accepts 10..19 entries.
	if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1$"'; then
		echo "FAIL: Expected One udp conntrack entry"
		ip netns exec "$nsrouter" conntrack -L -p udp --dport 12345
		ret=1
	fi

	if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
		echo "FAIL: Could not delete udpq table"
		ret=1
		return
	fi

	numlines1=$(wc -l < "$TMPFILE1")
	numlines2=$(wc -l < "$TMPFILE2")

	if [ "$numlines1" -ne 1 ] || [ "$numlines2" -ne 1 ]; then
		ret=1
		echo "FAIL: uneven udp packet distribution: $numlines1 $numlines2"
		echo -n "$TMPFILE1: ";cat "$TMPFILE1"
		echo -n "$TMPFILE2: ";cat "$TMPFILE2"
		return
	fi

	echo "PASS: both udp receivers got one packet each"
}
563
# Kill an nf_queue listener (queue 0, -d 30000) in ns1 while ICMP packets
# sent to loopback are queued to it, flush the ruleset, then check that
# the kernel taint value is still 0.
# Globals: tainted_then, tainted_now (written); ret is set to 1 when the
#          kernel became tainted during the test.
# Nothing is reported when the kernel was already tainted beforehand.
test_queue_removal()
{
	local queue_pid

	read tainted_then < /proc/sys/kernel/tainted

	ip netns exec "$ns1" nft -f - <<EOF
flush ruleset
table ip filter {
	chain output {
		type filter hook output priority 0; policy accept;
		ip protocol icmp queue num 0
	}
}
EOF
	ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 &
	queue_pid=$!

	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0

	# Flood a few pings, then take the listener away and drop the rules.
	ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null
	kill $queue_pid

	ip netns exec "$ns1" nft flush ruleset

	# With a pre-existing taint a new one could not be told apart.
	if [ "$tainted_then" -ne 0 ]; then
		return
	fi

	read tainted_now < /proc/sys/kernel/tainted
	if [ "$tainted_now" -eq 0 ]; then
		echo "PASS: queue program exiting while packets queued"
	else
		echo "TAINT: queue program exiting while packets queued"
		ret=1
	fi
}
599
# Test sequence.  The router forwards IPv6 globally and IPv4 on each veth.
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null

load_ruleset "filter" 0

if test_ping; then
	# queue bypass works (rules were skipped, no listener)
	echo "PASS: ${ns1} can reach ${ns2}"
else
	# In the else branch $? still holds test_ping's status.  Store it
	# first: $ret is 0 up to here and "exit $ret" would signal success.
	ret=$?
	echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
	exit $ret
fi

test_queue_blackhole ip
test_queue_blackhole ip6

# dummy ruleset to add base chains between the
# queueing rules.  We don't want the second reinject
# to re-execute the old hooks.
load_counter_ruleset 10

# we are hooking all: prerouting/input/forward/output/postrouting.
# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
# so we expect that userspace program receives 10 packets.
test_queue 10

# same.  We queue to a second program as well.
load_ruleset "filter2" 20
test_queue 20
# NOTE(review): this flushes ${ns1}, while the rulesets above were loaded
# into ${nsrouter}.  The tcp tests below depend on the router's queue rules
# for ports 12345/23456, so the target is left unchanged -- confirm intent.
ip netns exec "$ns1" nft flush ruleset

test_tcp_forward
test_tcp_localhost
test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_sctp_forward
test_sctp_output
test_udp_ct_race

# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
test_queue_removal

exit $ret
648