1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Kselftest framework requirement - SKIP code is 4.
5ksft_skip=4
6
7# Conntrack needs to reassemble fragments in order to have complete
8# packets for rule matching.  Reassembly can lead to packet loss.
9
10# Consider the following setup:
11#            +--------+       +---------+       +--------+
12#            |Router A|-------|Wanrouter|-------|Router B|
13#            |        |.IPIP..|         |..IPIP.|        |
14#            +--------+       +---------+       +--------+
15#           /                  mtu 1400                   \
16#          /                                               \
17#+--------+                                                 +--------+
18#|Client A|                                                 |Client B|
19#|        |                                                 |        |
20#+--------+                                                 +--------+
21
22# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
23# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
24# on its interfaces.
25
# Random suffix for the namespace names; -u only prints a name, nothing
# is created on disk.
rnd=$(mktemp -u XXXXXXXX)

# Capture file for the data received by the listener in test_path.
if ! rx=$(mktemp); then
	echo "SKIP: Could not create temporary file"
	exit $ksft_skip
fi
# The tool checks below may exit with SKIP before the full cleanup handler
# is installed; make sure the capture file does not outlive such an exit.
# A later "trap ... EXIT" replaces this handler.
trap 'rm -f -- "${rx}"' EXIT

# Namespace names: two routers, the WAN router and two clients.
r_a="ns-ra-${rnd}"
r_b="ns-rb-${rnd}"
r_w="ns-rw-${rnd}"
c_a="ns-ca-${rnd}"
c_b="ns-cb-${rnd}"
34
# checktool CMDLINE DESCRIPTION
#
# Run CMDLINE (a complete command line passed as a single string) with all
# of its output discarded. When it fails, print "SKIP: Could not DESCRIPTION"
# and terminate the script with the kselftest skip code.
checktool() {
	# $1 is intentionally left unquoted: it must be split into the
	# command name and its arguments.
	$1 > /dev/null 2>&1 && return

	echo "SKIP: Could not $2"
	exit $ksft_skip
}
41
# Skip the whole test when a required tool is missing.
checktool "iptables --version" "run test without iptables"
checktool "ip -Version" "run test without ip tool"
# "command -v" is a shell builtin, so the check does not itself depend on
# an external which(1) binary being installed.
checktool "command -v nc" "run test without nc (netcat)"
# Creating the first namespace doubles as the check that namespaces can be
# created at all.
checktool "ip netns add ${r_a}" "create net namespace"

# Remaining namespaces: Router B, Wanrouter, Client A, Client B.
for n in "${r_b}" "${r_w}" "${c_a}" "${c_b}"; do
	ip netns add "${n}"
done
50
# EXIT handler: delete the five test namespaces and the capture file.
cleanup() {
	local ns

	for ns in "${r_a}" "${r_b}" "${r_w}" "${c_a}" "${c_b}"; do
		ip netns del "${ns}"
	done
	# Quoted and guarded with "--" so a temp path containing whitespace
	# or a leading dash is still removed as a single file.
	rm -f -- "${rx}"
}

trap cleanup EXIT
59
# test_path LABEL
#
# Start a UDP listener on port 5000 in the Client B namespace that writes
# what it receives to ${rx}, then send a 1400 byte payload of "a" characters
# from the Client A namespace to 192.168.20.2:5000 three times.
#
# Prints "OK: ..." when the capture file holds exactly one payload's worth
# of bytes; otherwise prints "FAIL: ..." and exits the script with status 1.
# LABEL is only used in those messages.
#
# NOTE(review): three sends but one payload expected - presumably the
# listener only takes data from the first peer it sees and the repeats cover
# an initial loss; confirm against nc behaviour.
test_path() {
	local msg="$1"
	local payload_len=1400
	local listener_pid bytes i

	ip netns exec "${c_b}" nc -n -w 3 -q 3 -u -l -p 5000 > "${rx}" < /dev/null &
	listener_pid=$!

	# Presumably gives the listener time to start before data is sent.
	sleep 1
	for i in 1 2 3; do
		head -c"${payload_len}" /dev/zero | tr "\000" "a" |
			ip netns exec "${c_a}" nc -n -w 1 -u 192.168.20.2 5000
	done

	# Let the listener finish so the capture file is complete.
	wait "${listener_pid}"

	bytes=$(wc -c < "${rx}")

	if [ "${bytes}" -eq "${payload_len}" ]; then
		echo "OK: PMTU ${msg} connection tracking"
	else
		echo "FAIL: PMTU ${msg} connection tracking: got ${bytes}, expected ${payload_len}"
		exit 1
	fi
}
81
82# Detailed setup for Router A
83# ---------------------------
# Interfaces (created below as veth0/veth1 instead of eth0/eth1):
# veth0: 10.2.2.1/24
# veth1: 192.168.10.1/24
# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
# Routes:
# 192.168.20.0/24 dev ipip0    (192.168.20.0/24 is subnet of Client B)
# 10.4.4.0/24 via 10.2.2.254   (Router B via Wanrouter)
# No iptables rules at all (until the conntrack rule for the second test run).
92
# veth0 connects Router A to the Wanrouter, veth1 connects it to Client A.
ip link add veth0 netns "${r_a}" type veth peer name veth0 netns "${r_w}"
ip link add veth1 netns "${r_a}" type veth peer name veth0 netns "${c_a}"

# Tunnel endpoints as seen from Router A.
l_addr="10.2.2.1"
r_addr="10.4.4.1"
if ! ip netns exec "${r_a}" ip link add ipip0 type ipip \
		local "${l_addr}" remote "${r_addr}" mode ipip; then
	# The tunnel device could not be created: skip the test.
	exit $ksft_skip
fi

for dev in lo veth0 veth1 ipip0; do
	ip -net "${r_a}" link set "${dev}" up
done

ip -net "${r_a}" addr add "${l_addr}/24" dev veth0
ip -net "${r_a}" addr add 192.168.10.1/24 dev veth1

# Client B's subnet is routed into the tunnel; the network of the remote
# tunnel endpoint is reached through the Wanrouter.
ip -net "${r_a}" route add 192.168.20.0/24 dev ipip0
ip -net "${r_a}" route add 10.4.4.0/24 via 10.2.2.254

ip netns exec "${r_a}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
111
112# Detailed setup for Router B
113# ---------------------------
# Interfaces (created below as veth0/veth1 instead of eth0/eth1):
# veth0: 10.4.4.1/24
# veth1: 192.168.20.1/24
# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
# Routes:
# 192.168.10.0/24 dev ipip0    (192.168.10.0/24 is subnet of Client A)
# 10.2.2.0/24 via 10.4.4.254   (Router A via Wanrouter)
# No iptables rules at all.
122
# veth0 connects Router B to the Wanrouter (its veth1), veth1 connects it
# to Client B.
ip link add veth0 netns "${r_b}" type veth peer name veth1 netns "${r_w}"
ip link add veth1 netns "${r_b}" type veth peer name veth0 netns "${c_b}"

# Tunnel endpoints as seen from Router B (mirror image of Router A).
l_addr="10.4.4.1"
r_addr="10.2.2.1"
if ! ip netns exec "${r_b}" ip link add ipip0 type ipip \
		local "${l_addr}" remote "${r_addr}" mode ipip; then
	# The tunnel device could not be created: skip the test.
	exit $ksft_skip
fi

for dev in lo veth0 veth1 ipip0; do
	ip -net "${r_b}" link set "${dev}" up
done

ip -net "${r_b}" addr add "${l_addr}/24" dev veth0
ip -net "${r_b}" addr add 192.168.20.1/24 dev veth1

# Client A's subnet is routed into the tunnel; the network of the remote
# tunnel endpoint is reached through the Wanrouter.
ip -net "${r_b}" route add 192.168.10.0/24 dev ipip0
ip -net "${r_b}" route add 10.2.2.0/24 via 10.4.4.254

ip netns exec "${r_b}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
141
# Client A: 192.168.10.2/24, default route via Router A's veth1 address.
ip -net "${c_a}" addr add 192.168.10.2/24 dev veth0
for dev in lo veth0; do
	ip -net "${c_a}" link set dev "${dev}" up
done
ip -net "${c_a}" route add default via 192.168.10.1
147
# Client B
# 192.168.20.2/24 on veth0, default route via Router B's veth1 address.
ip -net "${c_b}" addr add 192.168.20.2/24 dev veth0
for dev in veth0 lo; do
	ip -net "${c_b}" link set dev "${dev}" up
done
ip -net "${c_b}" route add default via 192.168.20.1
153
# Wanrouter: veth0 faces Router A, veth1 faces Router B.
ip -net "${r_w}" addr add 10.2.2.254/24 dev veth0
ip -net "${r_w}" addr add 10.4.4.254/24 dev veth1

ip -net "${r_w}" link set dev lo up
# Both WAN links run with the reduced MTU of 1400.
for dev in veth0 veth1; do
	ip -net "${r_w}" link set dev "${dev}" up mtu 1400
done

# The routers' ends of the WAN links get the same MTU.
for n in "${r_a}" "${r_b}"; do
	ip -net "${n}" link set dev veth0 mtu 1400
done

ip netns exec "${r_w}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
166
167# Path MTU discovery
168# ------------------
169# Running tracepath from Client A to Client B shows PMTU discovery is working
170# as expected:
171#
172# clienta:~# tracepath 192.168.20.2
173# 1?: [LOCALHOST]                      pmtu 1500
174# 1:  192.168.10.1                                          0.867ms
175# 1:  192.168.10.1                                          0.302ms
176# 2:  192.168.10.1                                          0.312ms pmtu 1480
177# 2:  no reply
178# 3:  192.168.10.1                                          0.510ms pmtu 1380
179# 3:  192.168.20.2                                          2.320ms reached
180# Resume: pmtu 1380 hops 3 back 3
181
182# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
183
184# Router A has learned PMTU (1400) to Router B from Wanrouter.
185# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
186# from Router A.
187
188#Send large UDP packet
189#---------------------
190#Now we send a 1400 bytes UDP packet from Client A to Client B:
191
192# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000
test_path "without"

# The IPv4 stack on Client A already knows the PMTU to Client B, so the
# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
# fragments between veth1 and ipip0. The fragments fit into the tunnel and
# reach their destination.

# Next, a rule using the conntrack match is added on Router A, which makes
# Router A track connections. When sending the large UDP packet again,
# Router A now reassembles the fragments before routing the packet over
# ipip0. The resulting IPIP packet is too big (1400) for the tunnel PMTU
# (1380) to Router B; the problem under test is that it gets dropped on
# Router A before sending. test_path expects the payload to arrive.

# If the rule cannot be added, the second run would not involve conntrack
# and an "OK" from it would be meaningless, so skip instead.
if ! ip netns exec "${r_a}" iptables -A FORWARD -m conntrack --ctstate NEW; then
	echo "SKIP: Could not add iptables conntrack rule"
	exit $ksft_skip
fi
test_path "with"
207