1#!/bin/bash
2#============================================================================
3# ${XEN_SCRIPT_DIR}/remus-netbuf-setup
4#
5# Script for attaching a network buffer to the specified vif (in any mode).
6# The hotplugging system will call this script when starting remus via libxl
7# API, libxl_domain_remus_start.
8#
9# Usage:
10# remus-netbuf-setup (setup|teardown)
11#
12# Environment vars:
13# vifname     vif interface name (required).
14# XENBUS_PATH path in Xenstore, where the REMUS_IFB device details will be
15#             stored or read from (required).
16#             (libxl passes /libxl/<domid>/remus/netbuf/<devid>)
17# REMUS_IFB   ifb interface to be cleaned up (required). [for teardown op only]
18
19# Written to the store: (setup operation)
20# XENBUS_PATH/ifb=<ifbdevName> the REMUS_IFB device serving
21#  as the intermediate buffer through which the interface's network output
22#  can be controlled.
23#
24
25# Remus network buffering requirements:
26
27# We need to buffer (queue) egress traffic from every vif attached to
28# the guest and release the buffers when the checkpoint associated
29# with them has been committed at the backup host. We achieve this
30# with the help of the plug queuing discipline (sch_plug module).
31# Simply put, Remus' network buffering imposes traffic
32# shaping on the guest's vif(s).
33
34# Limitations and Workarounds:
35
36# Egress traffic from a vif appears as ingress traffic to dom0. Linux
37# supports policing (dropping packets) but not traffic shaping
38# (queuing packets) on ingress traffic. The standard workaround to
39# this limitation is to attach an ingress qdisc to the guest vif,
40# redirect all egress traffic from the guest to an intermediate
41# queuing interface, and apply egress rules to it. The IFB
42# (Intermediate Functional Block) device serves the purpose of an
43# intermediate queuing interface.
44#
45
46# The following commands install a network buffer on a
47# guest's vif (vif1.0) using an IFB device (ifb0):
48#
49#  ip link set dev ifb0 up
50#  tc qdisc add dev vif1.0 ingress
51#  tc filter add dev vif1.0 parent ffff: proto ip \
52#    prio 10 u32 match u32 0 0 action mirred egress redirect dev ifb0
53#  nl-qdisc-add --dev=ifb0 --parent root plug
54#  nl-qdisc-add --dev=ifb0 --parent root --update plug --limit=10000000
55#                                                (10MB limit on buffer)
56#
57# So order of operations when installing a network buffer on vif1.0
58# 1. find a free ifb and bring up the device
59# 2. redirect traffic from vif1.0 to ifb:
60#   2.1 add ingress qdisc to vif1.0 (to capture outgoing packets from guest)
61#   2.2 use tc filter command with actions mirred egress + redirect
62# 3. install plug_qdisc on ifb device, with which we can buffer/release
63#    guest's network output from vif1.0
64#
65# Note:
66# 1. If the setup process fails, the script's cleanup is limited to removing the
67#    ingress qdisc on the guest vif, so that its traffic can flow normally.
68#    The chosen ifb device is not torn down. Libxl has to execute the
69#    teardown op to remove other qdiscs and subsequently free the IFB device.
70#
71# 2. The teardown op may be invoked multiple times by libxl.
72
73#============================================================================
74
# Unlike other vif scripts, vif-common is not needed here, as it executes
# vif-specific setup code such as renaming.
# Locate the directory this script lives in and pull in the shared hotplug
# helpers from it. NOTE(review): findCommand, evalVariables, log, fatal and
# the other helpers used in this script are not defined in this file, so they
# are presumably provided by this include -- confirm.
dir=$(dirname "$0")
. "$dir/xen-hotplug-common.sh"

# NOTE(review): findCommand is expected to set $command from the script
# arguments -- confirm against the helper's definition.
findCommand "$@"

# Only the two documented operations are accepted. A case statement is used
# rather than `[ ... -a ... ]`, whose parsing is ambiguous for operands that
# look like test operators.
case "$command" in
  setup|teardown)
    ;;
  *)
    echo "Invalid command: $command"
    log err "Invalid command: $command"
    exit 1
    ;;
esac

evalVariables "$@"

# Abort with the shell's "parameter null or not set" diagnostic unless both
# required variables are set and non-empty. The expansions are quoted so their
# values are never subject to word splitting or globbing.
: "${vifname:?}"
: "${XENBUS_PATH:?}"
93
# Verify that every libnl command-line tool this script relies on can be
# resolved by the shell; call fatal, naming the tool, for one that cannot.
check_libnl_tools() {
    local tool
    for tool in nl-qdisc-list nl-qdisc-add nl-qdisc-delete
    do
        command -v "$tool" > /dev/null 2>&1 || fatal "Unable to find $tool tool"
    done
}
105
# We only check for modules. We don't load them.
# User/Admin is supposed to load ifb during boot time,
# ensuring that there are enough free ifbs in the system.
# Other modules will be loaded automatically by tc commands.
#
# Calls fatal, naming the module, when modinfo cannot find one of the
# required kernel modules.
check_modules() {
    # Keep the loop variable local so it neither leaks into nor clobbers the
    # script's global namespace.
    local m
    for m in ifb sch_plug sch_ingress act_mirred cls_u32
    do
        if ! modinfo "$m" > /dev/null 2>&1; then
            fatal "Unable to find $m kernel module"
        fi
    done
}
118
# Return 0 if the ifb device named by $1 is free, 1 otherwise.
#
# A device counts as in use when nl-qdisc-list prints anything for it, or when
# its name is stored under /libxl/<domid>/remus/netbuf/<devid>/ifb in xenstore
# for any domain listed under /local/domain other than domain 0.
check_ifb() {
    local candidate=$1
    # All scratch variables are local so nothing leaks into the caller.
    local installed domid devid path ifb

    # Any output at all is treated as "a qdisc is already installed".
    # A failing nl-qdisc-list is deliberately tolerated (empty output).
    installed=$(nl-qdisc-list -d "$candidate") || true
    [ -n "$installed" ] && return 1

    # Listing errors are deliberately ignored: an unreadable directory simply
    # contributes no entries to the scan.
    for domid in $(xenstore-list "/local/domain" 2>/dev/null || true)
    do
        [ "$domid" -eq 0 ] && continue
        xenstore-exists "/libxl/$domid/remus/netbuf" || continue
        for devid in $(xenstore-list "/libxl/$domid/remus/netbuf" 2>/dev/null || true)
        do
            path="/libxl/$domid/remus/netbuf/$devid/ifb"
            xenstore-exists "$path" || continue
            ifb=$(xenstore-read "$path" 2>/dev/null || true)
            [ "$ifb" = "$candidate" ] && return 1
        done
    done

    return 0
}
139
# Pick the first free ifb device, record it in xenstore and bring it up.
#
# Globals:
#   vifname      (read)    used in the failure message
#   XENBUS_PATH  (read)    xenstore directory that receives the "ifb" key
#   REMUS_IFB    (written) name of the chosen ifb device
setup_ifb() {
    local ifb

    # Start from a clean slate so that a REMUS_IFB value inherited from the
    # environment cannot be mistaken for a freshly selected device when no
    # free ifb exists.
    REMUS_IFB=""

    # Candidates are the first column of every "ifconfig -a -s" row starting
    # with "ifb". Plain grep is enough for this anchored literal pattern and
    # avoids the obsolescent egrep wrapper.
    for ifb in $(ifconfig -a -s | grep '^ifb' | cut -d ' ' -f1)
    do
        check_ifb "$ifb" || continue
        REMUS_IFB="$ifb"
        break
    done

    if [ -z "$REMUS_IFB" ]
    then
        fatal "Unable to find a free ifb device for $vifname"
    fi

    # NOTE(review): xenstore_write is a helper defined outside this file; if it
    # exits on failure, nothing is cleaned up here -- confirm that is
    # acceptable.
    xenstore_write "$XENBUS_PATH/ifb" "$REMUS_IFB"
    do_or_die ip link set dev "$REMUS_IFB" up
}
159
# Add an ingress qdisc to a vif and install a tc u32 filter on it that
# redirects matching packets to an ifb device.
#
# Arguments:
#   $1 - vif that gets the ingress qdisc and the filter
#   $2 - ifb device that receives the redirected packets
#
# If the filter cannot be installed, the ingress qdisc is removed again before
# fatal is called.
redirect_vif_traffic() {
    local vif=$1
    local ifb=$2

    do_or_die tc qdisc add dev "$vif" ingress

    # Test the command directly instead of inspecting $? afterwards: when the
    # shell runs with errexit or an ERR trap (NOTE(review): possibly enabled by
    # the sourced common script -- confirm), a failing bare command would end
    # the script before the cleanup below could run.
    if ! tc filter add dev "$vif" parent ffff: proto ip prio 10 \
        u32 match u32 0 0 action mirred egress redirect dev "$ifb" >/dev/null 2>&1
    then
        do_without_error tc qdisc del dev "$vif" ingress
        fatal "Failed to redirect traffic from $vif to $ifb"
    fi
}
175
# Install a plug qdisc at the root of an ifb device and then try to set its
# buffer limit.
#
# Arguments:
#   $1 - vif whose ingress qdisc is removed if the plug qdisc cannot be added
#   $2 - ifb device that receives the plug qdisc
add_plug_qdisc() {
    local vif=$1
    local ifb=$2

    # Test the command directly instead of inspecting $? afterwards: when the
    # shell runs with errexit or an ERR trap (NOTE(review): possibly enabled by
    # the sourced common script -- confirm), a failing bare command would end
    # the script before the cleanup below could run.
    if ! nl-qdisc-add --dev="$ifb" --parent root plug >/dev/null 2>&1
    then
        do_without_error tc qdisc del dev "$vif" ingress
        fatal "Failed to add plug qdisc to $ifb"
    fi

    # Set the ifb buffering limit in bytes. It's okay if this command fails.
    nl-qdisc-add --dev="$ifb" --parent root \
        --update plug --limit=10000000 >/dev/null 2>&1 || true
}
191
# Undo the network buffer setup for a vif.
#
# Arguments:
#   $1 - vif whose ingress qdisc is removed
#   $2 - ifb device expected to be recorded at XENBUS_PATH/ifb
#
# Globals:
#   XENBUS_PATH (read) xenstore directory holding ifb, hotplug-status and
#                      hotplug-error
teardown_netbuf() {
    local vif=$1
    local ifb=$2
    # Declared unconditionally so that, when the xenstore key is absent, the
    # comparison below sees an empty value rather than whatever a global or
    # inherited variable of the same name happens to hold.
    local ifb2=""

    # Check that XENBUS_PATH/ifb exists and names the same IFB as REMUS_IFB.
    # Otherwise, if the teardown op is called multiple times, we might end up
    # freeing another domain's allocated IFB inside the if block.
    if xenstore-exists "$XENBUS_PATH/ifb"; then
        ifb2=$(xenstore-read "$XENBUS_PATH/ifb" 2>/dev/null || true)
    fi

    if [[ -n "$ifb2" && "$ifb2" == "$ifb" ]]; then
        do_without_error ip link set dev "$ifb" down
        do_without_error nl-qdisc-delete --dev="$ifb" --parent root plug >/dev/null 2>&1
        xenstore-rm -t "$XENBUS_PATH/ifb" 2>/dev/null || true
    fi

    # These steps run on every invocation; failures are ignored.
    do_without_error tc qdisc del dev "$vif" ingress
    xenstore-rm -t "$XENBUS_PATH/hotplug-status" 2>/dev/null || true
    xenstore-rm -t "$XENBUS_PATH/hotplug-error" 2>/dev/null || true
}
211
# Run the requested operation. $command has already been restricted to
# "setup" or "teardown" earlier in the script.
if [ "$command" = "setup" ]
then
    # Make sure the prerequisites exist before touching any device.
    check_libnl_tools
    check_modules

    # Choosing an ifb and wiring it to the vif is done between claiming and
    # releasing the "pickifb" lock.
    claim_lock "pickifb"
    setup_ifb
    redirect_vif_traffic "$vifname" "$REMUS_IFB"
    add_plug_qdisc "$vifname" "$REMUS_IFB"
    release_lock "pickifb"

    success
elif [ "$command" = "teardown" ]
then
    teardown_netbuf "$vifname" "$REMUS_IFB"
fi

log debug "Successful remus-netbuf-setup $command for $vifname, ifb $REMUS_IFB."
231