1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VMware vSockets Driver
4  *
5  * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
6  */
7 
8 #include <linux/types.h>
9 #include <linux/socket.h>
10 #include <linux/stddef.h>
11 #include <net/sock.h>
12 
13 #include "vmci_transport_notify.h"
14 
/* Access one member of the pkt_q_state notify state that belongs to the
 * given vsock socket. Expands to an lvalue, so it can be read or assigned.
 */
#define PKT_FIELD(vsk, field) (vmci_trans(vsk)->notify.pkt_q_state.field)
17 
vmci_transport_notify_waiting_write(struct vsock_sock * vsk)18 static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
19 {
20 	bool retval;
21 	u64 notify_limit;
22 
23 	if (!PKT_FIELD(vsk, peer_waiting_write))
24 		return false;
25 
26 	/* When the sender blocks, we take that as a sign that the sender is
27 	 * faster than the receiver. To reduce the transmit rate of the sender,
28 	 * we delay the sending of the read notification by decreasing the
29 	 * write_notify_window. The notification is delayed until the number of
30 	 * bytes used in the queue drops below the write_notify_window.
31 	 */
32 
33 	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
34 		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
35 		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
36 			PKT_FIELD(vsk, write_notify_window) =
37 			    PKT_FIELD(vsk, write_notify_min_window);
38 		} else {
39 			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
40 			if (PKT_FIELD(vsk, write_notify_window) <
41 			    PKT_FIELD(vsk, write_notify_min_window))
42 				PKT_FIELD(vsk, write_notify_window) =
43 				    PKT_FIELD(vsk, write_notify_min_window);
44 
45 		}
46 	}
47 	notify_limit = vmci_trans(vsk)->consume_size -
48 		PKT_FIELD(vsk, write_notify_window);
49 
50 	/* The notify_limit is used to delay notifications in the case where
51 	 * flow control is enabled. Below the test is expressed in terms of
52 	 * free space in the queue: if free_space > ConsumeSize -
53 	 * write_notify_window then notify An alternate way of expressing this
54 	 * is to rewrite the expression to use the data ready in the receive
55 	 * queue: if write_notify_window > bufferReady then notify as
56 	 * free_space == ConsumeSize - bufferReady.
57 	 */
58 
59 	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
60 		notify_limit;
61 
62 	if (retval) {
63 		/* Once we notify the peer, we reset the detected flag so the
64 		 * next wait will again cause a decrease in the window size.
65 		 */
66 
67 		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
68 	}
69 	return retval;
70 }
71 
72 static void
vmci_transport_handle_read(struct sock * sk,struct vmci_transport_packet * pkt,bool bottom_half,struct sockaddr_vm * dst,struct sockaddr_vm * src)73 vmci_transport_handle_read(struct sock *sk,
74 			   struct vmci_transport_packet *pkt,
75 			   bool bottom_half,
76 			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
77 {
78 	sk->sk_write_space(sk);
79 }
80 
81 static void
vmci_transport_handle_wrote(struct sock * sk,struct vmci_transport_packet * pkt,bool bottom_half,struct sockaddr_vm * dst,struct sockaddr_vm * src)82 vmci_transport_handle_wrote(struct sock *sk,
83 			    struct vmci_transport_packet *pkt,
84 			    bool bottom_half,
85 			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
86 {
87 	sk->sk_data_ready(sk);
88 }
89 
vsock_block_update_write_window(struct sock * sk)90 static void vsock_block_update_write_window(struct sock *sk)
91 {
92 	struct vsock_sock *vsk = vsock_sk(sk);
93 
94 	if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size)
95 		PKT_FIELD(vsk, write_notify_window) =
96 		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
97 			vmci_trans(vsk)->consume_size);
98 }
99 
vmci_transport_send_read_notification(struct sock * sk)100 static int vmci_transport_send_read_notification(struct sock *sk)
101 {
102 	struct vsock_sock *vsk;
103 	bool sent_read;
104 	unsigned int retries;
105 	int err;
106 
107 	vsk = vsock_sk(sk);
108 	sent_read = false;
109 	retries = 0;
110 	err = 0;
111 
112 	if (vmci_transport_notify_waiting_write(vsk)) {
113 		/* Notify the peer that we have read, retrying the send on
114 		 * failure up to our maximum value.  XXX For now we just log
115 		 * the failure, but later we should schedule a work item to
116 		 * handle the resend until it succeeds.  That would require
117 		 * keeping track of work items in the vsk and cleaning them up
118 		 * upon socket close.
119 		 */
120 		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
121 		       !sent_read &&
122 		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
123 			err = vmci_transport_send_read(sk);
124 			if (err >= 0)
125 				sent_read = true;
126 
127 			retries++;
128 		}
129 
130 		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read)
131 			pr_err("%p unable to send read notification to peer\n",
132 			       sk);
133 		else
134 			PKT_FIELD(vsk, peer_waiting_write) = false;
135 
136 	}
137 	return err;
138 }
139 
vmci_transport_notify_pkt_socket_init(struct sock * sk)140 static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
141 {
142 	struct vsock_sock *vsk = vsock_sk(sk);
143 
144 	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
145 	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
146 	PKT_FIELD(vsk, peer_waiting_write) = false;
147 	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
148 }
149 
vmci_transport_notify_pkt_socket_destruct(struct vsock_sock * vsk)150 static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
151 {
152 	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
153 	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
154 	PKT_FIELD(vsk, peer_waiting_write) = false;
155 	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
156 }
157 
158 static int
vmci_transport_notify_pkt_poll_in(struct sock * sk,size_t target,bool * data_ready_now)159 vmci_transport_notify_pkt_poll_in(struct sock *sk,
160 				  size_t target, bool *data_ready_now)
161 {
162 	struct vsock_sock *vsk = vsock_sk(sk);
163 
164 	if (vsock_stream_has_data(vsk)) {
165 		*data_ready_now = true;
166 	} else {
167 		/* We can't read right now because there is nothing in the
168 		 * queue. Ask for notifications when there is something to
169 		 * read.
170 		 */
171 		if (sk->sk_state == TCP_ESTABLISHED)
172 			vsock_block_update_write_window(sk);
173 		*data_ready_now = false;
174 	}
175 
176 	return 0;
177 }
178 
179 static int
vmci_transport_notify_pkt_poll_out(struct sock * sk,size_t target,bool * space_avail_now)180 vmci_transport_notify_pkt_poll_out(struct sock *sk,
181 				   size_t target, bool *space_avail_now)
182 {
183 	s64 produce_q_free_space;
184 	struct vsock_sock *vsk = vsock_sk(sk);
185 
186 	produce_q_free_space = vsock_stream_has_space(vsk);
187 	if (produce_q_free_space > 0) {
188 		*space_avail_now = true;
189 		return 0;
190 	} else if (produce_q_free_space == 0) {
191 		/* This is a connected socket but we can't currently send data.
192 		 * Nothing else to do.
193 		 */
194 		*space_avail_now = false;
195 	}
196 
197 	return 0;
198 }
199 
200 static int
vmci_transport_notify_pkt_recv_init(struct sock * sk,size_t target,struct vmci_transport_recv_notify_data * data)201 vmci_transport_notify_pkt_recv_init(
202 				struct sock *sk,
203 				size_t target,
204 				struct vmci_transport_recv_notify_data *data)
205 {
206 	struct vsock_sock *vsk = vsock_sk(sk);
207 
208 	data->consume_head = 0;
209 	data->produce_tail = 0;
210 	data->notify_on_block = false;
211 
212 	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
213 		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
214 		if (PKT_FIELD(vsk, write_notify_window) <
215 		    PKT_FIELD(vsk, write_notify_min_window)) {
216 			/* If the current window is smaller than the new
217 			 * minimal window size, we need to reevaluate whether
218 			 * we need to notify the sender. If the number of ready
219 			 * bytes are smaller than the new window, we need to
220 			 * send a notification to the sender before we block.
221 			 */
222 
223 			PKT_FIELD(vsk, write_notify_window) =
224 			    PKT_FIELD(vsk, write_notify_min_window);
225 			data->notify_on_block = true;
226 		}
227 	}
228 
229 	return 0;
230 }
231 
232 static int
vmci_transport_notify_pkt_recv_pre_block(struct sock * sk,size_t target,struct vmci_transport_recv_notify_data * data)233 vmci_transport_notify_pkt_recv_pre_block(
234 				struct sock *sk,
235 				size_t target,
236 				struct vmci_transport_recv_notify_data *data)
237 {
238 	int err = 0;
239 
240 	vsock_block_update_write_window(sk);
241 
242 	if (data->notify_on_block) {
243 		err = vmci_transport_send_read_notification(sk);
244 		if (err < 0)
245 			return err;
246 		data->notify_on_block = false;
247 	}
248 
249 	return err;
250 }
251 
252 static int
vmci_transport_notify_pkt_recv_post_dequeue(struct sock * sk,size_t target,ssize_t copied,bool data_read,struct vmci_transport_recv_notify_data * data)253 vmci_transport_notify_pkt_recv_post_dequeue(
254 				struct sock *sk,
255 				size_t target,
256 				ssize_t copied,
257 				bool data_read,
258 				struct vmci_transport_recv_notify_data *data)
259 {
260 	struct vsock_sock *vsk;
261 	int err;
262 	bool was_full = false;
263 	u64 free_space;
264 
265 	vsk = vsock_sk(sk);
266 	err = 0;
267 
268 	if (data_read) {
269 		smp_mb();
270 
271 		free_space =
272 			vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair);
273 		was_full = free_space == copied;
274 
275 		if (was_full)
276 			PKT_FIELD(vsk, peer_waiting_write) = true;
277 
278 		err = vmci_transport_send_read_notification(sk);
279 		if (err < 0)
280 			return err;
281 
282 		/* See the comment in
283 		 * vmci_transport_notify_pkt_send_post_enqueue().
284 		 */
285 		sk->sk_data_ready(sk);
286 	}
287 
288 	return err;
289 }
290 
291 static int
vmci_transport_notify_pkt_send_init(struct sock * sk,struct vmci_transport_send_notify_data * data)292 vmci_transport_notify_pkt_send_init(
293 				struct sock *sk,
294 				struct vmci_transport_send_notify_data *data)
295 {
296 	data->consume_head = 0;
297 	data->produce_tail = 0;
298 
299 	return 0;
300 }
301 
302 static int
vmci_transport_notify_pkt_send_post_enqueue(struct sock * sk,ssize_t written,struct vmci_transport_send_notify_data * data)303 vmci_transport_notify_pkt_send_post_enqueue(
304 				struct sock *sk,
305 				ssize_t written,
306 				struct vmci_transport_send_notify_data *data)
307 {
308 	int err = 0;
309 	struct vsock_sock *vsk;
310 	bool sent_wrote = false;
311 	bool was_empty;
312 	int retries = 0;
313 
314 	vsk = vsock_sk(sk);
315 
316 	smp_mb();
317 
318 	was_empty =
319 		vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written;
320 	if (was_empty) {
321 		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
322 		       !sent_wrote &&
323 		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
324 			err = vmci_transport_send_wrote(sk);
325 			if (err >= 0)
326 				sent_wrote = true;
327 
328 			retries++;
329 		}
330 	}
331 
332 	if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
333 		pr_err("%p unable to send wrote notification to peer\n",
334 		       sk);
335 		return err;
336 	}
337 
338 	return err;
339 }
340 
341 static void
vmci_transport_notify_pkt_handle_pkt(struct sock * sk,struct vmci_transport_packet * pkt,bool bottom_half,struct sockaddr_vm * dst,struct sockaddr_vm * src,bool * pkt_processed)342 vmci_transport_notify_pkt_handle_pkt(
343 				struct sock *sk,
344 				struct vmci_transport_packet *pkt,
345 				bool bottom_half,
346 				struct sockaddr_vm *dst,
347 				struct sockaddr_vm *src, bool *pkt_processed)
348 {
349 	bool processed = false;
350 
351 	switch (pkt->type) {
352 	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
353 		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
354 		processed = true;
355 		break;
356 	case VMCI_TRANSPORT_PACKET_TYPE_READ:
357 		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
358 		processed = true;
359 		break;
360 	}
361 
362 	if (pkt_processed)
363 		*pkt_processed = processed;
364 }
365 
vmci_transport_notify_pkt_process_request(struct sock * sk)366 static void vmci_transport_notify_pkt_process_request(struct sock *sk)
367 {
368 	struct vsock_sock *vsk = vsock_sk(sk);
369 
370 	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
371 	if (vmci_trans(vsk)->consume_size <
372 		PKT_FIELD(vsk, write_notify_min_window))
373 		PKT_FIELD(vsk, write_notify_min_window) =
374 			vmci_trans(vsk)->consume_size;
375 }
376 
vmci_transport_notify_pkt_process_negotiate(struct sock * sk)377 static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
378 {
379 	struct vsock_sock *vsk = vsock_sk(sk);
380 
381 	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
382 	if (vmci_trans(vsk)->consume_size <
383 		PKT_FIELD(vsk, write_notify_min_window))
384 		PKT_FIELD(vsk, write_notify_min_window) =
385 			vmci_trans(vsk)->consume_size;
386 }
387 
388 static int
vmci_transport_notify_pkt_recv_pre_dequeue(struct sock * sk,size_t target,struct vmci_transport_recv_notify_data * data)389 vmci_transport_notify_pkt_recv_pre_dequeue(
390 				struct sock *sk,
391 				size_t target,
392 				struct vmci_transport_recv_notify_data *data)
393 {
394 	return 0; /* NOP for QState. */
395 }
396 
/* send_pre_block callback: nothing to do for QState; always returns 0. */
static int
vmci_transport_notify_pkt_send_pre_block(struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return 0;
}
404 
/* send_pre_enqueue callback: nothing to do for QState; always returns 0. */
static int
vmci_transport_notify_pkt_send_pre_enqueue(struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return 0;
}
412 
413 /* Socket always on control packet based operations. */
414 const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
415 	.socket_init = vmci_transport_notify_pkt_socket_init,
416 	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
417 	.poll_in = vmci_transport_notify_pkt_poll_in,
418 	.poll_out = vmci_transport_notify_pkt_poll_out,
419 	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
420 	.recv_init = vmci_transport_notify_pkt_recv_init,
421 	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
422 	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
423 	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
424 	.send_init = vmci_transport_notify_pkt_send_init,
425 	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
426 	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
427 	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
428 	.process_request = vmci_transport_notify_pkt_process_request,
429 	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
430 };
431