1 #include <sys/types.h>
2 #include <sys/stat.h>
3 #include <sys/mman.h>
4 #include <unistd.h>
5 #include <errno.h>
6 #include <string.h>
7
8 #include "liburing.h"
9 #include "barrier.h"
10
__io_uring_get_cqe(struct io_uring * ring,struct io_uring_cqe ** cqe_ptr,int wait)11 static int __io_uring_get_cqe(struct io_uring *ring,
12 struct io_uring_cqe **cqe_ptr, int wait)
13 {
14 struct io_uring_cq *cq = &ring->cq;
15 const unsigned mask = *cq->kring_mask;
16 unsigned head;
17 int ret;
18
19 *cqe_ptr = NULL;
20 head = *cq->khead;
21 do {
22 /*
23 * It's necessary to use a read_barrier() before reading
24 * the CQ tail, since the kernel updates it locklessly. The
25 * kernel has the matching store barrier for the update. The
26 * kernel also ensures that previous stores to CQEs are ordered
27 * with the tail update.
28 */
29 read_barrier();
30 if (head != *cq->ktail) {
31 *cqe_ptr = &cq->cqes[head & mask];
32 break;
33 }
34 if (!wait)
35 break;
36 ret = io_uring_enter(ring->ring_fd, 0, 1,
37 IORING_ENTER_GETEVENTS, NULL);
38 if (ret < 0)
39 return -errno;
40 } while (1);
41
42 return 0;
43 }
44
/*
 * Fetch an IO completion without waiting.
 *
 * Returns 0 on success: *cqe_ptr then points at the completion if one was
 * readily available, or is NULL if the completion ring was empty. Returns
 * -errno on failure.
 */
int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	const int wait = 0;	/* never block for a completion */

	return __io_uring_get_cqe(ring, cqe_ptr, wait);
}
53
/*
 * Fetch an IO completion, entering the kernel to wait for one if none is
 * available yet.
 *
 * Returns 0 with *cqe_ptr pointing at the completion on success, -errno on
 * failure.
 */
int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	const int wait = 1;	/* keep going until a completion shows up */

	return __io_uring_get_cqe(ring, cqe_ptr, wait);
}
62
63 /*
64 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
65 *
66 * Returns number of sqes submitted
67 */
io_uring_submit(struct io_uring * ring)68 int io_uring_submit(struct io_uring *ring)
69 {
70 struct io_uring_sq *sq = &ring->sq;
71 const unsigned mask = *sq->kring_mask;
72 unsigned ktail, ktail_next, submitted, to_submit;
73 int ret;
74
75 /*
76 * If we have pending IO in the kring, submit it first. We need a
77 * read barrier here to match the kernels store barrier when updating
78 * the SQ head.
79 */
80 read_barrier();
81 if (*sq->khead != *sq->ktail) {
82 submitted = *sq->kring_entries;
83 goto submit;
84 }
85
86 if (sq->sqe_head == sq->sqe_tail)
87 return 0;
88
89 /*
90 * Fill in sqes that we have queued up, adding them to the kernel ring
91 */
92 submitted = 0;
93 ktail = ktail_next = *sq->ktail;
94 to_submit = sq->sqe_tail - sq->sqe_head;
95 while (to_submit--) {
96 ktail_next++;
97 read_barrier();
98
99 sq->array[ktail & mask] = sq->sqe_head & mask;
100 ktail = ktail_next;
101
102 sq->sqe_head++;
103 submitted++;
104 }
105
106 if (!submitted)
107 return 0;
108
109 if (*sq->ktail != ktail) {
110 /*
111 * First write barrier ensures that the SQE stores are updated
112 * with the tail update. This is needed so that the kernel
113 * will never see a tail update without the preceeding sQE
114 * stores being done.
115 */
116 write_barrier();
117 *sq->ktail = ktail;
118 /*
119 * The kernel has the matching read barrier for reading the
120 * SQ tail.
121 */
122 write_barrier();
123 }
124
125 submit:
126 ret = io_uring_enter(ring->ring_fd, submitted, 0,
127 IORING_ENTER_GETEVENTS, NULL);
128 if (ret < 0)
129 return -errno;
130
131 return ret;
132 }
133
134 /*
135 * Return an sqe to fill. Application must later call io_uring_submit()
136 * when it's ready to tell the kernel about it. The caller may call this
137 * function multiple times before calling io_uring_submit().
138 *
139 * Returns a vacant sqe, or NULL if we're full.
140 */
io_uring_get_sqe(struct io_uring * ring)141 struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
142 {
143 struct io_uring_sq *sq = &ring->sq;
144 unsigned next = sq->sqe_tail + 1;
145 struct io_uring_sqe *sqe;
146
147 /*
148 * All sqes are used
149 */
150 if (next - sq->sqe_head > *sq->kring_entries)
151 return NULL;
152
153 sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
154 sq->sqe_tail = next;
155 return sqe;
156 }
157