/* SPDX-License-Identifier: MIT */

#include <linux/io_uring.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

struct io_sq_ring {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	unsigned int *flags;
	unsigned int *array;
};

struct io_cq_ring {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	struct io_uring_cqe *cqes;
};

struct io_uring_sq {
	unsigned int *khead;
	unsigned int *ktail;
	unsigned int *kring_mask;
	unsigned int *kring_entries;
	unsigned int *kflags;
	unsigned int *kdropped;
	unsigned int *array;
	struct io_uring_sqe *sqes;

	unsigned int sqe_head;
	unsigned int sqe_tail;

	size_t ring_sz;
};

struct io_uring_cq {
	unsigned int *khead;
	unsigned int *ktail;
	unsigned int *kring_mask;
	unsigned int *kring_entries;
	unsigned int *koverflow;
	struct io_uring_cqe *cqes;

	size_t ring_sz;
};

struct io_uring {
	struct io_uring_sq sq;
	struct io_uring_cq cq;
	int ring_fd;
};

/*
 * On x86, loads are not reordered with loads and stores are not reordered
 * with stores, so a compiler barrier is enough for the ring head/tail
 * updates below; other architectures fall back to a full memory barrier.
 */
#if defined(__x86_64) || defined(__i386__)
#define read_barrier() __asm__ __volatile__("":::"memory")
#define write_barrier() __asm__ __volatile__("":::"memory")
#else
#define read_barrier() __sync_synchronize()
#define write_barrier() __sync_synchronize()
#endif

/* Map the SQ ring, the SQE array and the CQ ring created by io_uring_setup() */
static inline int io_uring_mmap(int fd, struct io_uring_params *p,
				struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	size_t size;
	void *ptr;
	int ret;

	sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int);
	ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (ptr == MAP_FAILED)
		return -errno;
	sq->khead = ptr + p->sq_off.head;
	sq->ktail = ptr + p->sq_off.tail;
	sq->kring_mask = ptr + p->sq_off.ring_mask;
	sq->kring_entries = ptr + p->sq_off.ring_entries;
	sq->kflags = ptr + p->sq_off.flags;
	sq->kdropped = ptr + p->sq_off.dropped;
	sq->array = ptr + p->sq_off.array;

	size = p->sq_entries * sizeof(struct io_uring_sqe);
	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
	if (sq->sqes == MAP_FAILED) {
		ret = -errno;
err:
		munmap(sq->khead, sq->ring_sz);
		return ret;
	}

	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
	ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	if (ptr == MAP_FAILED) {
		ret = -errno;
		munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
		goto err;
	}
	cq->khead = ptr + p->cq_off.head;
	cq->ktail = ptr + p->cq_off.tail;
	cq->kring_mask = ptr + p->cq_off.ring_mask;
	cq->kring_entries = ptr + p->cq_off.ring_entries;
	cq->koverflow = ptr + p->cq_off.overflow;
	cq->cqes = ptr + p->cq_off.cqes;
	return 0;
}

static inline int io_uring_setup(unsigned int entries,
				 struct io_uring_params *p)
{
	return syscall(__NR_io_uring_setup, entries, p);
}

static inline int io_uring_enter(int fd, unsigned int to_submit,
				 unsigned int min_complete,
				 unsigned int flags, sigset_t *sig)
{
	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
		       flags, sig, _NSIG / 8);
}

static inline int io_uring_queue_init(unsigned int entries,
				      struct io_uring *ring,
				      unsigned int flags)
{
	struct io_uring_params p;
	int fd, ret;

	memset(ring, 0, sizeof(*ring));
	memset(&p, 0, sizeof(p));
	p.flags = flags;

	fd = io_uring_setup(entries, &p);
	if (fd < 0)
		return fd;
	ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
	if (!ret)
		ring->ring_fd = fd;
	else
		close(fd);
	return ret;
}
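
/*
 * Typical usage (illustrative sketch): pair io_uring_queue_init() with
 * io_uring_queue_exit() and bail out on a negative return value:
 *
 *	struct io_uring ring;
 *
 *	if (io_uring_queue_init(32, &ring, 0) < 0)
 *		return -1;
 *	...
 *	io_uring_queue_exit(&ring);
 */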

/* Get a free SQE, or NULL if the submission ring is full */
static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;

	if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
		return NULL;
	return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
}

/* Wait for a completion; the CQE is not consumed until io_uring_cqe_seen() */
static inline int io_uring_wait_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	struct io_uring_cq *cq = &ring->cq;
	const unsigned int mask = *cq->kring_mask;
	unsigned int head = *cq->khead;
	int ret;

	*cqe_ptr = NULL;
	do {
		read_barrier();
		if (head != *cq->ktail) {
			*cqe_ptr = &cq->cqes[head & mask];
			break;
		}
		ret = io_uring_enter(ring->ring_fd, 0, 1,
				     IORING_ENTER_GETEVENTS, NULL);
		if (ret < 0)
			return -errno;
	} while (1);

	return 0;
}

/* Hand the locally prepared SQEs to the kernel and submit them */
static inline int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned int mask = *sq->kring_mask;
	unsigned int ktail, submitted, to_submit;
	int ret;

	read_barrier();
	if (*sq->khead != *sq->ktail) {
		submitted = *sq->kring_entries;
		goto submit;
	}
	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	ktail = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;
	for (submitted = 0; submitted < to_submit; submitted++) {
		read_barrier();
		sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
	}
	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		/* Publish the new tail only after the SQ array updates are visible */
		write_barrier();
		*sq->ktail = ktail;
		write_barrier();
	}
submit:
	ret = io_uring_enter(ring->ring_fd, submitted, 0,
			     IORING_ENTER_GETEVENTS, NULL);
	return ret < 0 ? -errno : ret;
}

static inline void io_uring_queue_exit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;

	munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
	munmap(sq->khead, sq->ring_sz);
	close(ring->ring_fd);
}

/* Prepare an IORING_OP_URING_CMD SQE carrying a socket option command */
static inline void io_uring_prep_cmd(struct io_uring_sqe *sqe, int op,
				     int sockfd,
				     int level, int optname,
				     const void *optval,
				     int optlen)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8)IORING_OP_URING_CMD;
	sqe->fd = sockfd;
	sqe->cmd_op = op;

	sqe->level = level;
	sqe->optname = optname;
	sqe->optval = (unsigned long long)optval;
	sqe->optlen = optlen;
}
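
/*
 * Example (illustrative sketch): issuing a getsockopt() through the ring.
 * This assumes the kernel headers provide SOCKET_URING_OP_GETSOCKOPT; on
 * success the option length is expected in cqe->res.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	int val = 0;
 *
 *	io_uring_prep_cmd(sqe, SOCKET_URING_OP_GETSOCKOPT, sockfd,
 *			  SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
 *	io_uring_submit(&ring);
 */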

static inline int io_uring_register_buffers(struct io_uring *ring,
					    const struct iovec *iovecs,
					    unsigned int nr_iovecs)
{
	int ret;

	ret = syscall(__NR_io_uring_register, ring->ring_fd,
		      IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
	return (ret < 0) ? -errno : ret;
}
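
/*
 * Example (illustrative sketch, not used by the helpers above): register a
 * single fixed buffer.  The function name is only for illustration.
 */
static inline int io_uring_example_register_buf(struct io_uring *ring,
						void *buf, size_t len)
{
	struct iovec iov = {
		.iov_base = buf,
		.iov_len = len,
	};

	/* The registered buffer is later referenced by its index (here: 0) */
	return io_uring_register_buffers(ring, &iov, 1);
}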

static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
				      const void *buf, size_t len, int flags)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8)IORING_OP_SEND;
	sqe->fd = sockfd;
	sqe->addr = (unsigned long)buf;
	sqe->len = len;
	sqe->msg_flags = (__u32)flags;
}

static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
					const void *buf, size_t len, int flags,
					unsigned int zc_flags)
{
	io_uring_prep_send(sqe, sockfd, buf, len, flags);
	sqe->opcode = (__u8)IORING_OP_SEND_ZC;
	sqe->ioprio = zc_flags;
}
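
/*
 * Note: an IORING_OP_SEND_ZC request normally produces two completions.
 * The first CQE carries the send result and has IORING_CQE_F_MORE set; a
 * later notification CQE (IORING_CQE_F_NOTIF) arrives once the kernel no
 * longer references the buffer, and must also be reaped with
 * io_uring_wait_cqe()/io_uring_cqe_seen().
 */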

/* Mark one CQE as consumed by advancing the CQ ring head */
static inline void io_uring_cqe_seen(struct io_uring *ring)
{
	*ring->cq.khead += 1;
	write_barrier();
}
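
/*
 * End-to-end example (illustrative sketch, not used by the helpers above):
 * send one buffer over an already connected socket and reap the completion.
 * The function name is only for illustration.
 */
static inline int io_uring_example_send(int sockfd, const void *buf, size_t len)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	int ret;

	ret = io_uring_queue_init(16, &ring, 0);
	if (ret < 0)
		return ret;

	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ret = -EAGAIN;
		goto out;
	}
	io_uring_prep_send(sqe, sockfd, buf, len, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0)
		goto out;

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0)
		goto out;

	ret = cqe->res;		/* bytes sent, or a negative error */
	io_uring_cqe_seen(&ring);
out:
	io_uring_queue_exit(&ring);
	return ret;
}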