/* tools/virtio/ringtest/ring.c */
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - where the next entry will be written.
 * Prev - the value of "next" when the event last triggered.
 * Event - the peer requested an event after writing this entry.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
        return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
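
/* Worked example: with prev = 5 and next = 8, entries 5..7 are new since
 * the last notification.  need_event(6, 8, 5) computes
 * (8 - 6 - 1) = 1 < (8 - 5) = 3, i.e. true: the peer asked to be notified
 * after entry 6, which has now been written.  With event = 9 the left-hand
 * side wraps to 65534 and the test fails: entry 9 has not been written
 * yet.  The unsigned short arithmetic keeps the comparison correct across
 * index wrap-around.
 */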

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW set in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW
 * clear.  Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
        unsigned short flags;
        unsigned short index;
        unsigned len;
        unsigned long long addr;
};
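
/* Layout note: on common LP64 ABIs these fields sit at offsets 0, 2, 4
 * and 8 with no padding, so a descriptor is 16 bytes and a 64-byte cache
 * line holds four of them.  flags is the only field either side polls on.
 */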

/* how much padding is needed to avoid false sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
        unsigned short kick_index;
        unsigned char reserved0[HOST_GUEST_PADDING - 2];
        unsigned short call_index;
        unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
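
/* kick_index is written by the host in enable_kick() and read by the
 * guest in kick_available(); call_index is the mirror image.  The padding
 * keeps the two indices on separate cache lines, so arming one event does
 * not bounce the line the other side is reading.
 */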

struct data {
        void *buf; /* the descriptor is overwritten by the host, so buf
                    * can't be recovered from it */
        void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
        unsigned avail_idx;
        unsigned last_used_idx;
        unsigned num_free;
        unsigned kicked_avail_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
        /* we do not need to track the last avail index
         * unless we have more than one in flight.
         */
        unsigned used_idx;
        unsigned called_used_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
        int ret;
        int i;

        ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
        if (ret) {
                /* posix_memalign() reports errors via its return value,
                 * not errno, so perror() would print a stale message.
                 */
                fprintf(stderr, "Unable to allocate ring buffer: %s\n",
                        strerror(ret));
                exit(3);
        }
        event = malloc(sizeof *event);
        if (!event) {
                perror("Unable to allocate event buffer");
                exit(3);
        }
        memset(event, 0, sizeof *event);
        guest.avail_idx = 0;
        guest.kicked_avail_idx = -1;
        guest.last_used_idx = 0;
        host.used_idx = 0;
        host.called_used_idx = -1;
        for (i = 0; i < ring_size; ++i) {
                struct desc desc = {
                        .index = i,
                };
                ring[i] = desc;
        }
        guest.num_free = ring_size;
        data = malloc(ring_size * sizeof *data);
        if (!data) {
                perror("Unable to allocate data buffer");
                exit(3);
        }
        memset(data, 0, ring_size * sizeof *data);
}
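
/* Usage sketch (hypothetical, not part of this file: the actual driver
 * loop lives in the ringtest harness, which supplies ring_size, kick(),
 * call(), busy_wait() and the smp_* barriers via main.h).  A guest-side
 * round trip would look roughly like:
 *
 *      alloc_ring();
 *      while (add_inbuf(len, buf, data) == 0)
 *              ;                       queue until the ring fills up
 *      kick_available();               notify the host once
 *      poll_used();                    spin until a completion appears
 *      data = get_buf(&len, &buf);     reclaim the completed buffer
 */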

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
        unsigned head, index;

        if (!guest.num_free)
                return -1;

        guest.num_free--;
        head = (ring_size - 1) & (guest.avail_idx++);

        /* Start with a write.  On MESI architectures this helps avoid a
         * shared state with the consumer that is polling this descriptor.
         */
        ring[head].addr = (unsigned long)(void *)buf;
        ring[head].len = len;
        /* The read below might bypass the write above.  That is OK: it is
         * just an optimization.  If it happens, we get the cache line in a
         * shared state, which is unfortunate, but probably not worth an
         * explicit full barrier to avoid.
         */
        barrier();
        index = ring[head].index;
        data[index].buf = buf;
        data[index].data = datap;
        /* Barrier A (for pairing) */
        smp_release();
        ring[head].flags = DESC_HW;

        return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;
        unsigned index;
        void *datap;

        if (ring[head].flags & DESC_HW)
                return NULL;
        /* Barrier B (for pairing) */
        smp_acquire();
        *lenp = ring[head].len;
        index = ring[head].index & (ring_size - 1);
        datap = data[index].data;
        *bufp = data[index].buf;
        data[index].buf = NULL;
        data[index].data = NULL;
        guest.num_free++;
        guest.last_used_idx++;
        return datap;
}

void poll_used(void)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        while (ring[head].flags & DESC_HW)
                busy_wait();
}

void disable_call(void)
{
        /* Doing nothing to disable calls might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_call(void)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        event->call_index = guest.last_used_idx;
        /* Flush out the call index write */
        /* Barrier D (for pairing) */
        smp_mb();
        return ring[head].flags & DESC_HW;
}
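
/* Note on the return value: true means the next used slot is still owned
 * by the host (DESC_HW set), so nothing is pending and it is safe for the
 * caller to wait; false means a completion raced in after call_index was
 * armed, and it should be consumed instead.  Barrier D ensures the host
 * sees call_index before the flags re-check is trusted.
 */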

void kick_available(void)
{
        /* Flush out the previous flags write */
        /* Barrier C (for pairing) */
        smp_mb();
        if (!need_event(event->kick_index,
                        guest.avail_idx,
                        guest.kicked_avail_idx))
                return;

        guest.kicked_avail_idx = guest.avail_idx;
        kick();
}

/* host side */
void disable_kick(void)
{
        /* Doing nothing to disable kicks might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_kick(void)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
        return !(ring[head].flags & DESC_HW);
}
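
/* Mirror image of enable_call(): true means no new descriptor is pending
 * after kick_index was armed, so it is safe for the host to wait; false
 * means the guest published a descriptor in the meantime and it should be
 * processed instead.
 */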

void poll_avail(void)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        while (!(ring[head].flags & DESC_HW))
                busy_wait();
}

bool use_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        if (!(ring[head].flags & DESC_HW))
                return false;

        /* make sure the length read below is not speculated */
        /* Barrier A (for pairing) */
        smp_acquire();

        /* simple in-order completion: we don't need to touch the index at
         * all.  This also means we can just modify the descriptor in-place.
         */
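        /* The decrement below stands in for actually using the buffer and
         * writing back a used length; for benchmarking purposes, any write
         * that dirties the field is presumably sufficient.
         */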
        ring[head].len--;
        /* Make sure len is valid before flags.
         * Note: an alternative is to write len and flags in one access -
         * possible on 64-bit architectures, but wmb is free on Intel
         * anyway, so I have no way to test whether it's a gain.
         */
        /* Barrier B (for pairing) */
        smp_release();
        ring[head].flags = 0;
        host.used_idx++;
        return true;
}

void call_used(void)
{
        /* Flush out the previous flags write */
        /* Barrier D (for pairing) */
        smp_mb();
        if (!need_event(event->call_index,
                        host.used_idx,
                        host.called_used_idx))
                return;

        host.called_used_idx = host.used_idx;
        call();
}
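
/* Barrier pairing summary (a reading aid; labels as used above):
 *  A: smp_release() in add_inbuf() pairs with smp_acquire() in use_buf() -
 *     addr/len/index are written before DESC_HW is set, and read only
 *     after it is observed set.
 *  B: smp_release() in use_buf() pairs with smp_acquire() in get_buf() -
 *     len is written before DESC_HW is cleared, and read only after it is
 *     observed clear.
 *  C: smp_mb() in enable_kick() pairs with smp_mb() in kick_available() -
 *     each side orders its kick_index access against its flags access.
 *  D: smp_mb() in enable_call() pairs with smp_mb() in call_used() - the
 *     same pattern for call_index.
 */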