Commit | Line | Data |
---|---|---|
d4c9ff2d FEL |
1 | /* |
2 | * kvm trace | |
3 | * | |
4 | * It is designed to allow debugging traces of kvm to be generated | |
5 | * on UP / SMP machines. Each trace entry can be timestamped so that | |
6 | * it's possible to reconstruct a chronological record of trace events. | |
7 | * The implementation refers to blktrace kernel support. | |
8 | * | |
9 | * Copyright (c) 2008 Intel Corporation | |
10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | |
11 | * | |
12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com | |
13 | * | |
14 | * Date: Feb 2008 | |
15 | */ | |
16 | ||
17 | #include <linux/module.h> | |
18 | #include <linux/relay.h> | |
19 | #include <linux/debugfs.h> | |
3f7f95c6 | 20 | #include <linux/ktime.h> |
d4c9ff2d FEL |
21 | |
22 | #include <linux/kvm_host.h> | |
23 | ||
/*
 * Values stored in kvm_trace.trace_state:
 *   RUNNING - set once do_kvm_trace_enable() has finished its setup
 *   PAUSE   - set by kvm_trace_pause()
 *   CLEARUP - set by kvm_trace_cleanup() while the session is torn down
 */
#define KVM_TRACE_STATE_RUNNING		(1 << 0)
#define KVM_TRACE_STATE_PAUSE		(1 << 1)
#define KVM_TRACE_STATE_CLEARUP		(1 << 2)
27 | ||
28 | struct kvm_trace { | |
29 | int trace_state; | |
30 | struct rchan *rchan; | |
31 | struct dentry *lost_file; | |
32 | atomic_t lost_records; | |
33 | }; | |
34 | static struct kvm_trace *kvm_trace; | |
35 | ||
36 | struct kvm_trace_probe { | |
37 | const char *name; | |
38 | const char *format; | |
3f7f95c6 | 39 | u32 timestamp_in; |
d4c9ff2d FEL |
40 | marker_probe_func *probe_func; |
41 | }; | |
42 | ||
3f7f95c6 | 43 | static inline int calc_rec_size(int timestamp, int extra) |
d4c9ff2d FEL |
44 | { |
45 | int rec_size = KVM_TRC_HEAD_SIZE; | |
46 | ||
47 | rec_size += extra; | |
3f7f95c6 | 48 | return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; |
d4c9ff2d FEL |
49 | } |
50 | ||
51 | static void kvm_add_trace(void *probe_private, void *call_data, | |
52 | const char *format, va_list *args) | |
53 | { | |
54 | struct kvm_trace_probe *p = probe_private; | |
55 | struct kvm_trace *kt = kvm_trace; | |
56 | struct kvm_trace_rec rec; | |
57 | struct kvm_vcpu *vcpu; | |
e32c8f2c CE |
58 | int i, size; |
59 | u32 extra; | |
d4c9ff2d FEL |
60 | |
61 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) | |
62 | return; | |
63 | ||
e32c8f2c | 64 | rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); |
d4c9ff2d FEL |
65 | vcpu = va_arg(*args, struct kvm_vcpu *); |
66 | rec.pid = current->tgid; | |
67 | rec.vcpu_id = vcpu->vcpu_id; | |
68 | ||
69 | extra = va_arg(*args, u32); | |
70 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); | |
71 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); | |
d4c9ff2d | 72 | |
3f7f95c6 | 73 | rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) |
e32c8f2c | 74 | | TRACE_REC_NUM_DATA_ARGS(extra); |
d4c9ff2d | 75 | |
3f7f95c6 CE |
76 | if (p->timestamp_in) { |
77 | rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); | |
d4c9ff2d | 78 | |
e32c8f2c | 79 | for (i = 0; i < extra; i++) |
3f7f95c6 | 80 | rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); |
d4c9ff2d | 81 | } else { |
e32c8f2c | 82 | for (i = 0; i < extra; i++) |
3f7f95c6 | 83 | rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); |
d4c9ff2d FEL |
84 | } |
85 | ||
3f7f95c6 | 86 | size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); |
d4c9ff2d FEL |
87 | relay_write(kt->rchan, &rec, size); |
88 | } | |
89 | ||
90 | static struct kvm_trace_probe kvm_trace_probes[] = { | |
91 | { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, | |
92 | { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, | |
93 | }; | |
94 | ||
95 | static int lost_records_get(void *data, u64 *val) | |
96 | { | |
97 | struct kvm_trace *kt = data; | |
98 | ||
99 | *val = atomic_read(&kt->lost_records); | |
100 | return 0; | |
101 | } | |
102 | ||
103 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); | |
104 | ||
105 | /* | |
106 | * The relay channel is used in "no-overwrite" mode, it keeps trace of how | |
107 | * many times we encountered a full subbuffer, to tell user space app the | |
108 | * lost records there were. | |
109 | */ | |
110 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | |
111 | void *prev_subbuf, size_t prev_padding) | |
112 | { | |
113 | struct kvm_trace *kt; | |
114 | ||
9ef621d3 TL |
115 | if (!relay_buf_full(buf)) { |
116 | if (!prev_subbuf) { | |
117 | /* | |
118 | * executed only once when the channel is opened | |
119 | * save metadata as first record | |
120 | */ | |
121 | subbuf_start_reserve(buf, sizeof(u32)); | |
122 | *(u32 *)subbuf = 0x12345678; | |
123 | } | |
124 | ||
d4c9ff2d | 125 | return 1; |
9ef621d3 | 126 | } |
d4c9ff2d FEL |
127 | |
128 | kt = buf->chan->private_data; | |
129 | atomic_inc(&kt->lost_records); | |
130 | ||
131 | return 0; | |
132 | } | |
133 | ||
134 | static struct dentry *kvm_create_buf_file_callack(const char *filename, | |
135 | struct dentry *parent, | |
136 | int mode, | |
137 | struct rchan_buf *buf, | |
138 | int *is_global) | |
139 | { | |
140 | return debugfs_create_file(filename, mode, parent, buf, | |
141 | &relay_file_operations); | |
142 | } | |
143 | ||
/*
 * relay remove_buf_file hook: drop the debugfs entry that was created
 * for a relay buffer.  Always returns 0.
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
149 | ||
150 | static struct rchan_callbacks kvm_relay_callbacks = { | |
151 | .subbuf_start = kvm_subbuf_start_callback, | |
152 | .create_buf_file = kvm_create_buf_file_callack, | |
153 | .remove_buf_file = kvm_remove_buf_file_callback, | |
154 | }; | |
155 | ||
156 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) | |
157 | { | |
158 | struct kvm_trace *kt; | |
159 | int i, r = -ENOMEM; | |
160 | ||
161 | if (!kuts->buf_size || !kuts->buf_nr) | |
162 | return -EINVAL; | |
163 | ||
164 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); | |
165 | if (!kt) | |
166 | goto err; | |
167 | ||
168 | r = -EIO; | |
169 | atomic_set(&kt->lost_records, 0); | |
76f7c879 | 170 | kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, |
d4c9ff2d FEL |
171 | kt, &kvm_trace_lost_ops); |
172 | if (!kt->lost_file) | |
173 | goto err; | |
174 | ||
76f7c879 | 175 | kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, |
d4c9ff2d FEL |
176 | kuts->buf_nr, &kvm_relay_callbacks, kt); |
177 | if (!kt->rchan) | |
178 | goto err; | |
179 | ||
180 | kvm_trace = kt; | |
181 | ||
182 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | |
183 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | |
184 | ||
185 | r = marker_probe_register(p->name, p->format, p->probe_func, p); | |
186 | if (r) | |
187 | printk(KERN_INFO "Unable to register probe %s\n", | |
188 | p->name); | |
189 | } | |
190 | ||
191 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; | |
192 | ||
193 | return 0; | |
194 | err: | |
195 | if (kt) { | |
196 | if (kt->lost_file) | |
197 | debugfs_remove(kt->lost_file); | |
198 | if (kt->rchan) | |
199 | relay_close(kt->rchan); | |
200 | kfree(kt); | |
201 | } | |
202 | return r; | |
203 | } | |
204 | ||
205 | static int kvm_trace_enable(char __user *arg) | |
206 | { | |
207 | struct kvm_user_trace_setup kuts; | |
208 | int ret; | |
209 | ||
210 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); | |
211 | if (ret) | |
212 | return -EFAULT; | |
213 | ||
214 | ret = do_kvm_trace_enable(&kuts); | |
215 | if (ret) | |
216 | return ret; | |
217 | ||
218 | return 0; | |
219 | } | |
220 | ||
221 | static int kvm_trace_pause(void) | |
222 | { | |
223 | struct kvm_trace *kt = kvm_trace; | |
224 | int r = -EINVAL; | |
225 | ||
226 | if (kt == NULL) | |
227 | return r; | |
228 | ||
229 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { | |
230 | kt->trace_state = KVM_TRACE_STATE_PAUSE; | |
231 | relay_flush(kt->rchan); | |
232 | r = 0; | |
233 | } | |
234 | ||
235 | return r; | |
236 | } | |
237 | ||
238 | void kvm_trace_cleanup(void) | |
239 | { | |
240 | struct kvm_trace *kt = kvm_trace; | |
241 | int i; | |
242 | ||
243 | if (kt == NULL) | |
244 | return; | |
245 | ||
246 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || | |
247 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { | |
248 | ||
249 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; | |
250 | ||
251 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | |
252 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | |
253 | marker_probe_unregister(p->name, p->probe_func, p); | |
254 | } | |
b8209182 | 255 | marker_synchronize_unregister(); |
d4c9ff2d FEL |
256 | |
257 | relay_close(kt->rchan); | |
258 | debugfs_remove(kt->lost_file); | |
259 | kfree(kt); | |
260 | } | |
261 | } | |
262 | ||
263 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | |
264 | { | |
265 | void __user *argp = (void __user *)arg; | |
266 | long r = -EINVAL; | |
267 | ||
268 | if (!capable(CAP_SYS_ADMIN)) | |
269 | return -EPERM; | |
270 | ||
271 | switch (ioctl) { | |
272 | case KVM_TRACE_ENABLE: | |
273 | r = kvm_trace_enable(argp); | |
274 | break; | |
275 | case KVM_TRACE_PAUSE: | |
276 | r = kvm_trace_pause(); | |
277 | break; | |
278 | case KVM_TRACE_DISABLE: | |
279 | r = 0; | |
280 | kvm_trace_cleanup(); | |
281 | break; | |
282 | } | |
283 | ||
284 | return r; | |
285 | } |