Commit | Line | Data |
---|---|---|
688e6c72 CW |
1 | /* |
2 | * Copyright © 2015 Intel Corporation | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | * | |
23 | */ | |
24 | ||
25 | #include "i915_drv.h" | |
26 | ||
27 | static void intel_breadcrumbs_fake_irq(unsigned long data) | |
28 | { | |
29 | struct intel_engine_cs *engine = (struct intel_engine_cs *)data; | |
30 | ||
31 | /* | |
32 | * The timer persists in case we cannot enable interrupts, | |
33 | * or if we have previously seen seqno/interrupt incoherency | |
34 | * ("missed interrupt" syndrome). Here the worker will wake up | |
35 | * every jiffie in order to kick the oldest waiter to do the | |
36 | * coherent seqno check. | |
37 | */ | |
38 | rcu_read_lock(); | |
39 | if (intel_engine_wakeup(engine)) | |
40 | mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); | |
41 | rcu_read_unlock(); | |
42 | } | |
43 | ||
/* Unmask the user interrupt for @engine. */
static void irq_enable(struct intel_engine_cs *engine)
{
	/* Enabling the IRQ may miss the generation of the interrupt, but
	 * we still need to force the barrier before reading the seqno,
	 * just in case.
	 */
	engine->irq_posted = true;

	/* NOTE(review): irq_posted is deliberately written before irq_get()
	 * so a waiter that observes the unmasked interrupt also observes the
	 * flag — presumably irq_get() supplies the ordering; confirm.
	 */
	WARN_ON(!engine->irq_get(engine));
}
53 | ||
/* Mask the user interrupt for @engine; mirror of irq_enable(). */
static void irq_disable(struct intel_engine_cs *engine)
{
	engine->irq_put(engine);

	/* NOTE(review): cleared only after irq_put(), the reverse of the
	 * order in irq_enable() — presumably so that a concurrent interrupt
	 * cannot leave irq_posted stale; confirm.
	 */
	engine->irq_posted = false;
}
59 | ||
/* Arm the breadcrumb mechanism for the first waiter: take an rpm wakeref,
 * unmask the user interrupt (or fall back to polling), and report whether
 * an interrupt may already have been posted — in which case the caller
 * should perform a coherent seqno check itself.
 * Must be called with b->lock held.
 */
static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);
	struct drm_i915_private *i915 = engine->i915;

	assert_spin_locked(&b->lock);
	/* Already armed? rpm_wakelock doubles as the "armed" flag. */
	if (b->rpm_wakelock)
		return false;

	/* Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference. For completeness,
	 * record an rpm reference for ourselves to cover the
	 * interrupt we unmask.
	 */
	intel_runtime_pm_get_noresume(i915);
	b->rpm_wakelock = true;

	/* Only unmask the interrupt if IRQs are available at all; the
	 * test_irq_rings mask suppresses the actual unmask for testing.
	 */
	if (intel_irqs_enabled(i915)) {
		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
			irq_enable(engine);
		b->irq_enabled = true;
	}

	/* No interrupts? Kick the waiter every jiffie! Also poll when this
	 * engine has previously missed an interrupt (missed_irq_rings).
	 */
	if (!b->irq_enabled ||
	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
		mod_timer(&b->fake_irq, jiffies + 1);

	return engine->irq_posted;
}
91 | ||
/* Undo __intel_breadcrumbs_enable_irq(): mask the interrupt if we enabled
 * it and drop our rpm wakeref. No-op if the mechanism was never armed.
 * Must be called with b->lock held.
 */
static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);

	assert_spin_locked(&b->lock);
	if (!b->rpm_wakelock)
		return;

	if (b->irq_enabled) {
		irq_disable(engine);
		b->irq_enabled = false;
	}

	intel_runtime_pm_put(engine->i915);
	b->rpm_wakelock = false;
}
109 | ||
110 | static inline struct intel_wait *to_wait(struct rb_node *node) | |
111 | { | |
112 | return container_of(node, struct intel_wait, node); | |
113 | } | |
114 | ||
/* Decouple a completed waiter from the tree and wake its task.
 * Must be called with b->lock held.
 */
static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
					      struct intel_wait *wait)
{
	assert_spin_locked(&b->lock);

	/* This request is completed, so remove it from the tree, mark it as
	 * complete, and *then* wake up the associated task.
	 */
	rb_erase(&wait->node, &b->waiters);
	/* An empty node is how intel_engine_remove_wait() later recognises
	 * that this waiter was already decoupled by the bottom-half.
	 */
	RB_CLEAR_NODE(&wait->node);

	wake_up_process(wait->tsk); /* implicit smp_wmb() */
}
128 | ||
/* Insert @wait into the retirement-ordered rbtree of waiters for @engine,
 * pruning already-completed waiters along the way and taking over as the
 * bottom-half if @wait becomes the oldest waiter.
 * Returns true if the caller should perform a coherent seqno check before
 * sleeping (its request may already have completed, or the interrupt may
 * already have been posted). Must be called with b->lock held.
 */
static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
				    struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node **p, *parent, *completed;
	bool first;
	u32 seqno;

	/* Insert the request into the retirement ordered list
	 * of waiters by walking the rbtree. If we are the oldest
	 * seqno in the tree (the first to be retired), then
	 * set ourselves as the bottom-half.
	 *
	 * As we descend the tree, prune completed branches since we hold the
	 * spinlock we know that the first_waiter must be delayed and can
	 * reduce some of the sequential wake up latency if we take action
	 * ourselves and wake up the completed tasks in parallel. Also, by
	 * removing stale elements in the tree, we may be able to reduce the
	 * ping-pong between the old bottom-half and ourselves as first-waiter.
	 */
	first = true;
	parent = NULL;
	completed = NULL;
	seqno = intel_engine_get_seqno(engine);

	/* If the request completed before we managed to grab the spinlock,
	 * return now before adding ourselves to the rbtree. We let the
	 * current bottom-half handle any pending wakeups and instead
	 * try and get out of the way quickly.
	 */
	if (i915_seqno_passed(seqno, wait->seqno)) {
		RB_CLEAR_NODE(&wait->node);
		return first; /* i.e. true: recheck the seqno yourself */
	}

	p = &b->waiters.rb_node;
	while (*p) {
		parent = *p;
		if (wait->seqno == to_wait(parent)->seqno) {
			/* We have multiple waiters on the same seqno, select
			 * the highest priority task (that with the smallest
			 * task->prio) to serve as the bottom-half for this
			 * group.
			 */
			if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
				p = &parent->rb_right;
				first = false;
			} else {
				p = &parent->rb_left;
			}
		} else if (i915_seqno_passed(wait->seqno,
					     to_wait(parent)->seqno)) {
			p = &parent->rb_right;
			/* Remember the newest completed waiter on our path
			 * so that everything older can be pruned below.
			 */
			if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
				completed = parent;
			else
				first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&wait->node, parent, p);
	rb_insert_color(&wait->node, &b->waiters);
	GEM_BUG_ON(!first && !b->tasklet);

	if (completed) {
		struct rb_node *next = rb_next(completed);

		GEM_BUG_ON(!next && !first);
		if (next && next != &wait->node) {
			/* Hand the bottom-half role to the next waiter
			 * beyond the completed run (it is older than us).
			 */
			GEM_BUG_ON(first);
			b->first_wait = to_wait(next);
			smp_store_mb(b->tasklet, b->first_wait->tsk);
			/* As there is a delay between reading the current
			 * seqno, processing the completed tasks and selecting
			 * the next waiter, we may have missed the interrupt
			 * and so need for the next bottom-half to wakeup.
			 *
			 * Also as we enable the IRQ, we may miss the
			 * interrupt for that seqno, so we have to wake up
			 * the next bottom-half in order to do a coherent check
			 * in case the seqno passed.
			 */
			__intel_breadcrumbs_enable_irq(b);
			if (READ_ONCE(engine->irq_posted))
				wake_up_process(to_wait(next)->tsk);
		}

		/* Wake every completed waiter we pruned, newest to oldest. */
		do {
			struct intel_wait *crumb = to_wait(completed);
			completed = rb_prev(completed);
			__intel_breadcrumbs_finish(b, crumb);
		} while (completed);
	}

	if (first) {
		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
		b->first_wait = wait;
		smp_store_mb(b->tasklet, wait->tsk);
		/* We are the new bottom-half; report whether an interrupt
		 * may already have fired so the caller rechecks the seqno.
		 */
		first = __intel_breadcrumbs_enable_irq(b);
	}
	GEM_BUG_ON(!b->tasklet);
	GEM_BUG_ON(!b->first_wait);
	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);

	return first;
}
236 | ||
/* Locked wrapper around __intel_engine_add_wait(). Returns true if the
 * caller should perform a coherent seqno check before sleeping.
 */
bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	bool first;

	spin_lock(&b->lock);
	first = __intel_engine_add_wait(engine, wait);
	spin_unlock(&b->lock);

	return first;
}
249 | ||
250 | void intel_engine_enable_fake_irq(struct intel_engine_cs *engine) | |
251 | { | |
252 | mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); | |
253 | } | |
254 | ||
255 | static inline bool chain_wakeup(struct rb_node *rb, int priority) | |
256 | { | |
257 | return rb && to_wait(rb)->tsk->prio <= priority; | |
258 | } | |
259 | ||
/* Remove @wait from the tree of waiters on @engine. If @wait is the
 * current bottom-half, wake any already-completed waiters chained behind
 * it and hand the bottom-half role to the next oldest waiter (or disarm
 * the irq entirely when the tree empties).
 */
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Quick check to see if this waiter was already decoupled from
	 * the tree by the bottom-half to avoid contention on the spinlock
	 * by the herd.
	 */
	if (RB_EMPTY_NODE(&wait->node))
		return;

	spin_lock(&b->lock);

	/* Recheck under the lock: the bottom-half may have raced us. */
	if (RB_EMPTY_NODE(&wait->node))
		goto out_unlock;

	if (b->first_wait == wait) {
		struct rb_node *next;
		const int priority = wait->tsk->prio;

		GEM_BUG_ON(b->tasklet != wait->tsk);

		/* We are the current bottom-half. Find the next candidate,
		 * the first waiter in the queue on the remaining oldest
		 * request. As multiple seqnos may complete in the time it
		 * takes us to wake up and find the next waiter, we have to
		 * wake up that waiter for it to perform its own coherent
		 * completion check.
		 */
		next = rb_next(&wait->node);
		if (chain_wakeup(next, priority)) {
			/* If the next waiter is already complete,
			 * wake it up and continue onto the next waiter. So
			 * if have a small herd, they will wake up in parallel
			 * rather than sequentially, which should reduce
			 * the overall latency in waking all the completed
			 * clients.
			 *
			 * However, waking up a chain adds extra latency to
			 * the first_waiter. This is undesirable if that
			 * waiter is a high priority task.
			 */
			u32 seqno = intel_engine_get_seqno(engine);

			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
				struct rb_node *n = rb_next(next);

				__intel_breadcrumbs_finish(b, to_wait(next));
				next = n;
				if (!chain_wakeup(next, priority))
					break;
			}
		}

		if (next) {
			/* In our haste, we may have completed the first waiter
			 * before we enabled the interrupt. Do so now as we
			 * have a second waiter for a future seqno. Afterwards,
			 * we have to wake up that waiter in case we missed
			 * the interrupt, or if we have to handle an
			 * exception rather than a seqno completion.
			 */
			b->first_wait = to_wait(next);
			smp_store_mb(b->tasklet, b->first_wait->tsk);
			if (b->first_wait->seqno != wait->seqno)
				__intel_breadcrumbs_enable_irq(b);
			wake_up_process(b->tasklet);
		} else {
			/* Tree is now empty: disarm the irq and rpm wakeref. */
			b->first_wait = NULL;
			WRITE_ONCE(b->tasklet, NULL);
			__intel_breadcrumbs_disable_irq(b);
		}
	} else {
		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
	}

	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
	rb_erase(&wait->node, &b->waiters);

out_unlock:
	GEM_BUG_ON(b->first_wait == wait);
	GEM_BUG_ON(rb_first(&b->waiters) !=
		   (b->first_wait ? &b->first_wait->node : NULL));
	GEM_BUG_ON(!b->tasklet ^ RB_EMPTY_ROOT(&b->waiters));
	spin_unlock(&b->lock);
}
347 | ||
348 | int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) | |
349 | { | |
350 | struct intel_breadcrumbs *b = &engine->breadcrumbs; | |
351 | ||
352 | spin_lock_init(&b->lock); | |
353 | setup_timer(&b->fake_irq, | |
354 | intel_breadcrumbs_fake_irq, | |
355 | (unsigned long)engine); | |
356 | ||
357 | return 0; | |
358 | } | |
359 | ||
360 | void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) | |
361 | { | |
362 | struct intel_breadcrumbs *b = &engine->breadcrumbs; | |
363 | ||
364 | del_timer_sync(&b->fake_irq); | |
365 | } | |
366 | ||
367 | unsigned int intel_kick_waiters(struct drm_i915_private *i915) | |
368 | { | |
369 | struct intel_engine_cs *engine; | |
370 | unsigned int mask = 0; | |
371 | ||
372 | /* To avoid the task_struct disappearing beneath us as we wake up | |
373 | * the process, we must first inspect the task_struct->state under the | |
374 | * RCU lock, i.e. as we call wake_up_process() we must be holding the | |
375 | * rcu_read_lock(). | |
376 | */ | |
377 | rcu_read_lock(); | |
378 | for_each_engine(engine, i915) | |
379 | if (unlikely(intel_engine_wakeup(engine))) | |
380 | mask |= intel_engine_flag(engine); | |
381 | rcu_read_unlock(); | |
382 | ||
383 | return mask; | |
384 | } |