drm/i915: Use a slab for object allocation
drivers/gpu/drm/i915/i915_gem.c
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	i915_gem_init_global_gtt(dev, args->gtt_start,
				 args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	kmem_cache_free(dev_priv->slab, obj);
}

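/*
 * i915_gem_object_alloc()/free() above rely on dev_priv->slab having been
 * created before the first object is allocated. That setup lives outside
 * this file; a sketch of what it presumably looks like during driver load
 * (exact call site and flags are an assumption, not shown here):
 *
 *	dev_priv->slab =
 *		kmem_cache_create("i915_gem_object",
 *				  sizeof(struct drm_i915_gem_object), 0,
 *				  SLAB_HWCACHE_ALIGN, NULL);
 *
 * with a matching kmem_cache_destroy(dev_priv->slab) on unload.
 */
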
static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		i915_gem_object_free(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

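/*
 * Worked example for the dumb-buffer sizing above: a 1920x1080 buffer at
 * 32 bpp gives a pitch of ALIGN(1920 * 4, 64) = 7680 bytes and a size of
 * 7680 * 1080 = 8294400 bytes, which i915_gem_create() then rounds up to
 * a whole number of pages.
 */
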
int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

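/*
 * Both helpers above walk the buffer one 64-byte cacheline at a time and
 * XOR the GPU-side offset with 64, i.e. they swap the two 64-byte halves
 * of every 128-byte span. The callers only take this swizzled path for
 * pages whose physical address has bit 17 set (see the
 * page_to_phys(page) & (1 << 17) checks in the pread/pwrite loops below).
 */
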
/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct scatterlist *sg;
	int i;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
			if (ret)
				return ret;
		}
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;

	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
		struct page *page;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (!prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

next_page:
		mark_page_accessed(page);

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_object_pin(obj, 0, true, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush_after is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
						user_data,
						page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;
	struct scatterlist *sg;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush_after = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, true);
			if (ret)
				return ret;
		}
	}
	/* Same trick applies for invalidate partially written cachelines before
	 * writing. */
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
	    && obj->cache_level == I915_CACHE_NONE)
		needs_clflush_before = 1;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
		struct page *page;
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

next_page:
		set_page_dirty(page);
		mark_page_accessed(page);

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			i915_gem_chipset_flush(dev);
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
					   args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->cache_level == I915_CACHE_NONE &&
	    obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_check_wedge(struct drm_i915_private *dev_priv,
		     bool interruptible)
{
	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but still wedged means reset failure. */
		if (recovery_complete)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	trace_i915_gem_request_wait_begin(ring, seqno);

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	atomic_read(&dev_priv->mm.wedged))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		ret = i915_gem_check_wedge(dev_priv, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		if (timeout)
			set_normalized_timespec(timeout, 0, 0);
		return -ETIME;
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(dev_priv, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno, interruptible, NULL);
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return 0;
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(dev_priv, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, true, NULL);
	mutex_lock(&dev->struct_mutex);

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->filp) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	addr = vm_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Now bind it into the GTT if needed */
	ret = i915_gem_object_pin(obj, 0, true, false);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	obj->fault_mappable = true;

	pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unpin:
	i915_gem_object_unpin(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/* If this -EIO is due to a gpu hang, give the reset code a
		 * chance to clean up the mess. Otherwise return the proper
		 * SIGBUS. */
		if (!atomic_read(&dev_priv->mm.wedged))
			return VM_FAULT_SIGBUS;
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
		set_need_resched();
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	case -ENOSPC:
		return VM_FAULT_SIGBUS;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		return VM_FAULT_SIGBUS;
	}
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	if (!obj->fault_mappable)
		return;

	if (obj->base.dev->dev_mapping)
		unmap_mapping_range(obj->base.dev->dev_mapping,
				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
				    obj->base.size, 1);

	obj->fault_mappable = false;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

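/*
 * Example of the fence sizing above: a 700KB tiled object on gen3 starts
 * from the 1MB minimum and needs no doubling, so it occupies a 1MB fence
 * region; the same object on gen2 starts at 512KB and is doubled once to
 * 1MB. On gen4+, or for untiled objects, the object size is used unchanged.
 */
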
/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-two tile object size.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

	if (obj->base.map_list.map)
		return 0;

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		return ret;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can do is to release the
	 * offsets on purgeable objects by truncating them and marking them
	 * purged, which prevents userspace from ever using those objects again.
	 */
	i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		return ret;

	i915_gem_shrink_all(dev_priv);
	return drm_gem_create_mmap_offset(&obj->base);
}

static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	if (!obj->base.map_list.map)
		return;

	drm_gem_free_mmap_offset(&obj->base);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	ret = i915_gem_object_create_mmap_offset(obj);
	if (ret)
		goto out;

	*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	struct inode *inode;

	i915_gem_object_free_mmap_offset(obj);

	if (obj->base.filp == NULL)
		return;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
	inode = obj->base.filp->f_path.dentry->d_inode;
	shmem_truncate_range(inode, 0, (loff_t)-1);

	obj->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
	struct scatterlist *sg;
	int ret, i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	for_each_sg(obj->pages->sgl, sg, page_count, i) {
		struct page *page = sg_page(sg);

		if (obj->dirty)
			set_page_dirty(page);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		page_cache_release(page);
	}
	obj->dirty = 0;

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
	const struct drm_i915_gem_object_ops *ops = obj->ops;

	if (obj->pages == NULL)
		return 0;

	BUG_ON(obj->gtt_space);

	if (obj->pages_pin_count)
		return -EBUSY;

	ops->put_pages(obj);
	obj->pages = NULL;

	list_del(&obj->gtt_list);
	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	return 0;
}

static long
i915_gem_purge(struct drm_i915_private *dev_priv, long target)
{
	struct drm_i915_gem_object *obj, *next;
	long count = 0;

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.unbound_list,
				 gtt_list) {
		if (i915_gem_object_is_purgeable(obj) &&
		    i915_gem_object_put_pages(obj) == 0) {
			count += obj->base.size >> PAGE_SHIFT;
			if (count >= target)
				return count;
		}
	}

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (i915_gem_object_is_purgeable(obj) &&
		    i915_gem_object_unbind(obj) == 0 &&
		    i915_gem_object_put_pages(obj) == 0) {
			count += obj->base.size >> PAGE_SHIFT;
			if (count >= target)
				return count;
		}
	}

	return count;
}

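/*
 * i915_gem_purge() above releases the cheap memory first: purgeable
 * objects that are already unbound only need their shmem pages dropped,
 * while purgeable objects on the inactive list must be unbound from the
 * GTT before their pages can go. Both the target and the returned count
 * are in pages.
 */
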
1746static void
1747i915_gem_shrink_all(struct drm_i915_private *dev_priv)
1748{
1749 struct drm_i915_gem_object *obj, *next;
1750
1751 i915_gem_evict_everything(dev_priv->dev);
1752
1753 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
37e680a1 1754 i915_gem_object_put_pages(obj);
225067ee
DV
1755}
1756
37e680a1 1757static int
6c085a72 1758i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
e5281ccd 1759{
6c085a72 1760 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
e5281ccd
CW
1761 int page_count, i;
1762 struct address_space *mapping;
9da3da66
CW
1763 struct sg_table *st;
1764 struct scatterlist *sg;
e5281ccd 1765 struct page *page;
6c085a72 1766 gfp_t gfp;
e5281ccd 1767
6c085a72
CW
1768 /* Assert that the object is not currently in any GPU domain. As it
1769 * wasn't in the GTT, there shouldn't be any way it could have been in
1770 * a GPU cache
1771 */
1772 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
1773 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
1774
9da3da66
CW
1775 st = kmalloc(sizeof(*st), GFP_KERNEL);
1776 if (st == NULL)
1777 return -ENOMEM;
1778
05394f39 1779 page_count = obj->base.size / PAGE_SIZE;
9da3da66
CW
1780 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
1781 sg_free_table(st);
1782 kfree(st);
e5281ccd 1783 return -ENOMEM;
9da3da66 1784 }
e5281ccd 1785
9da3da66
CW
1786 /* Get the list of pages out of our struct file. They'll be pinned
1787 * at this point until we release them.
1788 *
 1790 * Fail silently without starting the shrinker.
1790 */
6c085a72
CW
1791 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
1792 gfp = mapping_gfp_mask(mapping);
d7c3b937 1793 gfp |= __GFP_NORETRY | __GFP_NOWARN;
6c085a72 1794 gfp &= ~(__GFP_IO | __GFP_WAIT);
9da3da66 1795 for_each_sg(st->sgl, sg, page_count, i) {
6c085a72
CW
1796 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1797 if (IS_ERR(page)) {
1798 i915_gem_purge(dev_priv, page_count);
1799 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1800 }
1801 if (IS_ERR(page)) {
1802 /* We've tried hard to allocate the memory by reaping
1803 * our own buffer, now let the real VM do its job and
1804 * go down in flames if truly OOM.
1805 */
d7c3b937 1806 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN);
6c085a72
CW
1807 gfp |= __GFP_IO | __GFP_WAIT;
1808
1809 i915_gem_shrink_all(dev_priv);
1810 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1811 if (IS_ERR(page))
1812 goto err_pages;
1813
d7c3b937 1814 gfp |= __GFP_NORETRY | __GFP_NOWARN;
6c085a72
CW
1815 gfp &= ~(__GFP_IO | __GFP_WAIT);
1816 }
e5281ccd 1817
9da3da66 1818 sg_set_page(sg, page, PAGE_SIZE, 0);
e5281ccd
CW
1819 }
1820
74ce6b6c
CW
1821 obj->pages = st;
1822
6dacfd2f 1823 if (i915_gem_object_needs_bit17_swizzle(obj))
e5281ccd
CW
1824 i915_gem_object_do_bit_17_swizzle(obj);
1825
1826 return 0;
1827
1828err_pages:
9da3da66
CW
1829 for_each_sg(st->sgl, sg, i, page_count)
1830 page_cache_release(sg_page(sg));
1831 sg_free_table(st);
1832 kfree(st);
e5281ccd 1833 return PTR_ERR(page);
673a394b
EA
1834}
1835
37e680a1
CW
1836/* Ensure that the associated pages are gathered from the backing storage
1837 * and pinned into our object. i915_gem_object_get_pages() may be called
1838 * multiple times before they are released by a single call to
1839 * i915_gem_object_put_pages() - once the pages are no longer referenced
1840 * either as a result of memory pressure (reaping pages under the shrinker)
1841 * or as the object is itself released.
1842 */
1843int
1844i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
1845{
1846 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1847 const struct drm_i915_gem_object_ops *ops = obj->ops;
1848 int ret;
1849
2f745ad3 1850 if (obj->pages)
37e680a1
CW
1851 return 0;
1852
a5570178
CW
1853 BUG_ON(obj->pages_pin_count);
1854
37e680a1
CW
1855 ret = ops->get_pages(obj);
1856 if (ret)
1857 return ret;
1858
1859 list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
1860 return 0;
673a394b
EA
1861}
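
/* Editor's note: a minimal sketch of the intended page lifecycle, assuming a
 * caller that already holds struct_mutex. i915_gem_object_pin_pages() and
 * i915_gem_object_unpin_pages() are the pages_pin_count wrappers used
 * elsewhere in this file; this is an illustration, not code from this commit.
 *
 *	ret = i915_gem_object_get_pages(obj);	/- populate obj->pages -/
 *	if (ret)
 *		return ret;
 *	i915_gem_object_pin_pages(obj);		/- blocks put_pages() -/
 *	... access obj->pages->sgl ...
 *	i915_gem_object_unpin_pages(obj);
 *
 * The pages are finally released by i915_gem_object_put_pages(), either under
 * memory pressure (the shrinker) or when the object itself is freed.
 */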
1862
54cf91dc 1863void
05394f39 1864i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
9d773091 1865 struct intel_ring_buffer *ring)
673a394b 1866{
05394f39 1867 struct drm_device *dev = obj->base.dev;
69dc4987 1868 struct drm_i915_private *dev_priv = dev->dev_private;
9d773091 1869 u32 seqno = intel_ring_get_seqno(ring);
617dbe27 1870
852835f3 1871 BUG_ON(ring == NULL);
05394f39 1872 obj->ring = ring;
673a394b
EA
1873
1874 /* Add a reference if we're newly entering the active list. */
05394f39
CW
1875 if (!obj->active) {
1876 drm_gem_object_reference(&obj->base);
1877 obj->active = 1;
673a394b 1878 }
e35a41de 1879
673a394b 1880 /* Move from whatever list we were on to the tail of execution. */
05394f39
CW
1881 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1882 list_move_tail(&obj->ring_list, &ring->active_list);
caea7476 1883
0201f1ec 1884 obj->last_read_seqno = seqno;
caea7476 1885
7dd49065 1886 if (obj->fenced_gpu_access) {
caea7476 1887 obj->last_fenced_seqno = seqno;
caea7476 1888
7dd49065
CW
1889 /* Bump MRU to take account of the delayed flush */
1890 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1891 struct drm_i915_fence_reg *reg;
1892
1893 reg = &dev_priv->fence_regs[obj->fence_reg];
1894 list_move_tail(&reg->lru_list,
1895 &dev_priv->mm.fence_list);
1896 }
caea7476
CW
1897 }
1898}
1899
1900static void
caea7476 1901i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
ce44b0ea 1902{
05394f39 1903 struct drm_device *dev = obj->base.dev;
caea7476 1904 struct drm_i915_private *dev_priv = dev->dev_private;
ce44b0ea 1905
65ce3027 1906 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
05394f39 1907 BUG_ON(!obj->active);
caea7476 1908
f047e395
CW
1909 if (obj->pin_count) /* are we a framebuffer? */
1910 intel_mark_fb_idle(obj);
caea7476 1911
1b50247a 1912 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
caea7476 1913
65ce3027 1914 list_del_init(&obj->ring_list);
caea7476
CW
1915 obj->ring = NULL;
1916
65ce3027
CW
1917 obj->last_read_seqno = 0;
1918 obj->last_write_seqno = 0;
1919 obj->base.write_domain = 0;
1920
1921 obj->last_fenced_seqno = 0;
caea7476 1922 obj->fenced_gpu_access = false;
caea7476
CW
1923
1924 obj->active = 0;
1925 drm_gem_object_unreference(&obj->base);
1926
1927 WARN_ON(i915_verify_lists(dev));
ce44b0ea 1928}
673a394b 1929
9d773091
CW
1930static int
1931i915_gem_handle_seqno_wrap(struct drm_device *dev)
53d227f2 1932{
9d773091
CW
1933 struct drm_i915_private *dev_priv = dev->dev_private;
1934 struct intel_ring_buffer *ring;
1935 int ret, i, j;
53d227f2 1936
9d773091
CW
 1937 /* The hardware uses various monotonic 32-bit counters; if we
 1938 * detect that they will wrap around, we need to idle the GPU
 1939 * and reset those counters.
1940 */
1941 ret = 0;
1942 for_each_ring(ring, dev_priv, i) {
1943 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1944 ret |= ring->sync_seqno[j] != 0;
1945 }
1946 if (ret == 0)
1947 return ret;
1948
1949 ret = i915_gpu_idle(dev);
1950 if (ret)
1951 return ret;
1952
1953 i915_gem_retire_requests(dev);
1954 for_each_ring(ring, dev_priv, i) {
1955 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1956 ring->sync_seqno[j] = 0;
1957 }
53d227f2 1958
9d773091 1959 return 0;
53d227f2
DV
1960}
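
/* Editor's note: seqno ordering elsewhere relies on wrap-safe signed
 * arithmetic (see i915_seqno_passed()), roughly:
 *
 *	static inline bool seqno_passed(u32 seq1, u32 seq2)
 *	{
 *		return (s32)(seq1 - seq2) >= 0;
 *	}
 *
 * e.g. seq1 = 0x00000002, seq2 = 0xfffffffe gives (s32)4 >= 0, so the test
 * still works across the 32-bit wrap. The per-ring sync_seqno values are
 * compared with a plain <= and do not have that property, which is why they
 * must be reset here before next_seqno wraps.
 */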
1961
9d773091
CW
1962int
1963i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
53d227f2 1964{
9d773091
CW
1965 struct drm_i915_private *dev_priv = dev->dev_private;
1966
1967 /* reserve 0 for non-seqno */
1968 if (dev_priv->next_seqno == 0) {
1969 int ret = i915_gem_handle_seqno_wrap(dev);
1970 if (ret)
1971 return ret;
1972
1973 dev_priv->next_seqno = 1;
1974 }
53d227f2 1975
9d773091
CW
1976 *seqno = dev_priv->next_seqno++;
1977 return 0;
53d227f2
DV
1978}
1979
3cce469c 1980int
db53a302 1981i915_add_request(struct intel_ring_buffer *ring,
f787a5f5 1982 struct drm_file *file,
acb868d3 1983 u32 *out_seqno)
673a394b 1984{
db53a302 1985 drm_i915_private_t *dev_priv = ring->dev->dev_private;
acb868d3 1986 struct drm_i915_gem_request *request;
a71d8d94 1987 u32 request_ring_position;
673a394b 1988 int was_empty;
3cce469c
CW
1989 int ret;
1990
cc889e0f
DV
1991 /*
1992 * Emit any outstanding flushes - execbuf can fail to emit the flush
1993 * after having emitted the batchbuffer command. Hence we need to fix
1994 * things up similar to emitting the lazy request. The difference here
1995 * is that the flush _must_ happen before the next request, no matter
1996 * what.
1997 */
a7b9761d
CW
1998 ret = intel_ring_flush_all_caches(ring);
1999 if (ret)
2000 return ret;
cc889e0f 2001
acb868d3
CW
2002 request = kmalloc(sizeof(*request), GFP_KERNEL);
2003 if (request == NULL)
2004 return -ENOMEM;
cc889e0f 2005
673a394b 2006
a71d8d94
CW
2007 /* Record the position of the start of the request so that
2008 * should we detect the updated seqno part-way through the
2009 * GPU processing the request, we never over-estimate the
2010 * position of the head.
2011 */
2012 request_ring_position = intel_ring_get_tail(ring);
2013
9d773091 2014 ret = ring->add_request(ring);
3bb73aba
CW
2015 if (ret) {
2016 kfree(request);
2017 return ret;
2018 }
673a394b 2019
9d773091 2020 request->seqno = intel_ring_get_seqno(ring);
852835f3 2021 request->ring = ring;
a71d8d94 2022 request->tail = request_ring_position;
673a394b 2023 request->emitted_jiffies = jiffies;
852835f3
ZN
2024 was_empty = list_empty(&ring->request_list);
2025 list_add_tail(&request->list, &ring->request_list);
3bb73aba 2026 request->file_priv = NULL;
852835f3 2027
db53a302
CW
2028 if (file) {
2029 struct drm_i915_file_private *file_priv = file->driver_priv;
2030
1c25595f 2031 spin_lock(&file_priv->mm.lock);
f787a5f5 2032 request->file_priv = file_priv;
b962442e 2033 list_add_tail(&request->client_list,
f787a5f5 2034 &file_priv->mm.request_list);
1c25595f 2035 spin_unlock(&file_priv->mm.lock);
b962442e 2036 }
673a394b 2037
9d773091 2038 trace_i915_gem_request_add(ring, request->seqno);
5391d0cf 2039 ring->outstanding_lazy_request = 0;
db53a302 2040
f65d9421 2041 if (!dev_priv->mm.suspended) {
3e0dc6b0
BW
2042 if (i915_enable_hangcheck) {
2043 mod_timer(&dev_priv->hangcheck_timer,
cecc21fe 2044 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
3e0dc6b0 2045 }
f047e395 2046 if (was_empty) {
b3b079db 2047 queue_delayed_work(dev_priv->wq,
bcb45086
CW
2048 &dev_priv->mm.retire_work,
2049 round_jiffies_up_relative(HZ));
f047e395
CW
2050 intel_mark_busy(dev_priv->dev);
2051 }
f65d9421 2052 }
cc889e0f 2053
acb868d3 2054 if (out_seqno)
9d773091 2055 *out_seqno = request->seqno;
3cce469c 2056 return 0;
673a394b
EA
2057}
2058
f787a5f5
CW
2059static inline void
2060i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
673a394b 2061{
1c25595f 2062 struct drm_i915_file_private *file_priv = request->file_priv;
673a394b 2063
1c25595f
CW
2064 if (!file_priv)
2065 return;
1c5d22f7 2066
1c25595f 2067 spin_lock(&file_priv->mm.lock);
09bfa517
HRK
2068 if (request->file_priv) {
2069 list_del(&request->client_list);
2070 request->file_priv = NULL;
2071 }
1c25595f 2072 spin_unlock(&file_priv->mm.lock);
673a394b 2073}
673a394b 2074
dfaae392
CW
2075static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2076 struct intel_ring_buffer *ring)
9375e446 2077{
dfaae392
CW
2078 while (!list_empty(&ring->request_list)) {
2079 struct drm_i915_gem_request *request;
673a394b 2080
dfaae392
CW
2081 request = list_first_entry(&ring->request_list,
2082 struct drm_i915_gem_request,
2083 list);
de151cf6 2084
dfaae392 2085 list_del(&request->list);
f787a5f5 2086 i915_gem_request_remove_from_client(request);
dfaae392
CW
2087 kfree(request);
2088 }
673a394b 2089
dfaae392 2090 while (!list_empty(&ring->active_list)) {
05394f39 2091 struct drm_i915_gem_object *obj;
9375e446 2092
05394f39
CW
2093 obj = list_first_entry(&ring->active_list,
2094 struct drm_i915_gem_object,
2095 ring_list);
9375e446 2096
05394f39 2097 i915_gem_object_move_to_inactive(obj);
673a394b
EA
2098 }
2099}
2100
312817a3
CW
2101static void i915_gem_reset_fences(struct drm_device *dev)
2102{
2103 struct drm_i915_private *dev_priv = dev->dev_private;
2104 int i;
2105
4b9de737 2106 for (i = 0; i < dev_priv->num_fence_regs; i++) {
312817a3 2107 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
7d2cb39c 2108
ada726c7 2109 i915_gem_write_fence(dev, i, NULL);
7d2cb39c 2110
ada726c7
CW
2111 if (reg->obj)
2112 i915_gem_object_fence_lost(reg->obj);
7d2cb39c 2113
ada726c7
CW
2114 reg->pin_count = 0;
2115 reg->obj = NULL;
2116 INIT_LIST_HEAD(&reg->lru_list);
312817a3 2117 }
ada726c7
CW
2118
2119 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
312817a3
CW
2120}
2121
069efc1d 2122void i915_gem_reset(struct drm_device *dev)
673a394b 2123{
77f01230 2124 struct drm_i915_private *dev_priv = dev->dev_private;
05394f39 2125 struct drm_i915_gem_object *obj;
b4519513 2126 struct intel_ring_buffer *ring;
1ec14ad3 2127 int i;
673a394b 2128
b4519513
CW
2129 for_each_ring(ring, dev_priv, i)
2130 i915_gem_reset_ring_lists(dev_priv, ring);
dfaae392 2131
dfaae392
CW
2132 /* Move everything out of the GPU domains to ensure we do any
2133 * necessary invalidation upon reuse.
2134 */
05394f39 2135 list_for_each_entry(obj,
77f01230 2136 &dev_priv->mm.inactive_list,
69dc4987 2137 mm_list)
77f01230 2138 {
05394f39 2139 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
77f01230 2140 }
069efc1d
CW
2141
2142 /* The fence registers are invalidated so clear them out */
312817a3 2143 i915_gem_reset_fences(dev);
673a394b
EA
2144}
2145
2146/**
2147 * This function clears the request list as sequence numbers are passed.
2148 */
a71d8d94 2149void
db53a302 2150i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
673a394b 2151{
673a394b
EA
2152 uint32_t seqno;
2153
db53a302 2154 if (list_empty(&ring->request_list))
6c0594a3
KW
2155 return;
2156
db53a302 2157 WARN_ON(i915_verify_lists(ring->dev));
673a394b 2158
b2eadbc8 2159 seqno = ring->get_seqno(ring, true);
1ec14ad3 2160
852835f3 2161 while (!list_empty(&ring->request_list)) {
673a394b 2162 struct drm_i915_gem_request *request;
673a394b 2163
852835f3 2164 request = list_first_entry(&ring->request_list,
673a394b
EA
2165 struct drm_i915_gem_request,
2166 list);
673a394b 2167
dfaae392 2168 if (!i915_seqno_passed(seqno, request->seqno))
b84d5f0c
CW
2169 break;
2170
db53a302 2171 trace_i915_gem_request_retire(ring, request->seqno);
a71d8d94
CW
2172 /* We know the GPU must have read the request to have
2173 * sent us the seqno + interrupt, so use the position
 2174 * of the tail of the request to update the last known position
2175 * of the GPU head.
2176 */
2177 ring->last_retired_head = request->tail;
b84d5f0c
CW
2178
2179 list_del(&request->list);
f787a5f5 2180 i915_gem_request_remove_from_client(request);
b84d5f0c
CW
2181 kfree(request);
2182 }
673a394b 2183
b84d5f0c
CW
2184 /* Move any buffers on the active list that are no longer referenced
2185 * by the ringbuffer to the flushing/inactive lists as appropriate.
2186 */
2187 while (!list_empty(&ring->active_list)) {
05394f39 2188 struct drm_i915_gem_object *obj;
b84d5f0c 2189
0206e353 2190 obj = list_first_entry(&ring->active_list,
05394f39
CW
2191 struct drm_i915_gem_object,
2192 ring_list);
673a394b 2193
0201f1ec 2194 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
673a394b 2195 break;
b84d5f0c 2196
65ce3027 2197 i915_gem_object_move_to_inactive(obj);
673a394b 2198 }
9d34e5db 2199
db53a302
CW
2200 if (unlikely(ring->trace_irq_seqno &&
2201 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1ec14ad3 2202 ring->irq_put(ring);
db53a302 2203 ring->trace_irq_seqno = 0;
9d34e5db 2204 }
23bc5982 2205
db53a302 2206 WARN_ON(i915_verify_lists(ring->dev));
673a394b
EA
2207}
2208
b09a1fec
CW
2209void
2210i915_gem_retire_requests(struct drm_device *dev)
2211{
2212 drm_i915_private_t *dev_priv = dev->dev_private;
b4519513 2213 struct intel_ring_buffer *ring;
1ec14ad3 2214 int i;
b09a1fec 2215
b4519513
CW
2216 for_each_ring(ring, dev_priv, i)
2217 i915_gem_retire_requests_ring(ring);
b09a1fec
CW
2218}
2219
75ef9da2 2220static void
673a394b
EA
2221i915_gem_retire_work_handler(struct work_struct *work)
2222{
2223 drm_i915_private_t *dev_priv;
2224 struct drm_device *dev;
b4519513 2225 struct intel_ring_buffer *ring;
0a58705b
CW
2226 bool idle;
2227 int i;
673a394b
EA
2228
2229 dev_priv = container_of(work, drm_i915_private_t,
2230 mm.retire_work.work);
2231 dev = dev_priv->dev;
2232
891b48cf
CW
2233 /* Come back later if the device is busy... */
2234 if (!mutex_trylock(&dev->struct_mutex)) {
bcb45086
CW
2235 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2236 round_jiffies_up_relative(HZ));
891b48cf
CW
2237 return;
2238 }
673a394b 2239
b09a1fec 2240 i915_gem_retire_requests(dev);
673a394b 2241
0a58705b
CW
2242 /* Send a periodic flush down the ring so we don't hold onto GEM
2243 * objects indefinitely.
673a394b 2244 */
0a58705b 2245 idle = true;
b4519513 2246 for_each_ring(ring, dev_priv, i) {
3bb73aba
CW
2247 if (ring->gpu_caches_dirty)
2248 i915_add_request(ring, NULL, NULL);
0a58705b
CW
2249
2250 idle &= list_empty(&ring->request_list);
673a394b
EA
2251 }
2252
0a58705b 2253 if (!dev_priv->mm.suspended && !idle)
bcb45086
CW
2254 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2255 round_jiffies_up_relative(HZ));
f047e395
CW
2256 if (idle)
2257 intel_mark_idle(dev);
0a58705b 2258
673a394b 2259 mutex_unlock(&dev->struct_mutex);
673a394b
EA
2260}
2261
30dfebf3
DV
2262/**
2263 * Ensures that an object will eventually get non-busy by flushing any required
 2264 * write domains, emitting any outstanding lazy request and retiring any
 2265 * completed requests.
2266 */
2267static int
2268i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2269{
2270 int ret;
2271
2272 if (obj->active) {
0201f1ec 2273 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
30dfebf3
DV
2274 if (ret)
2275 return ret;
2276
30dfebf3
DV
2277 i915_gem_retire_requests_ring(obj->ring);
2278 }
2279
2280 return 0;
2281}
2282
23ba4fd0
BW
2283/**
2284 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2285 * @DRM_IOCTL_ARGS: standard ioctl arguments
2286 *
2287 * Returns 0 if successful, else an error is returned with the remaining time in
2288 * the timeout parameter.
2289 * -ETIME: object is still busy after timeout
2290 * -ERESTARTSYS: signal interrupted the wait
 2292 * -ENOENT: object doesn't exist
2292 * Also possible, but rare:
2293 * -EAGAIN: GPU wedged
2294 * -ENOMEM: damn
2295 * -ENODEV: Internal IRQ fail
2296 * -E?: The add request failed
2297 *
2298 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2299 * non-zero timeout parameter the wait ioctl will wait for the given number of
2300 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 2301 * without holding struct_mutex, the object may become re-busied before this
 2302 * function completes. A similar but shorter race condition exists in the busy
 2303 * ioctl.
2304 */
2305int
2306i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2307{
2308 struct drm_i915_gem_wait *args = data;
2309 struct drm_i915_gem_object *obj;
2310 struct intel_ring_buffer *ring = NULL;
eac1f14f 2311 struct timespec timeout_stack, *timeout = NULL;
23ba4fd0
BW
2312 u32 seqno = 0;
2313 int ret = 0;
2314
eac1f14f
BW
2315 if (args->timeout_ns >= 0) {
2316 timeout_stack = ns_to_timespec(args->timeout_ns);
2317 timeout = &timeout_stack;
2318 }
23ba4fd0
BW
2319
2320 ret = i915_mutex_lock_interruptible(dev);
2321 if (ret)
2322 return ret;
2323
2324 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2325 if (&obj->base == NULL) {
2326 mutex_unlock(&dev->struct_mutex);
2327 return -ENOENT;
2328 }
2329
30dfebf3
DV
2330 /* Need to make sure the object gets inactive eventually. */
2331 ret = i915_gem_object_flush_active(obj);
23ba4fd0
BW
2332 if (ret)
2333 goto out;
2334
2335 if (obj->active) {
0201f1ec 2336 seqno = obj->last_read_seqno;
23ba4fd0
BW
2337 ring = obj->ring;
2338 }
2339
2340 if (seqno == 0)
2341 goto out;
2342
23ba4fd0
BW
2343 /* Do this after OLR check to make sure we make forward progress polling
2344 * on this IOCTL with a 0 timeout (like busy ioctl)
2345 */
2346 if (!args->timeout_ns) {
2347 ret = -ETIME;
2348 goto out;
2349 }
2350
2351 drm_gem_object_unreference(&obj->base);
2352 mutex_unlock(&dev->struct_mutex);
2353
eac1f14f
BW
2354 ret = __wait_seqno(ring, seqno, true, timeout);
2355 if (timeout) {
2356 WARN_ON(!timespec_valid(timeout));
2357 args->timeout_ns = timespec_to_ns(timeout);
2358 }
23ba4fd0
BW
2359 return ret;
2360
2361out:
2362 drm_gem_object_unreference(&obj->base);
2363 mutex_unlock(&dev->struct_mutex);
2364 return ret;
2365}
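
/* Editor's note: a hedged userspace sketch of this ioctl (the handle, fd and
 * the libdrm drmIoctl() wrapper are assumptions of the example, not part of
 * this file):
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 20 * 1000 * 1000,	/- wait up to 20 ms -/
 *	};
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * 0 means the object went idle; -1 with errno == ETIME means it is still
 * busy. In either case wait.timeout_ns is updated with the remaining time.
 */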
2366
5816d648
BW
2367/**
2368 * i915_gem_object_sync - sync an object to a ring.
2369 *
2370 * @obj: object which may be in use on another ring.
2371 * @to: ring we wish to use the object on. May be NULL.
2372 *
2373 * This code is meant to abstract object synchronization with the GPU.
2374 * Calling with NULL implies synchronizing the object with the CPU
2375 * rather than a particular GPU ring.
2376 *
2377 * Returns 0 if successful, else propagates up the lower layer error.
2378 */
2911a35b
BW
2379int
2380i915_gem_object_sync(struct drm_i915_gem_object *obj,
2381 struct intel_ring_buffer *to)
2382{
2383 struct intel_ring_buffer *from = obj->ring;
2384 u32 seqno;
2385 int ret, idx;
2386
2387 if (from == NULL || to == from)
2388 return 0;
2389
5816d648 2390 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
0201f1ec 2391 return i915_gem_object_wait_rendering(obj, false);
2911a35b
BW
2392
2393 idx = intel_ring_sync_index(from, to);
2394
0201f1ec 2395 seqno = obj->last_read_seqno;
2911a35b
BW
2396 if (seqno <= from->sync_seqno[idx])
2397 return 0;
2398
b4aca010
BW
2399 ret = i915_gem_check_olr(obj->ring, seqno);
2400 if (ret)
2401 return ret;
2911a35b 2402
1500f7ea 2403 ret = to->sync_to(to, from, seqno);
e3a5a225 2404 if (!ret)
7b01e260
MK
2405 /* We use last_read_seqno because sync_to()
2406 * might have just caused seqno wrap under
2407 * the radar.
2408 */
2409 from->sync_seqno[idx] = obj->last_read_seqno;
2911a35b 2410
e3a5a225 2411 return ret;
2911a35b
BW
2412}
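
/* Editor's note: a minimal caller sketch (assumed usage, e.g. by execbuffer
 * when an object was last used on a different ring):
 *
 *	ret = i915_gem_object_sync(obj, to);	/- to == NULL syncs with CPU -/
 *	if (ret)
 *		return ret;
 *
 * It is then safe to emit commands using obj on 'to'; on semaphore-capable
 * parts the wait happens on the GPU, otherwise we blocked above.
 */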
2413
b5ffc9bc
CW
2414static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2415{
2416 u32 old_write_domain, old_read_domains;
2417
b5ffc9bc
CW
 2418 /* Act as a barrier for all accesses through the GTT */
2419 mb();
2420
2421 /* Force a pagefault for domain tracking on next user access */
2422 i915_gem_release_mmap(obj);
2423
b97c3d9c
KP
2424 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2425 return;
2426
b5ffc9bc
CW
2427 old_read_domains = obj->base.read_domains;
2428 old_write_domain = obj->base.write_domain;
2429
2430 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2431 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2432
2433 trace_i915_gem_object_change_domain(obj,
2434 old_read_domains,
2435 old_write_domain);
2436}
2437
673a394b
EA
2438/**
2439 * Unbinds an object from the GTT aperture.
2440 */
0f973f27 2441int
05394f39 2442i915_gem_object_unbind(struct drm_i915_gem_object *obj)
673a394b 2443{
7bddb01f 2444 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
673a394b
EA
2445 int ret = 0;
2446
05394f39 2447 if (obj->gtt_space == NULL)
673a394b
EA
2448 return 0;
2449
31d8d651
CW
2450 if (obj->pin_count)
2451 return -EBUSY;
673a394b 2452
c4670ad0
CW
2453 BUG_ON(obj->pages == NULL);
2454
a8198eea 2455 ret = i915_gem_object_finish_gpu(obj);
1488fc08 2456 if (ret)
a8198eea
CW
2457 return ret;
 2458 /* Continue on if we fail due to EIO; the GPU is hung, so we
 2459 * should be safe and we need to clean up or else we might
2460 * cause memory corruption through use-after-free.
2461 */
2462
b5ffc9bc 2463 i915_gem_object_finish_gtt(obj);
5323fd04 2464
96b47b65 2465 /* release the fence reg _after_ flushing */
d9e86c0e 2466 ret = i915_gem_object_put_fence(obj);
1488fc08 2467 if (ret)
d9e86c0e 2468 return ret;
96b47b65 2469
db53a302
CW
2470 trace_i915_gem_object_unbind(obj);
2471
74898d7e
DV
2472 if (obj->has_global_gtt_mapping)
2473 i915_gem_gtt_unbind_object(obj);
7bddb01f
DV
2474 if (obj->has_aliasing_ppgtt_mapping) {
2475 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2476 obj->has_aliasing_ppgtt_mapping = 0;
2477 }
74163907 2478 i915_gem_gtt_finish_object(obj);
7bddb01f 2479
6c085a72
CW
2480 list_del(&obj->mm_list);
2481 list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
75e9e915 2482 /* Avoid an unnecessary call to unbind on rebind. */
05394f39 2483 obj->map_and_fenceable = true;
673a394b 2484
05394f39
CW
2485 drm_mm_put_block(obj->gtt_space);
2486 obj->gtt_space = NULL;
2487 obj->gtt_offset = 0;
673a394b 2488
88241785 2489 return 0;
54cf91dc
CW
2490}
2491
b2da9fe5 2492int i915_gpu_idle(struct drm_device *dev)
4df2faf4
DV
2493{
2494 drm_i915_private_t *dev_priv = dev->dev_private;
b4519513 2495 struct intel_ring_buffer *ring;
1ec14ad3 2496 int ret, i;
4df2faf4 2497
4df2faf4 2498 /* Flush everything onto the inactive list. */
b4519513 2499 for_each_ring(ring, dev_priv, i) {
b6c7488d
BW
2500 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
2501 if (ret)
2502 return ret;
2503
3e960501 2504 ret = intel_ring_idle(ring);
1ec14ad3
CW
2505 if (ret)
2506 return ret;
2507 }
4df2faf4 2508
8a1a49f9 2509 return 0;
4df2faf4
DV
2510}
2511
9ce079e4
CW
2512static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
2513 struct drm_i915_gem_object *obj)
4e901fdc 2514{
4e901fdc 2515 drm_i915_private_t *dev_priv = dev->dev_private;
4e901fdc
EA
2516 uint64_t val;
2517
9ce079e4
CW
2518 if (obj) {
2519 u32 size = obj->gtt_space->size;
4e901fdc 2520
9ce079e4
CW
2521 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2522 0xfffff000) << 32;
2523 val |= obj->gtt_offset & 0xfffff000;
2524 val |= (uint64_t)((obj->stride / 128) - 1) <<
2525 SANDYBRIDGE_FENCE_PITCH_SHIFT;
4e901fdc 2526
9ce079e4
CW
2527 if (obj->tiling_mode == I915_TILING_Y)
2528 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2529 val |= I965_FENCE_REG_VALID;
2530 } else
2531 val = 0;
c6642782 2532
9ce079e4
CW
2533 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
2534 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
4e901fdc
EA
2535}
2536
9ce079e4
CW
2537static void i965_write_fence_reg(struct drm_device *dev, int reg,
2538 struct drm_i915_gem_object *obj)
de151cf6 2539{
de151cf6 2540 drm_i915_private_t *dev_priv = dev->dev_private;
de151cf6
JB
2541 uint64_t val;
2542
9ce079e4
CW
2543 if (obj) {
2544 u32 size = obj->gtt_space->size;
de151cf6 2545
9ce079e4
CW
2546 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2547 0xfffff000) << 32;
2548 val |= obj->gtt_offset & 0xfffff000;
2549 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2550 if (obj->tiling_mode == I915_TILING_Y)
2551 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2552 val |= I965_FENCE_REG_VALID;
2553 } else
2554 val = 0;
c6642782 2555
9ce079e4
CW
2556 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
2557 POSTING_READ(FENCE_REG_965_0 + reg * 8);
de151cf6
JB
2558}
2559
9ce079e4
CW
2560static void i915_write_fence_reg(struct drm_device *dev, int reg,
2561 struct drm_i915_gem_object *obj)
de151cf6 2562{
de151cf6 2563 drm_i915_private_t *dev_priv = dev->dev_private;
9ce079e4 2564 u32 val;
de151cf6 2565
9ce079e4
CW
2566 if (obj) {
2567 u32 size = obj->gtt_space->size;
2568 int pitch_val;
2569 int tile_width;
c6642782 2570
9ce079e4
CW
2571 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2572 (size & -size) != size ||
2573 (obj->gtt_offset & (size - 1)),
2574 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2575 obj->gtt_offset, obj->map_and_fenceable, size);
c6642782 2576
9ce079e4
CW
2577 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2578 tile_width = 128;
2579 else
2580 tile_width = 512;
2581
2582 /* Note: pitch better be a power of two tile widths */
2583 pitch_val = obj->stride / tile_width;
2584 pitch_val = ffs(pitch_val) - 1;
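		/* Editor's illustration: a 2048-byte stride with 512-byte
		 * tiles gives obj->stride / tile_width = 4 and ffs(4) - 1 = 2,
		 * i.e. log2 of the pitch in tile widths, which is the encoding
		 * the fence register expects.
		 */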
2585
2586 val = obj->gtt_offset;
2587 if (obj->tiling_mode == I915_TILING_Y)
2588 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2589 val |= I915_FENCE_SIZE_BITS(size);
2590 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2591 val |= I830_FENCE_REG_VALID;
2592 } else
2593 val = 0;
2594
2595 if (reg < 8)
2596 reg = FENCE_REG_830_0 + reg * 4;
2597 else
2598 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2599
2600 I915_WRITE(reg, val);
2601 POSTING_READ(reg);
de151cf6
JB
2602}
2603
9ce079e4
CW
2604static void i830_write_fence_reg(struct drm_device *dev, int reg,
2605 struct drm_i915_gem_object *obj)
de151cf6 2606{
de151cf6 2607 drm_i915_private_t *dev_priv = dev->dev_private;
de151cf6 2608 uint32_t val;
de151cf6 2609
9ce079e4
CW
2610 if (obj) {
2611 u32 size = obj->gtt_space->size;
2612 uint32_t pitch_val;
de151cf6 2613
9ce079e4
CW
2614 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2615 (size & -size) != size ||
2616 (obj->gtt_offset & (size - 1)),
2617 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2618 obj->gtt_offset, size);
e76a16de 2619
9ce079e4
CW
2620 pitch_val = obj->stride / 128;
2621 pitch_val = ffs(pitch_val) - 1;
de151cf6 2622
9ce079e4
CW
2623 val = obj->gtt_offset;
2624 if (obj->tiling_mode == I915_TILING_Y)
2625 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2626 val |= I830_FENCE_SIZE_BITS(size);
2627 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2628 val |= I830_FENCE_REG_VALID;
2629 } else
2630 val = 0;
c6642782 2631
9ce079e4
CW
2632 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2633 POSTING_READ(FENCE_REG_830_0 + reg * 4);
2634}
2635
2636static void i915_gem_write_fence(struct drm_device *dev, int reg,
2637 struct drm_i915_gem_object *obj)
2638{
2639 switch (INTEL_INFO(dev)->gen) {
2640 case 7:
2641 case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
2642 case 5:
2643 case 4: i965_write_fence_reg(dev, reg, obj); break;
2644 case 3: i915_write_fence_reg(dev, reg, obj); break;
2645 case 2: i830_write_fence_reg(dev, reg, obj); break;
2646 default: break;
2647 }
de151cf6
JB
2648}
2649
61050808
CW
2650static inline int fence_number(struct drm_i915_private *dev_priv,
2651 struct drm_i915_fence_reg *fence)
2652{
2653 return fence - dev_priv->fence_regs;
2654}
2655
2656static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2657 struct drm_i915_fence_reg *fence,
2658 bool enable)
2659{
2660 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2661 int reg = fence_number(dev_priv, fence);
2662
2663 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
2664
2665 if (enable) {
2666 obj->fence_reg = reg;
2667 fence->obj = obj;
2668 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2669 } else {
2670 obj->fence_reg = I915_FENCE_REG_NONE;
2671 fence->obj = NULL;
2672 list_del_init(&fence->lru_list);
2673 }
2674}
2675
d9e86c0e 2676static int
a360bb1a 2677i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
d9e86c0e 2678{
1c293ea3 2679 if (obj->last_fenced_seqno) {
86d5bc37 2680 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
18991845
CW
2681 if (ret)
2682 return ret;
d9e86c0e
CW
2683
2684 obj->last_fenced_seqno = 0;
d9e86c0e
CW
2685 }
2686
63256ec5
CW
2687 /* Ensure that all CPU reads are completed before installing a fence
2688 * and all writes before removing the fence.
2689 */
2690 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2691 mb();
2692
86d5bc37 2693 obj->fenced_gpu_access = false;
d9e86c0e
CW
2694 return 0;
2695}
2696
2697int
2698i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2699{
61050808 2700 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
d9e86c0e
CW
2701 int ret;
2702
a360bb1a 2703 ret = i915_gem_object_flush_fence(obj);
d9e86c0e
CW
2704 if (ret)
2705 return ret;
2706
61050808
CW
2707 if (obj->fence_reg == I915_FENCE_REG_NONE)
2708 return 0;
d9e86c0e 2709
61050808
CW
2710 i915_gem_object_update_fence(obj,
2711 &dev_priv->fence_regs[obj->fence_reg],
2712 false);
2713 i915_gem_object_fence_lost(obj);
d9e86c0e
CW
2714
2715 return 0;
2716}
2717
2718static struct drm_i915_fence_reg *
a360bb1a 2719i915_find_fence_reg(struct drm_device *dev)
ae3db24a 2720{
ae3db24a 2721 struct drm_i915_private *dev_priv = dev->dev_private;
8fe301ad 2722 struct drm_i915_fence_reg *reg, *avail;
d9e86c0e 2723 int i;
ae3db24a
DV
2724
2725 /* First try to find a free reg */
d9e86c0e 2726 avail = NULL;
ae3db24a
DV
2727 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2728 reg = &dev_priv->fence_regs[i];
2729 if (!reg->obj)
d9e86c0e 2730 return reg;
ae3db24a 2731
1690e1eb 2732 if (!reg->pin_count)
d9e86c0e 2733 avail = reg;
ae3db24a
DV
2734 }
2735
d9e86c0e
CW
2736 if (avail == NULL)
2737 return NULL;
ae3db24a
DV
2738
2739 /* None available, try to steal one or wait for a user to finish */
d9e86c0e 2740 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
1690e1eb 2741 if (reg->pin_count)
ae3db24a
DV
2742 continue;
2743
8fe301ad 2744 return reg;
ae3db24a
DV
2745 }
2746
8fe301ad 2747 return NULL;
ae3db24a
DV
2748}
2749
de151cf6 2750/**
9a5a53b3 2751 * i915_gem_object_get_fence - set up fencing for an object
de151cf6
JB
2752 * @obj: object to map through a fence reg
2753 *
2754 * When mapping objects through the GTT, userspace wants to be able to write
2755 * to them without having to worry about swizzling if the object is tiled.
de151cf6
JB
2756 * This function walks the fence regs looking for a free one for @obj,
2757 * stealing one if it can't find any.
2758 *
2759 * It then sets up the reg based on the object's properties: address, pitch
2760 * and tiling format.
9a5a53b3
CW
2761 *
2762 * For an untiled surface, this removes any existing fence.
de151cf6 2763 */
8c4b8c3f 2764int
06d98131 2765i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
de151cf6 2766{
05394f39 2767 struct drm_device *dev = obj->base.dev;
79e53945 2768 struct drm_i915_private *dev_priv = dev->dev_private;
14415745 2769 bool enable = obj->tiling_mode != I915_TILING_NONE;
d9e86c0e 2770 struct drm_i915_fence_reg *reg;
ae3db24a 2771 int ret;
de151cf6 2772
14415745
CW
2773 /* Have we updated the tiling parameters upon the object and so
2774 * will need to serialise the write to the associated fence register?
2775 */
5d82e3e6 2776 if (obj->fence_dirty) {
14415745
CW
2777 ret = i915_gem_object_flush_fence(obj);
2778 if (ret)
2779 return ret;
2780 }
9a5a53b3 2781
d9e86c0e 2782 /* Just update our place in the LRU if our fence is getting reused. */
05394f39
CW
2783 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2784 reg = &dev_priv->fence_regs[obj->fence_reg];
5d82e3e6 2785 if (!obj->fence_dirty) {
14415745
CW
2786 list_move_tail(&reg->lru_list,
2787 &dev_priv->mm.fence_list);
2788 return 0;
2789 }
2790 } else if (enable) {
2791 reg = i915_find_fence_reg(dev);
2792 if (reg == NULL)
2793 return -EDEADLK;
d9e86c0e 2794
14415745
CW
2795 if (reg->obj) {
2796 struct drm_i915_gem_object *old = reg->obj;
2797
2798 ret = i915_gem_object_flush_fence(old);
29c5a587
CW
2799 if (ret)
2800 return ret;
2801
14415745 2802 i915_gem_object_fence_lost(old);
29c5a587 2803 }
14415745 2804 } else
a09ba7fa 2805 return 0;
a09ba7fa 2806
14415745 2807 i915_gem_object_update_fence(obj, reg, enable);
5d82e3e6 2808 obj->fence_dirty = false;
14415745 2809
9ce079e4 2810 return 0;
de151cf6
JB
2811}
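
/* Editor's note: a sketch of the expected calling pattern (assumed, based on
 * the fence users elsewhere in the driver):
 *
 *	ret = i915_gem_object_pin(obj, 0, true, false);	/- map_and_fenceable -/
 *	if (ret == 0)
 *		ret = i915_gem_object_get_fence(obj);
 *
 * For a tiled object obj->fence_reg then names a live fence; for an untiled
 * one any stale fence has been removed. -EDEADLK means every fence register
 * is currently pinned by another user.
 */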
2812
42d6ab48
CW
2813static bool i915_gem_valid_gtt_space(struct drm_device *dev,
2814 struct drm_mm_node *gtt_space,
2815 unsigned long cache_level)
2816{
2817 struct drm_mm_node *other;
2818
2819 /* On non-LLC machines we have to be careful when putting differing
2820 * types of snoopable memory together to avoid the prefetcher
 2821 * crossing memory domains and dying.
2822 */
2823 if (HAS_LLC(dev))
2824 return true;
2825
2826 if (gtt_space == NULL)
2827 return true;
2828
2829 if (list_empty(&gtt_space->node_list))
2830 return true;
2831
2832 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2833 if (other->allocated && !other->hole_follows && other->color != cache_level)
2834 return false;
2835
2836 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2837 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2838 return false;
2839
2840 return true;
2841}
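
/* Editor's illustration: on a non-LLC part, placing an I915_CACHE_LLC node
 * immediately before an I915_CACHE_NONE node with no guard hole between them
 * makes the check above return false, forcing eviction or a different hole.
 */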
2842
2843static void i915_gem_verify_gtt(struct drm_device *dev)
2844{
2845#if WATCH_GTT
2846 struct drm_i915_private *dev_priv = dev->dev_private;
2847 struct drm_i915_gem_object *obj;
2848 int err = 0;
2849
2850 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
2851 if (obj->gtt_space == NULL) {
2852 printk(KERN_ERR "object found on GTT list with no space reserved\n");
2853 err++;
2854 continue;
2855 }
2856
2857 if (obj->cache_level != obj->gtt_space->color) {
2858 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
2859 obj->gtt_space->start,
2860 obj->gtt_space->start + obj->gtt_space->size,
2861 obj->cache_level,
2862 obj->gtt_space->color);
2863 err++;
2864 continue;
2865 }
2866
2867 if (!i915_gem_valid_gtt_space(dev,
2868 obj->gtt_space,
2869 obj->cache_level)) {
2870 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
2871 obj->gtt_space->start,
2872 obj->gtt_space->start + obj->gtt_space->size,
2873 obj->cache_level);
2874 err++;
2875 continue;
2876 }
2877 }
2878
2879 WARN_ON(err);
2880#endif
2881}
2882
673a394b
EA
2883/**
2884 * Finds free space in the GTT aperture and binds the object there.
2885 */
2886static int
05394f39 2887i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
920afa77 2888 unsigned alignment,
86a1ee26
CW
2889 bool map_and_fenceable,
2890 bool nonblocking)
673a394b 2891{
05394f39 2892 struct drm_device *dev = obj->base.dev;
673a394b 2893 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 2894 struct drm_mm_node *free_space;
5e783301 2895 u32 size, fence_size, fence_alignment, unfenced_alignment;
75e9e915 2896 bool mappable, fenceable;
07f73f69 2897 int ret;
673a394b 2898
05394f39 2899 if (obj->madv != I915_MADV_WILLNEED) {
3ef94daa
CW
2900 DRM_ERROR("Attempting to bind a purgeable object\n");
2901 return -EINVAL;
2902 }
2903
e28f8711
CW
2904 fence_size = i915_gem_get_gtt_size(dev,
2905 obj->base.size,
2906 obj->tiling_mode);
2907 fence_alignment = i915_gem_get_gtt_alignment(dev,
2908 obj->base.size,
2909 obj->tiling_mode);
2910 unfenced_alignment =
2911 i915_gem_get_unfenced_gtt_alignment(dev,
2912 obj->base.size,
2913 obj->tiling_mode);
a00b10c3 2914
673a394b 2915 if (alignment == 0)
5e783301
DV
2916 alignment = map_and_fenceable ? fence_alignment :
2917 unfenced_alignment;
75e9e915 2918 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
673a394b
EA
2919 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2920 return -EINVAL;
2921 }
2922
05394f39 2923 size = map_and_fenceable ? fence_size : obj->base.size;
a00b10c3 2924
654fc607
CW
2925 /* If the object is bigger than the entire aperture, reject it early
2926 * before evicting everything in a vain attempt to find space.
2927 */
05394f39 2928 if (obj->base.size >
75e9e915 2929 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
654fc607
CW
2930 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2931 return -E2BIG;
2932 }
2933
37e680a1 2934 ret = i915_gem_object_get_pages(obj);
6c085a72
CW
2935 if (ret)
2936 return ret;
2937
fbdda6fb
CW
2938 i915_gem_object_pin_pages(obj);
2939
673a394b 2940 search_free:
75e9e915 2941 if (map_and_fenceable)
8742267a
CW
2942 free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
2943 size, alignment, obj->cache_level,
2944 0, dev_priv->mm.gtt_mappable_end,
2945 false);
920afa77 2946 else
42d6ab48
CW
2947 free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
2948 size, alignment, obj->cache_level,
2949 false);
920afa77
DV
2950
2951 if (free_space != NULL) {
75e9e915 2952 if (map_and_fenceable)
8742267a 2953 free_space =
920afa77 2954 drm_mm_get_block_range_generic(free_space,
42d6ab48 2955 size, alignment, obj->cache_level,
6b9d89b4 2956 0, dev_priv->mm.gtt_mappable_end,
42d6ab48 2957 false);
920afa77 2958 else
8742267a 2959 free_space =
42d6ab48
CW
2960 drm_mm_get_block_generic(free_space,
2961 size, alignment, obj->cache_level,
2962 false);
920afa77 2963 }
8742267a 2964 if (free_space == NULL) {
75e9e915 2965 ret = i915_gem_evict_something(dev, size, alignment,
42d6ab48 2966 obj->cache_level,
86a1ee26
CW
2967 map_and_fenceable,
2968 nonblocking);
fbdda6fb
CW
2969 if (ret) {
2970 i915_gem_object_unpin_pages(obj);
673a394b 2971 return ret;
fbdda6fb 2972 }
9731129c 2973
673a394b
EA
2974 goto search_free;
2975 }
42d6ab48 2976 if (WARN_ON(!i915_gem_valid_gtt_space(dev,
8742267a 2977 free_space,
42d6ab48 2978 obj->cache_level))) {
fbdda6fb 2979 i915_gem_object_unpin_pages(obj);
8742267a 2980 drm_mm_put_block(free_space);
42d6ab48 2981 return -EINVAL;
673a394b
EA
2982 }
2983
74163907 2984 ret = i915_gem_gtt_prepare_object(obj);
7c2e6fdf 2985 if (ret) {
fbdda6fb 2986 i915_gem_object_unpin_pages(obj);
8742267a 2987 drm_mm_put_block(free_space);
6c085a72 2988 return ret;
673a394b 2989 }
673a394b 2990
6c085a72 2991 list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
05394f39 2992 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
bf1a1092 2993
8742267a
CW
2994 obj->gtt_space = free_space;
2995 obj->gtt_offset = free_space->start;
1c5d22f7 2996
75e9e915 2997 fenceable =
8742267a
CW
2998 free_space->size == fence_size &&
2999 (free_space->start & (fence_alignment - 1)) == 0;
a00b10c3 3000
75e9e915 3001 mappable =
05394f39 3002 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
a00b10c3 3003
05394f39 3004 obj->map_and_fenceable = mappable && fenceable;
75e9e915 3005
fbdda6fb 3006 i915_gem_object_unpin_pages(obj);
db53a302 3007 trace_i915_gem_object_bind(obj, map_and_fenceable);
42d6ab48 3008 i915_gem_verify_gtt(dev);
673a394b
EA
3009 return 0;
3010}
3011
3012void
05394f39 3013i915_gem_clflush_object(struct drm_i915_gem_object *obj)
673a394b 3014{
673a394b
EA
3015 /* If we don't have a page list set up, then we're not pinned
3016 * to GPU, and we can ignore the cache flush because it'll happen
3017 * again at bind time.
3018 */
05394f39 3019 if (obj->pages == NULL)
673a394b
EA
3020 return;
3021
9c23f7fc
CW
3022 /* If the GPU is snooping the contents of the CPU cache,
3023 * we do not need to manually clear the CPU cache lines. However,
3024 * the caches are only snooped when the render cache is
3025 * flushed/invalidated. As we always have to emit invalidations
3026 * and flushes when moving into and out of the RENDER domain, correct
3027 * snooping behaviour occurs naturally as the result of our domain
3028 * tracking.
3029 */
3030 if (obj->cache_level != I915_CACHE_NONE)
3031 return;
3032
1c5d22f7 3033 trace_i915_gem_object_clflush(obj);
cfa16a0d 3034
9da3da66 3035 drm_clflush_sg(obj->pages);
e47c68e9
EA
3036}
3037
3038/** Flushes the GTT write domain for the object if it's dirty. */
3039static void
05394f39 3040i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3041{
1c5d22f7
CW
3042 uint32_t old_write_domain;
3043
05394f39 3044 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
3045 return;
3046
63256ec5 3047 /* No actual flushing is required for the GTT write domain. Writes
e47c68e9
EA
3048 * to it immediately go to main memory as far as we know, so there's
3049 * no chipset flush. It also doesn't land in render cache.
63256ec5
CW
3050 *
3051 * However, we do have to enforce the order so that all writes through
3052 * the GTT land before any writes to the device, such as updates to
3053 * the GATT itself.
e47c68e9 3054 */
63256ec5
CW
3055 wmb();
3056
05394f39
CW
3057 old_write_domain = obj->base.write_domain;
3058 obj->base.write_domain = 0;
1c5d22f7
CW
3059
3060 trace_i915_gem_object_change_domain(obj,
05394f39 3061 obj->base.read_domains,
1c5d22f7 3062 old_write_domain);
e47c68e9
EA
3063}
3064
3065/** Flushes the CPU write domain for the object if it's dirty. */
3066static void
05394f39 3067i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3068{
1c5d22f7 3069 uint32_t old_write_domain;
e47c68e9 3070
05394f39 3071 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
3072 return;
3073
3074 i915_gem_clflush_object(obj);
e76e9aeb 3075 i915_gem_chipset_flush(obj->base.dev);
05394f39
CW
3076 old_write_domain = obj->base.write_domain;
3077 obj->base.write_domain = 0;
1c5d22f7
CW
3078
3079 trace_i915_gem_object_change_domain(obj,
05394f39 3080 obj->base.read_domains,
1c5d22f7 3081 old_write_domain);
e47c68e9
EA
3082}
3083
2ef7eeaa
EA
3084/**
3085 * Moves a single object to the GTT read, and possibly write domain.
3086 *
3087 * This function returns when the move is complete, including waiting on
3088 * flushes to occur.
3089 */
79e53945 3090int
2021746e 3091i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 3092{
8325a09d 3093 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
1c5d22f7 3094 uint32_t old_write_domain, old_read_domains;
e47c68e9 3095 int ret;
2ef7eeaa 3096
02354392 3097 /* Not valid to be called on unbound objects. */
05394f39 3098 if (obj->gtt_space == NULL)
02354392
EA
3099 return -EINVAL;
3100
8d7e3de1
CW
3101 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3102 return 0;
3103
0201f1ec 3104 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3105 if (ret)
3106 return ret;
3107
7213342d 3108 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 3109
05394f39
CW
3110 old_write_domain = obj->base.write_domain;
3111 old_read_domains = obj->base.read_domains;
1c5d22f7 3112
e47c68e9
EA
3113 /* It should now be out of any other write domains, and we can update
3114 * the domain values for our changes.
3115 */
05394f39
CW
3116 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3117 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 3118 if (write) {
05394f39
CW
3119 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3120 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3121 obj->dirty = 1;
2ef7eeaa
EA
3122 }
3123
1c5d22f7
CW
3124 trace_i915_gem_object_change_domain(obj,
3125 old_read_domains,
3126 old_write_domain);
3127
8325a09d
CW
3128 /* And bump the LRU for this access */
3129 if (i915_gem_object_is_inactive(obj))
3130 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3131
e47c68e9
EA
3132 return 0;
3133}
3134
e4ffd173
CW
3135int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3136 enum i915_cache_level cache_level)
3137{
7bddb01f
DV
3138 struct drm_device *dev = obj->base.dev;
3139 drm_i915_private_t *dev_priv = dev->dev_private;
e4ffd173
CW
3140 int ret;
3141
3142 if (obj->cache_level == cache_level)
3143 return 0;
3144
3145 if (obj->pin_count) {
3146 DRM_DEBUG("can not change the cache level of pinned objects\n");
3147 return -EBUSY;
3148 }
3149
42d6ab48
CW
3150 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
3151 ret = i915_gem_object_unbind(obj);
3152 if (ret)
3153 return ret;
3154 }
3155
e4ffd173
CW
3156 if (obj->gtt_space) {
3157 ret = i915_gem_object_finish_gpu(obj);
3158 if (ret)
3159 return ret;
3160
3161 i915_gem_object_finish_gtt(obj);
3162
3163 /* Before SandyBridge, you could not use tiling or fence
3164 * registers with snooped memory, so relinquish any fences
3165 * currently pointing to our region in the aperture.
3166 */
42d6ab48 3167 if (INTEL_INFO(dev)->gen < 6) {
e4ffd173
CW
3168 ret = i915_gem_object_put_fence(obj);
3169 if (ret)
3170 return ret;
3171 }
3172
74898d7e
DV
3173 if (obj->has_global_gtt_mapping)
3174 i915_gem_gtt_bind_object(obj, cache_level);
7bddb01f
DV
3175 if (obj->has_aliasing_ppgtt_mapping)
3176 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
3177 obj, cache_level);
42d6ab48
CW
3178
3179 obj->gtt_space->color = cache_level;
e4ffd173
CW
3180 }
3181
3182 if (cache_level == I915_CACHE_NONE) {
3183 u32 old_read_domains, old_write_domain;
3184
3185 /* If we're coming from LLC cached, then we haven't
3186 * actually been tracking whether the data is in the
3187 * CPU cache or not, since we only allow one bit set
3188 * in obj->write_domain and have been skipping the clflushes.
3189 * Just set it to the CPU cache for now.
3190 */
3191 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
3192 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
3193
3194 old_read_domains = obj->base.read_domains;
3195 old_write_domain = obj->base.write_domain;
3196
3197 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3198 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3199
3200 trace_i915_gem_object_change_domain(obj,
3201 old_read_domains,
3202 old_write_domain);
3203 }
3204
3205 obj->cache_level = cache_level;
42d6ab48 3206 i915_gem_verify_gtt(dev);
e4ffd173
CW
3207 return 0;
3208}
3209
199adf40
BW
3210int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3211 struct drm_file *file)
e6994aee 3212{
199adf40 3213 struct drm_i915_gem_caching *args = data;
e6994aee
CW
3214 struct drm_i915_gem_object *obj;
3215 int ret;
3216
3217 ret = i915_mutex_lock_interruptible(dev);
3218 if (ret)
3219 return ret;
3220
3221 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3222 if (&obj->base == NULL) {
3223 ret = -ENOENT;
3224 goto unlock;
3225 }
3226
199adf40 3227 args->caching = obj->cache_level != I915_CACHE_NONE;
e6994aee
CW
3228
3229 drm_gem_object_unreference(&obj->base);
3230unlock:
3231 mutex_unlock(&dev->struct_mutex);
3232 return ret;
3233}
3234
199adf40
BW
3235int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3236 struct drm_file *file)
e6994aee 3237{
199adf40 3238 struct drm_i915_gem_caching *args = data;
e6994aee
CW
3239 struct drm_i915_gem_object *obj;
3240 enum i915_cache_level level;
3241 int ret;
3242
199adf40
BW
3243 switch (args->caching) {
3244 case I915_CACHING_NONE:
e6994aee
CW
3245 level = I915_CACHE_NONE;
3246 break;
199adf40 3247 case I915_CACHING_CACHED:
e6994aee
CW
3248 level = I915_CACHE_LLC;
3249 break;
3250 default:
3251 return -EINVAL;
3252 }
3253
3bc2913e
BW
3254 ret = i915_mutex_lock_interruptible(dev);
3255 if (ret)
3256 return ret;
3257
e6994aee
CW
3258 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3259 if (&obj->base == NULL) {
3260 ret = -ENOENT;
3261 goto unlock;
3262 }
3263
3264 ret = i915_gem_object_set_cache_level(obj, level);
3265
3266 drm_gem_object_unreference(&obj->base);
3267unlock:
3268 mutex_unlock(&dev->struct_mutex);
3269 return ret;
3270}
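
/* Editor's note: a hedged userspace sketch of the caching ioctls (the handle,
 * fd and drmIoctl() are assumptions of the example):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,	/- ask for LLC caching -/
 *	};
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 * The matching GET ioctl reads back 0 (uncached) or 1 (cached) in
 * arg.caching, mirroring the != I915_CACHE_NONE test above.
 */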
3271
b9241ea3 3272/*
2da3b9b9
CW
3273 * Prepare buffer for display plane (scanout, cursors, etc).
3274 * Can be called from an uninterruptible phase (modesetting) and allows
3275 * any flushes to be pipelined (for pageflips).
b9241ea3
ZW
3276 */
3277int
2da3b9b9
CW
3278i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3279 u32 alignment,
919926ae 3280 struct intel_ring_buffer *pipelined)
b9241ea3 3281{
2da3b9b9 3282 u32 old_read_domains, old_write_domain;
b9241ea3
ZW
3283 int ret;
3284
0be73284 3285 if (pipelined != obj->ring) {
2911a35b
BW
3286 ret = i915_gem_object_sync(obj, pipelined);
3287 if (ret)
b9241ea3
ZW
3288 return ret;
3289 }
3290
a7ef0640
EA
3291 /* The display engine is not coherent with the LLC cache on gen6. As
3292 * a result, we make sure that the pinning that is about to occur is
 3293 * done with uncached PTEs. This is the lowest common denominator for all
3294 * chipsets.
3295 *
3296 * However for gen6+, we could do better by using the GFDT bit instead
3297 * of uncaching, which would allow us to flush all the LLC-cached data
3298 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3299 */
3300 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
3301 if (ret)
3302 return ret;
3303
2da3b9b9
CW
3304 /* As the user may map the buffer once pinned in the display plane
3305 * (e.g. libkms for the bootup splash), we have to ensure that we
3306 * always use map_and_fenceable for all scanout buffers.
3307 */
86a1ee26 3308 ret = i915_gem_object_pin(obj, alignment, true, false);
2da3b9b9
CW
3309 if (ret)
3310 return ret;
3311
b118c1e3
CW
3312 i915_gem_object_flush_cpu_write_domain(obj);
3313
2da3b9b9 3314 old_write_domain = obj->base.write_domain;
05394f39 3315 old_read_domains = obj->base.read_domains;
2da3b9b9
CW
3316
3317 /* It should now be out of any other write domains, and we can update
3318 * the domain values for our changes.
3319 */
e5f1d962 3320 obj->base.write_domain = 0;
05394f39 3321 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
3322
3323 trace_i915_gem_object_change_domain(obj,
3324 old_read_domains,
2da3b9b9 3325 old_write_domain);
b9241ea3
ZW
3326
3327 return 0;
3328}
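
/* Editor's note: a rough sketch of the modesetting caller (assumed; the real
 * helper lives in intel_display.c):
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 *	if (ret)
 *		return ret;
 *	ret = i915_gem_object_get_fence(obj);	/- scanout wants a fence if tiled -/
 *	...
 *	i915_gem_object_unpin(obj);		/- on teardown -/
 */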
3329
85345517 3330int
a8198eea 3331i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
85345517 3332{
88241785
CW
3333 int ret;
3334
a8198eea 3335 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
85345517
CW
3336 return 0;
3337
0201f1ec 3338 ret = i915_gem_object_wait_rendering(obj, false);
c501ae7f
CW
3339 if (ret)
3340 return ret;
3341
a8198eea
CW
3342 /* Ensure that we invalidate the GPU's caches and TLBs. */
3343 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
c501ae7f 3344 return 0;
85345517
CW
3345}
3346
e47c68e9
EA
3347/**
3348 * Moves a single object to the CPU read, and possibly write domain.
3349 *
3350 * This function returns when the move is complete, including waiting on
3351 * flushes to occur.
3352 */
dabdfe02 3353int
919926ae 3354i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 3355{
1c5d22f7 3356 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
3357 int ret;
3358
8d7e3de1
CW
3359 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3360 return 0;
3361
0201f1ec 3362 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3363 if (ret)
3364 return ret;
3365
e47c68e9 3366 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 3367
05394f39
CW
3368 old_write_domain = obj->base.write_domain;
3369 old_read_domains = obj->base.read_domains;
1c5d22f7 3370
e47c68e9 3371 /* Flush the CPU cache if it's still invalid. */
05394f39 3372 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2ef7eeaa 3373 i915_gem_clflush_object(obj);
2ef7eeaa 3374
05394f39 3375 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
3376 }
3377
3378 /* It should now be out of any other write domains, and we can update
3379 * the domain values for our changes.
3380 */
05394f39 3381 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
3382
3383 /* If we're writing through the CPU, then the GPU read domains will
3384 * need to be invalidated at next use.
3385 */
3386 if (write) {
05394f39
CW
3387 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3388 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 3389 }
2ef7eeaa 3390
1c5d22f7
CW
3391 trace_i915_gem_object_change_domain(obj,
3392 old_read_domains,
3393 old_write_domain);
3394
2ef7eeaa
EA
3395 return 0;
3396}
3397
673a394b
EA
3398/* Throttle our rendering by waiting until the ring has completed our requests
3399 * emitted over 20 msec ago.
3400 *
b962442e
EA
3401 * Note that if we were to use the current jiffies each time around the loop,
3402 * we wouldn't escape the function with any frames outstanding if the time to
3403 * render a frame was over 20ms.
3404 *
673a394b
EA
3405 * This should get us reasonable parallelism between CPU and GPU but also
3406 * relatively low latency when blocking on a particular request to finish.
3407 */
40a5f0de 3408static int
f787a5f5 3409i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 3410{
f787a5f5
CW
3411 struct drm_i915_private *dev_priv = dev->dev_private;
3412 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e 3413 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
f787a5f5
CW
3414 struct drm_i915_gem_request *request;
3415 struct intel_ring_buffer *ring = NULL;
3416 u32 seqno = 0;
3417 int ret;
93533c29 3418
e110e8d6
CW
3419 if (atomic_read(&dev_priv->mm.wedged))
3420 return -EIO;
3421
1c25595f 3422 spin_lock(&file_priv->mm.lock);
f787a5f5 3423 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
3424 if (time_after_eq(request->emitted_jiffies, recent_enough))
3425 break;
40a5f0de 3426
f787a5f5
CW
3427 ring = request->ring;
3428 seqno = request->seqno;
b962442e 3429 }
1c25595f 3430 spin_unlock(&file_priv->mm.lock);
40a5f0de 3431
f787a5f5
CW
3432 if (seqno == 0)
3433 return 0;
2bc43b5c 3434
5c81fe85 3435 ret = __wait_seqno(ring, seqno, true, NULL);
f787a5f5
CW
3436 if (ret == 0)
3437 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
40a5f0de
EA
3438
3439 return ret;
3440}
3441
673a394b 3442int
05394f39
CW
3443i915_gem_object_pin(struct drm_i915_gem_object *obj,
3444 uint32_t alignment,
86a1ee26
CW
3445 bool map_and_fenceable,
3446 bool nonblocking)
673a394b 3447{
673a394b
EA
3448 int ret;
3449
7e81a42e
CW
3450 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3451 return -EBUSY;
ac0c6b5a 3452
05394f39
CW
3453 if (obj->gtt_space != NULL) {
3454 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3455 (map_and_fenceable && !obj->map_and_fenceable)) {
3456 WARN(obj->pin_count,
ae7d49d8 3457 "bo is already pinned with incorrect alignment:"
75e9e915
DV
3458 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3459 " obj->map_and_fenceable=%d\n",
05394f39 3460 obj->gtt_offset, alignment,
75e9e915 3461 map_and_fenceable,
05394f39 3462 obj->map_and_fenceable);
ac0c6b5a
CW
3463 ret = i915_gem_object_unbind(obj);
3464 if (ret)
3465 return ret;
3466 }
3467 }
3468
05394f39 3469 if (obj->gtt_space == NULL) {
8742267a
CW
3470 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3471
a00b10c3 3472 ret = i915_gem_object_bind_to_gtt(obj, alignment,
86a1ee26
CW
3473 map_and_fenceable,
3474 nonblocking);
9731129c 3475 if (ret)
673a394b 3476 return ret;
8742267a
CW
3477
3478 if (!dev_priv->mm.aliasing_ppgtt)
3479 i915_gem_gtt_bind_object(obj, obj->cache_level);
22c344e9 3480 }
76446cac 3481
74898d7e
DV
3482 if (!obj->has_global_gtt_mapping && map_and_fenceable)
3483 i915_gem_gtt_bind_object(obj, obj->cache_level);
3484
1b50247a 3485 obj->pin_count++;
6299f992 3486 obj->pin_mappable |= map_and_fenceable;
673a394b
EA
3487
3488 return 0;
3489}
3490
3491void
05394f39 3492i915_gem_object_unpin(struct drm_i915_gem_object *obj)
673a394b 3493{
05394f39
CW
3494 BUG_ON(obj->pin_count == 0);
3495 BUG_ON(obj->gtt_space == NULL);
673a394b 3496
1b50247a 3497 if (--obj->pin_count == 0)
6299f992 3498 obj->pin_mappable = false;
673a394b
EA
3499}
3500
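/*
 * Pin ioctl: legacy interface that lets a client (in practice the old X
 * server) pin a buffer and query its GTT offset directly.  Purgeable
 * buffers are rejected, and a buffer pinned through one file handle may
 * not be re-pinned through another.
 */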
3501int
3502i915_gem_pin_ioctl(struct drm_device *dev, void *data,
05394f39 3503 struct drm_file *file)
673a394b
EA
3504{
3505 struct drm_i915_gem_pin *args = data;
05394f39 3506 struct drm_i915_gem_object *obj;
673a394b
EA
3507 int ret;
3508
1d7cfea1
CW
3509 ret = i915_mutex_lock_interruptible(dev);
3510 if (ret)
3511 return ret;
673a394b 3512
05394f39 3513 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3514 if (&obj->base == NULL) {
1d7cfea1
CW
3515 ret = -ENOENT;
3516 goto unlock;
673a394b 3517 }
673a394b 3518
05394f39 3519 if (obj->madv != I915_MADV_WILLNEED) {
bb6baf76 3520 DRM_ERROR("Attempting to pin a purgeable buffer\n");
1d7cfea1
CW
3521 ret = -EINVAL;
3522 goto out;
3ef94daa
CW
3523 }
3524
05394f39 3525 if (obj->pin_filp != NULL && obj->pin_filp != file) {
79e53945
JB
3526 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3527 args->handle);
1d7cfea1
CW
3528 ret = -EINVAL;
3529 goto out;
79e53945
JB
3530 }
3531
05394f39
CW
3532 obj->user_pin_count++;
3533 obj->pin_filp = file;
3534 if (obj->user_pin_count == 1) {
86a1ee26 3535 ret = i915_gem_object_pin(obj, args->alignment, true, false);
1d7cfea1
CW
3536 if (ret)
3537 goto out;
673a394b
EA
3538 }
3539
3540 /* XXX - flush the CPU caches for pinned objects
3541 * as the X server doesn't manage domains yet
3542 */
e47c68e9 3543 i915_gem_object_flush_cpu_write_domain(obj);
05394f39 3544 args->offset = obj->gtt_offset;
1d7cfea1 3545out:
05394f39 3546 drm_gem_object_unreference(&obj->base);
1d7cfea1 3547unlock:
673a394b 3548 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3549 return ret;
673a394b
EA
3550}
3551
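/*
 * Counterpart to i915_gem_pin_ioctl(): drops one user pin reference.
 * Only the file handle that pinned the buffer may unpin it; the object is
 * actually unpinned once user_pin_count reaches zero.
 */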
3552int
3553i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
05394f39 3554 struct drm_file *file)
673a394b
EA
3555{
3556 struct drm_i915_gem_pin *args = data;
05394f39 3557 struct drm_i915_gem_object *obj;
76c1dec1 3558 int ret;
673a394b 3559
1d7cfea1
CW
3560 ret = i915_mutex_lock_interruptible(dev);
3561 if (ret)
3562 return ret;
673a394b 3563
05394f39 3564 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3565 if (&obj->base == NULL) {
1d7cfea1
CW
3566 ret = -ENOENT;
3567 goto unlock;
673a394b 3568 }
76c1dec1 3569
05394f39 3570 if (obj->pin_filp != file) {
79e53945
JB
3571 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3572 args->handle);
1d7cfea1
CW
3573 ret = -EINVAL;
3574 goto out;
79e53945 3575 }
05394f39
CW
3576 obj->user_pin_count--;
3577 if (obj->user_pin_count == 0) {
3578 obj->pin_filp = NULL;
79e53945
JB
3579 i915_gem_object_unpin(obj);
3580 }
673a394b 3581
1d7cfea1 3582out:
05394f39 3583 drm_gem_object_unreference(&obj->base);
1d7cfea1 3584unlock:
673a394b 3585 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3586 return ret;
673a394b
EA
3587}
3588
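/*
 * Busy ioctl: reports whether an object is still in use by the GPU.
 * Active objects are flushed first so that userspace polling this ioctl
 * eventually sees the buffer go idle, and the ring currently using the
 * object is encoded in the upper 16 bits of the result.
 */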
3589int
3590i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 3591 struct drm_file *file)
673a394b
EA
3592{
3593 struct drm_i915_gem_busy *args = data;
05394f39 3594 struct drm_i915_gem_object *obj;
30dbf0c0
CW
3595 int ret;
3596
76c1dec1 3597 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 3598 if (ret)
76c1dec1 3599 return ret;
673a394b 3600
05394f39 3601 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3602 if (&obj->base == NULL) {
1d7cfea1
CW
3603 ret = -ENOENT;
3604 goto unlock;
673a394b 3605 }
d1b851fc 3606
0be555b6
CW
3607 /* Count all active objects as busy, even if they are currently not used
3608 * by the gpu. Users of this interface expect objects to eventually
3609 * become non-busy without any further actions, therefore emit any
3610 * necessary flushes here.
c4de0a5d 3611 */
30dfebf3 3612 ret = i915_gem_object_flush_active(obj);
0be555b6 3613
30dfebf3 3614 args->busy = obj->active;
e9808edd
CW
3615 if (obj->ring) {
3616 BUILD_BUG_ON(I915_NUM_RINGS > 16);
3617 args->busy |= intel_ring_flag(obj->ring) << 16;
3618 }
673a394b 3619
05394f39 3620 drm_gem_object_unreference(&obj->base);
1d7cfea1 3621unlock:
673a394b 3622 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3623 return ret;
673a394b
EA
3624}
3625
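/*
 * Throttle ioctl: thin wrapper around i915_gem_ring_throttle(), blocking
 * the caller until its requests emitted more than 20 msec ago have
 * completed (see the comment above that function).
 */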
3626int
3627i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3628 struct drm_file *file_priv)
3629{
0206e353 3630 return i915_gem_ring_throttle(dev, file_priv);
673a394b
EA
3631}
3632
3ef94daa
CW
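/*
 * Madvise ioctl: marks an object's backing storage as needed
 * (I915_MADV_WILLNEED) or discardable (I915_MADV_DONTNEED).  Pinned
 * objects are rejected, and an object whose pages have already been
 * purged stays purged; args->retained tells userspace whether the
 * contents survived.
 */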
3633int
3634i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3635 struct drm_file *file_priv)
3636{
3637 struct drm_i915_gem_madvise *args = data;
05394f39 3638 struct drm_i915_gem_object *obj;
76c1dec1 3639 int ret;
3ef94daa
CW
3640
3641 switch (args->madv) {
3642 case I915_MADV_DONTNEED:
3643 case I915_MADV_WILLNEED:
3644 break;
3645 default:
3646 return -EINVAL;
3647 }
3648
1d7cfea1
CW
3649 ret = i915_mutex_lock_interruptible(dev);
3650 if (ret)
3651 return ret;
3652
05394f39 3653 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
c8725226 3654 if (&obj->base == NULL) {
1d7cfea1
CW
3655 ret = -ENOENT;
3656 goto unlock;
3ef94daa 3657 }
3ef94daa 3658
05394f39 3659 if (obj->pin_count) {
1d7cfea1
CW
3660 ret = -EINVAL;
3661 goto out;
3ef94daa
CW
3662 }
3663
05394f39
CW
3664 if (obj->madv != __I915_MADV_PURGED)
3665 obj->madv = args->madv;
3ef94daa 3666
6c085a72
CW
3667 /* if the object is no longer attached, discard its backing storage */
3668 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
2d7ef395
CW
3669 i915_gem_object_truncate(obj);
3670
05394f39 3671 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 3672
1d7cfea1 3673out:
05394f39 3674 drm_gem_object_unreference(&obj->base);
1d7cfea1 3675unlock:
3ef94daa 3676 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3677 return ret;
3ef94daa
CW
3678}
3679
37e680a1
CW
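/*
 * Common initialization for freshly allocated GEM objects: list heads,
 * the vtable of page-backing ops, and defaults such as
 * I915_FENCE_REG_NONE and map_and_fenceable.
 */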
3680void i915_gem_object_init(struct drm_i915_gem_object *obj,
3681 const struct drm_i915_gem_object_ops *ops)
0327d6ba 3682{
0327d6ba
CW
3683 INIT_LIST_HEAD(&obj->mm_list);
3684 INIT_LIST_HEAD(&obj->gtt_list);
3685 INIT_LIST_HEAD(&obj->ring_list);
3686 INIT_LIST_HEAD(&obj->exec_list);
3687
37e680a1
CW
3688 obj->ops = ops;
3689
0327d6ba
CW
3690 obj->fence_reg = I915_FENCE_REG_NONE;
3691 obj->madv = I915_MADV_WILLNEED;
3692 /* Avoid an unnecessary call to unbind on the first bind. */
3693 obj->map_and_fenceable = true;
3694
3695 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
3696}
3697
37e680a1
CW
3698static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3699 .get_pages = i915_gem_object_get_pages_gtt,
3700 .put_pages = i915_gem_object_put_pages_gtt,
3701};
3702
05394f39
CW
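/*
 * Allocate a shmem-backed GEM object of @size bytes.  The object struct
 * itself now comes from the dedicated "i915_gem_object" slab (created in
 * i915_gem_load()); 965G/GM parts additionally restrict the backing pages
 * to the low 4GiB via the mapping's GFP mask.  Returns NULL on failure.
 *
 * Illustrative use only (not taken from this file):
 *
 *	obj = i915_gem_alloc_object(dev, PAGE_SIZE);
 *	if (obj == NULL)
 *		return -ENOMEM;
 */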
3703struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3704 size_t size)
ac52bc56 3705{
c397b908 3706 struct drm_i915_gem_object *obj;
5949eac4 3707 struct address_space *mapping;
bed1ea95 3708 u32 mask;
ac52bc56 3709
42dcedd4 3710 obj = i915_gem_object_alloc(dev);
c397b908
DV
3711 if (obj == NULL)
3712 return NULL;
673a394b 3713
c397b908 3714 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
42dcedd4 3715 i915_gem_object_free(obj);
c397b908
DV
3716 return NULL;
3717 }
673a394b 3718
bed1ea95
CW
3719 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3720 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
3721 /* 965gm cannot relocate objects above 4GiB. */
3722 mask &= ~__GFP_HIGHMEM;
3723 mask |= __GFP_DMA32;
3724 }
3725
5949eac4 3726 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
bed1ea95 3727 mapping_set_gfp_mask(mapping, mask);
5949eac4 3728
37e680a1 3729 i915_gem_object_init(obj, &i915_gem_object_ops);
73aa808f 3730
c397b908
DV
3731 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3732 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 3733
3d29b842
ED
3734 if (HAS_LLC(dev)) {
3735 /* On some devices, we can have the GPU use the LLC (the CPU
a1871112
EA
3736 * cache) for about a 10% performance improvement
3737 * compared to uncached. Graphics requests other than
3738 * display scanout are coherent with the CPU in
3739 * accessing this cache. This means in this mode we
3740 * don't need to clflush on the CPU side, and on the
3741 * GPU side we only need to flush internal caches to
3742 * get data visible to the CPU.
3743 *
3744 * However, we maintain the display planes as UC, and so
3745 * need to rebind when first used as such.
3746 */
3747 obj->cache_level = I915_CACHE_LLC;
3748 } else
3749 obj->cache_level = I915_CACHE_NONE;
3750
05394f39 3751 return obj;
c397b908
DV
3752}
3753
3754int i915_gem_init_object(struct drm_gem_object *obj)
3755{
3756 BUG();
de151cf6 3757
673a394b
EA
3758 return 0;
3759}
3760
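/*
 * Final unreference path for a GEM object: detaches any phys backing,
 * forces an unbind (temporarily marking the driver non-interruptible so
 * the unbind cannot fail with -ERESTARTSYS), releases the pages, mmap
 * offset and stolen memory, then returns the wrapper to the object slab.
 */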
1488fc08 3761void i915_gem_free_object(struct drm_gem_object *gem_obj)
673a394b 3762{
1488fc08 3763 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
05394f39 3764 struct drm_device *dev = obj->base.dev;
be72615b 3765 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 3766
26e12f89
CW
3767 trace_i915_gem_object_destroy(obj);
3768
1488fc08
CW
3769 if (obj->phys_obj)
3770 i915_gem_detach_phys_object(dev, obj);
3771
3772 obj->pin_count = 0;
3773 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
3774 bool was_interruptible;
3775
3776 was_interruptible = dev_priv->mm.interruptible;
3777 dev_priv->mm.interruptible = false;
3778
3779 WARN_ON(i915_gem_object_unbind(obj));
3780
3781 dev_priv->mm.interruptible = was_interruptible;
3782 }
3783
a5570178 3784 obj->pages_pin_count = 0;
37e680a1 3785 i915_gem_object_put_pages(obj);
d8cb5086 3786 i915_gem_object_free_mmap_offset(obj);
0104fdbb 3787 i915_gem_object_release_stolen(obj);
de151cf6 3788
9da3da66
CW
3789 BUG_ON(obj->pages);
3790
2f745ad3
CW
3791 if (obj->base.import_attach)
3792 drm_prime_gem_destroy(&obj->base, NULL);
de151cf6 3793
05394f39
CW
3794 drm_gem_object_release(&obj->base);
3795 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 3796
05394f39 3797 kfree(obj->bit_17);
42dcedd4 3798 i915_gem_object_free(obj);
673a394b
EA
3799}
3800
29105ccc
CW
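/*
 * Quiesce the GPU for suspend/lastclose: wait for outstanding rendering,
 * retire requests, evict everything under UMS, reset the fence registers
 * and tear down the rings.  Also marks the device suspended so that no
 * new execbufs slip in while we do not control the chip.
 */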
3801int
3802i915_gem_idle(struct drm_device *dev)
3803{
3804 drm_i915_private_t *dev_priv = dev->dev_private;
3805 int ret;
28dfe52a 3806
29105ccc 3807 mutex_lock(&dev->struct_mutex);
1c5d22f7 3808
87acb0a5 3809 if (dev_priv->mm.suspended) {
29105ccc
CW
3810 mutex_unlock(&dev->struct_mutex);
3811 return 0;
28dfe52a
EA
3812 }
3813
b2da9fe5 3814 ret = i915_gpu_idle(dev);
6dbe2772
KP
3815 if (ret) {
3816 mutex_unlock(&dev->struct_mutex);
673a394b 3817 return ret;
6dbe2772 3818 }
b2da9fe5 3819 i915_gem_retire_requests(dev);
673a394b 3820
29105ccc 3821 /* Under UMS, be paranoid and evict. */
a39d7efc 3822 if (!drm_core_check_feature(dev, DRIVER_MODESET))
6c085a72 3823 i915_gem_evict_everything(dev);
29105ccc 3824
312817a3
CW
3825 i915_gem_reset_fences(dev);
3826
29105ccc
CW
3827 /* Hack! Don't let anybody do execbuf while we don't control the chip.
3828 * We need to replace this with a semaphore, or something.
3829 * And not confound mm.suspended!
3830 */
3831 dev_priv->mm.suspended = 1;
bc0c7f14 3832 del_timer_sync(&dev_priv->hangcheck_timer);
29105ccc
CW
3833
3834 i915_kernel_lost_context(dev);
6dbe2772 3835 i915_gem_cleanup_ringbuffer(dev);
29105ccc 3836
6dbe2772
KP
3837 mutex_unlock(&dev->struct_mutex);
3838
29105ccc
CW
3839 /* Cancel the retire work handler, which should be idle now. */
3840 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3841
673a394b
EA
3842 return 0;
3843}
3844
b9524a1e
BW
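/*
 * Re-apply the saved L3 remapping registers on Ivybridge: DOP clock
 * gating is disabled while the GEN7_L3LOG registers are rewritten from
 * dev_priv->l3_parity.remap_info, then restored.
 */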
3845void i915_gem_l3_remap(struct drm_device *dev)
3846{
3847 drm_i915_private_t *dev_priv = dev->dev_private;
3848 u32 misccpctl;
3849 int i;
3850
3851 if (!IS_IVYBRIDGE(dev))
3852 return;
3853
a4da4fa4 3854 if (!dev_priv->l3_parity.remap_info)
b9524a1e
BW
3855 return;
3856
3857 misccpctl = I915_READ(GEN7_MISCCPCTL);
3858 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
3859 POSTING_READ(GEN7_MISCCPCTL);
3860
3861 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
3862 u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
a4da4fa4 3863 if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
b9524a1e
BW
3864 DRM_DEBUG("0x%x was already programmed to %x\n",
3865 GEN7_L3LOG_BASE + i, remap);
a4da4fa4 3866 if (remap && !dev_priv->l3_parity.remap_info[i/4])
b9524a1e 3867 DRM_DEBUG_DRIVER("Clearing remapped register\n");
a4da4fa4 3868 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
b9524a1e
BW
3869 }
3870
3871 /* Make sure all the writes land before disabling dop clock gating */
3872 POSTING_READ(GEN7_L3LOG_BASE);
3873
3874 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
3875}
3876
f691e2f4
DV
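/*
 * Program the surface-swizzling control bits on gen5+ when bit-6
 * swizzling is in use, so the hardware matches the detected memory
 * layout.
 */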
3877void i915_gem_init_swizzling(struct drm_device *dev)
3878{
3879 drm_i915_private_t *dev_priv = dev->dev_private;
3880
11782b02 3881 if (INTEL_INFO(dev)->gen < 5 ||
f691e2f4
DV
3882 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3883 return;
3884
3885 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3886 DISP_TILE_SURFACE_SWIZZLING);
3887
11782b02
DV
3888 if (IS_GEN5(dev))
3889 return;
3890
f691e2f4
DV
3891 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3892 if (IS_GEN6(dev))
6b26c86d 3893 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
f691e2f4 3894 else
6b26c86d 3895 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
f691e2f4 3896}
e21af88d 3897
67b1b571
CW
3898static bool
3899intel_enable_blt(struct drm_device *dev)
3900{
3901 if (!HAS_BLT(dev))
3902 return false;
3903
3904 /* The blitter was dysfunctional on early prototypes */
3905 if (IS_GEN6(dev) && dev->pdev->revision < 8) {
3906 DRM_INFO("BLT not supported on this pre-production hardware;"
3907 " graphics performance will be degraded.\n");
3908 return false;
3909 }
3910
3911 return true;
3912}
3913
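/*
 * Bring up the GPU engines: apply workarounds (L3 remap, swizzling),
 * initialize the render ring and, where present, the BSD and blitter
 * rings, then set up hardware contexts and the aliasing PPGTT.
 */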
8187a2b7 3914int
f691e2f4 3915i915_gem_init_hw(struct drm_device *dev)
8187a2b7
ZN
3916{
3917 drm_i915_private_t *dev_priv = dev->dev_private;
3918 int ret;
68f95ba9 3919
e76e9aeb 3920 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
8ecd1a66
DV
3921 return -EIO;
3922
eda2d7f5
RV
3923 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
3924 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
3925
b9524a1e
BW
3926 i915_gem_l3_remap(dev);
3927
f691e2f4
DV
3928 i915_gem_init_swizzling(dev);
3929
5c1143bb 3930 ret = intel_init_render_ring_buffer(dev);
68f95ba9 3931 if (ret)
b6913e4b 3932 return ret;
68f95ba9
CW
3933
3934 if (HAS_BSD(dev)) {
5c1143bb 3935 ret = intel_init_bsd_ring_buffer(dev);
68f95ba9
CW
3936 if (ret)
3937 goto cleanup_render_ring;
d1b851fc 3938 }
68f95ba9 3939
67b1b571 3940 if (intel_enable_blt(dev)) {
549f7365
CW
3941 ret = intel_init_blt_ring_buffer(dev);
3942 if (ret)
3943 goto cleanup_bsd_ring;
3944 }
3945
6f392d54
CW
3946 dev_priv->next_seqno = 1;
3947
254f965c
BW
3948 /*
3949 * XXX: There was some w/a described somewhere suggesting loading
3950 * contexts before PPGTT.
3951 */
3952 i915_gem_context_init(dev);
e21af88d
DV
3953 i915_gem_init_ppgtt(dev);
3954
68f95ba9
CW
3955 return 0;
3956
549f7365 3957cleanup_bsd_ring:
1ec14ad3 3958 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
68f95ba9 3959cleanup_render_ring:
1ec14ad3 3960 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
8187a2b7
ZN
3961 return ret;
3962}
3963
1070a42b
CW
3964static bool
3965intel_enable_ppgtt(struct drm_device *dev)
3966{
3967 if (i915_enable_ppgtt >= 0)
3968 return i915_enable_ppgtt;
3969
3970#ifdef CONFIG_INTEL_IOMMU
3971 /* Disable ppgtt on SNB if VT-d is on. */
3972 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
3973 return false;
3974#endif
3975
3976 return true;
3977}
3978
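/*
 * One-time GEM initialization at driver load: carve up the global GTT
 * (reserving space for the aliasing PPGTT page directories when PPGTT is
 * enabled) and then initialize the hardware via i915_gem_init_hw().
 */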
3979int i915_gem_init(struct drm_device *dev)
3980{
3981 struct drm_i915_private *dev_priv = dev->dev_private;
3982 unsigned long gtt_size, mappable_size;
3983 int ret;
3984
3985 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
3986 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
3987
3988 mutex_lock(&dev->struct_mutex);
3989 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
3990 /* PPGTT pdes are stolen from global gtt ptes, so shrink the
3991 * aperture accordingly when using aliasing ppgtt. */
3992 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
3993
3994 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
3995
3996 ret = i915_gem_init_aliasing_ppgtt(dev);
3997 if (ret) {
3998 mutex_unlock(&dev->struct_mutex);
3999 return ret;
4000 }
4001 } else {
4002 /* Let GEM Manage all of the aperture.
4003 *
4004 * However, leave one page at the end still bound to the scratch
4005 * page. There are a number of places where the hardware
4006 * apparently prefetches past the end of the object, and we've
4007 * seen multiple hangs with the GPU head pointer stuck in a
4008 * batchbuffer bound at the last page of the aperture. One page
4009 * should be enough to keep any prefetching inside of the
4010 * aperture.
4011 */
4012 i915_gem_init_global_gtt(dev, 0, mappable_size,
4013 gtt_size);
4014 }
4015
4016 ret = i915_gem_init_hw(dev);
4017 mutex_unlock(&dev->struct_mutex);
4018 if (ret) {
4019 i915_gem_cleanup_aliasing_ppgtt(dev);
4020 return ret;
4021 }
4022
53ca26ca
DV
4023 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
4024 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4025 dev_priv->dri1.allow_batchbuffer = 1;
1070a42b
CW
4026 return 0;
4027}
4028
8187a2b7
ZN
4029void
4030i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4031{
4032 drm_i915_private_t *dev_priv = dev->dev_private;
b4519513 4033 struct intel_ring_buffer *ring;
1ec14ad3 4034 int i;
8187a2b7 4035
b4519513
CW
4036 for_each_ring(ring, dev_priv, i)
4037 intel_cleanup_ring_buffer(ring);
8187a2b7
ZN
4038}
4039
673a394b
EA
4040int
4041i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4042 struct drm_file *file_priv)
4043{
4044 drm_i915_private_t *dev_priv = dev->dev_private;
b4519513 4045 int ret;
673a394b 4046
79e53945
JB
4047 if (drm_core_check_feature(dev, DRIVER_MODESET))
4048 return 0;
4049
ba1234d1 4050 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 4051 DRM_ERROR("Reenabling wedged hardware, good luck\n");
ba1234d1 4052 atomic_set(&dev_priv->mm.wedged, 0);
673a394b
EA
4053 }
4054
673a394b 4055 mutex_lock(&dev->struct_mutex);
9bb2d6f9
EA
4056 dev_priv->mm.suspended = 0;
4057
f691e2f4 4058 ret = i915_gem_init_hw(dev);
d816f6ac
WF
4059 if (ret != 0) {
4060 mutex_unlock(&dev->struct_mutex);
9bb2d6f9 4061 return ret;
d816f6ac 4062 }
9bb2d6f9 4063
69dc4987 4064 BUG_ON(!list_empty(&dev_priv->mm.active_list));
673a394b 4065 mutex_unlock(&dev->struct_mutex);
dbb19d30 4066
5f35308b
CW
4067 ret = drm_irq_install(dev);
4068 if (ret)
4069 goto cleanup_ringbuffer;
dbb19d30 4070
673a394b 4071 return 0;
5f35308b
CW
4072
4073cleanup_ringbuffer:
4074 mutex_lock(&dev->struct_mutex);
4075 i915_gem_cleanup_ringbuffer(dev);
4076 dev_priv->mm.suspended = 1;
4077 mutex_unlock(&dev->struct_mutex);
4078
4079 return ret;
673a394b
EA
4080}
4081
4082int
4083i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4084 struct drm_file *file_priv)
4085{
79e53945
JB
4086 if (drm_core_check_feature(dev, DRIVER_MODESET))
4087 return 0;
4088
dbb19d30 4089 drm_irq_uninstall(dev);
e6890f6f 4090 return i915_gem_idle(dev);
673a394b
EA
4091}
4092
4093void
4094i915_gem_lastclose(struct drm_device *dev)
4095{
4096 int ret;
673a394b 4097
e806b495
EA
4098 if (drm_core_check_feature(dev, DRIVER_MODESET))
4099 return;
4100
6dbe2772
KP
4101 ret = i915_gem_idle(dev);
4102 if (ret)
4103 DRM_ERROR("failed to idle hardware: %d\n", ret);
673a394b
EA
4104}
4105
64193406
CW
4106static void
4107init_ring_lists(struct intel_ring_buffer *ring)
4108{
4109 INIT_LIST_HEAD(&ring->active_list);
4110 INIT_LIST_HEAD(&ring->request_list);
64193406
CW
4111}
4112
673a394b
EA
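/*
 * Early, hardware-independent GEM setup.  Creates the slab cache used for
 * struct drm_i915_gem_object allocations, initializes the object and
 * request lists, fence registers, the retire work handler and the
 * shrinker, and detects the bit-6 swizzling mode.
 */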
4113void
4114i915_gem_load(struct drm_device *dev)
4115{
4116 drm_i915_private_t *dev_priv = dev->dev_private;
42dcedd4
CW
4117 int i;
4118
4119 dev_priv->slab =
4120 kmem_cache_create("i915_gem_object",
4121 sizeof(struct drm_i915_gem_object), 0,
4122 SLAB_HWCACHE_ALIGN,
4123 NULL);
673a394b 4124
69dc4987 4125 INIT_LIST_HEAD(&dev_priv->mm.active_list);
673a394b 4126 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
6c085a72
CW
4127 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4128 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
a09ba7fa 4129 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
1ec14ad3
CW
4130 for (i = 0; i < I915_NUM_RINGS; i++)
4131 init_ring_lists(&dev_priv->ring[i]);
4b9de737 4132 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
007cc8ac 4133 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
673a394b
EA
4134 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4135 i915_gem_retire_work_handler);
30dbf0c0 4136 init_completion(&dev_priv->error_completion);
31169714 4137
94400120
DA
4138 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4139 if (IS_GEN3(dev)) {
50743298
DV
4140 I915_WRITE(MI_ARB_STATE,
4141 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
94400120
DA
4142 }
4143
72bfa19c
CW
4144 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4145
de151cf6 4146 /* Old X drivers will take 0-2 for front, back, depth buffers */
b397c836
EA
4147 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4148 dev_priv->fence_reg_start = 3;
de151cf6 4149
a6c45cf0 4150 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
de151cf6
JB
4151 dev_priv->num_fence_regs = 16;
4152 else
4153 dev_priv->num_fence_regs = 8;
4154
b5aa8a0f 4155 /* Initialize fence registers to zero */
ada726c7 4156 i915_gem_reset_fences(dev);
10ed13e4 4157
673a394b 4158 i915_gem_detect_bit_6_swizzle(dev);
6b95a207 4159 init_waitqueue_head(&dev_priv->pending_flip_queue);
17250b71 4160
ce453d81
CW
4161 dev_priv->mm.interruptible = true;
4162
17250b71
CW
4163 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
4164 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
4165 register_shrinker(&dev_priv->mm.inactive_shrinker);
673a394b 4166}
71acb5eb
DA
4167
4168/*
 4169 * Create a physically contiguous memory object for this device,
 4170 * e.g. for the cursor and overlay registers.
4171 */
995b6762
CW
4172static int i915_gem_init_phys_object(struct drm_device *dev,
4173 int id, int size, int align)
71acb5eb
DA
4174{
4175 drm_i915_private_t *dev_priv = dev->dev_private;
4176 struct drm_i915_gem_phys_object *phys_obj;
4177 int ret;
4178
4179 if (dev_priv->mm.phys_objs[id - 1] || !size)
4180 return 0;
4181
9a298b2a 4182 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
71acb5eb
DA
4183 if (!phys_obj)
4184 return -ENOMEM;
4185
4186 phys_obj->id = id;
4187
6eeefaf3 4188 phys_obj->handle = drm_pci_alloc(dev, size, align);
71acb5eb
DA
4189 if (!phys_obj->handle) {
4190 ret = -ENOMEM;
4191 goto kfree_obj;
4192 }
4193#ifdef CONFIG_X86
4194 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4195#endif
4196
4197 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4198
4199 return 0;
4200kfree_obj:
9a298b2a 4201 kfree(phys_obj);
71acb5eb
DA
4202 return ret;
4203}
4204
995b6762 4205static void i915_gem_free_phys_object(struct drm_device *dev, int id)
71acb5eb
DA
4206{
4207 drm_i915_private_t *dev_priv = dev->dev_private;
4208 struct drm_i915_gem_phys_object *phys_obj;
4209
4210 if (!dev_priv->mm.phys_objs[id - 1])
4211 return;
4212
4213 phys_obj = dev_priv->mm.phys_objs[id - 1];
4214 if (phys_obj->cur_obj) {
4215 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4216 }
4217
4218#ifdef CONFIG_X86
4219 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4220#endif
4221 drm_pci_free(dev, phys_obj->handle);
4222 kfree(phys_obj);
4223 dev_priv->mm.phys_objs[id - 1] = NULL;
4224}
4225
4226void i915_gem_free_all_phys_object(struct drm_device *dev)
4227{
4228 int i;
4229
260883c8 4230 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
71acb5eb
DA
4231 i915_gem_free_phys_object(dev, i);
4232}
4233
4234void i915_gem_detach_phys_object(struct drm_device *dev,
05394f39 4235 struct drm_i915_gem_object *obj)
71acb5eb 4236{
05394f39 4237 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
e5281ccd 4238 char *vaddr;
71acb5eb 4239 int i;
71acb5eb
DA
4240 int page_count;
4241
05394f39 4242 if (!obj->phys_obj)
71acb5eb 4243 return;
05394f39 4244 vaddr = obj->phys_obj->handle->vaddr;
71acb5eb 4245
05394f39 4246 page_count = obj->base.size / PAGE_SIZE;
71acb5eb 4247 for (i = 0; i < page_count; i++) {
5949eac4 4248 struct page *page = shmem_read_mapping_page(mapping, i);
e5281ccd
CW
4249 if (!IS_ERR(page)) {
4250 char *dst = kmap_atomic(page);
4251 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
4252 kunmap_atomic(dst);
4253
4254 drm_clflush_pages(&page, 1);
4255
4256 set_page_dirty(page);
4257 mark_page_accessed(page);
4258 page_cache_release(page);
4259 }
71acb5eb 4260 }
e76e9aeb 4261 i915_gem_chipset_flush(dev);
d78b47b9 4262
05394f39
CW
4263 obj->phys_obj->cur_obj = NULL;
4264 obj->phys_obj = NULL;
71acb5eb
DA
4265}
4266
4267int
4268i915_gem_attach_phys_object(struct drm_device *dev,
05394f39 4269 struct drm_i915_gem_object *obj,
6eeefaf3
CW
4270 int id,
4271 int align)
71acb5eb 4272{
05394f39 4273 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
71acb5eb 4274 drm_i915_private_t *dev_priv = dev->dev_private;
71acb5eb
DA
4275 int ret = 0;
4276 int page_count;
4277 int i;
4278
4279 if (id > I915_MAX_PHYS_OBJECT)
4280 return -EINVAL;
4281
05394f39
CW
4282 if (obj->phys_obj) {
4283 if (obj->phys_obj->id == id)
71acb5eb
DA
4284 return 0;
4285 i915_gem_detach_phys_object(dev, obj);
4286 }
4287
71acb5eb
DA
4288 /* create a new object */
4289 if (!dev_priv->mm.phys_objs[id - 1]) {
4290 ret = i915_gem_init_phys_object(dev, id,
05394f39 4291 obj->base.size, align);
71acb5eb 4292 if (ret) {
05394f39
CW
4293 DRM_ERROR("failed to init phys object %d size: %zu\n",
4294 id, obj->base.size);
e5281ccd 4295 return ret;
71acb5eb
DA
4296 }
4297 }
4298
4299 /* bind to the object */
05394f39
CW
4300 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4301 obj->phys_obj->cur_obj = obj;
71acb5eb 4302
05394f39 4303 page_count = obj->base.size / PAGE_SIZE;
71acb5eb
DA
4304
4305 for (i = 0; i < page_count; i++) {
e5281ccd
CW
4306 struct page *page;
4307 char *dst, *src;
4308
5949eac4 4309 page = shmem_read_mapping_page(mapping, i);
e5281ccd
CW
4310 if (IS_ERR(page))
4311 return PTR_ERR(page);
71acb5eb 4312
ff75b9bc 4313 src = kmap_atomic(page);
05394f39 4314 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
71acb5eb 4315 memcpy(dst, src, PAGE_SIZE);
3e4d3af5 4316 kunmap_atomic(src);
71acb5eb 4317
e5281ccd
CW
4318 mark_page_accessed(page);
4319 page_cache_release(page);
4320 }
d78b47b9 4321
71acb5eb 4322 return 0;
71acb5eb
DA
4323}
4324
4325static int
05394f39
CW
4326i915_gem_phys_pwrite(struct drm_device *dev,
4327 struct drm_i915_gem_object *obj,
71acb5eb
DA
4328 struct drm_i915_gem_pwrite *args,
4329 struct drm_file *file_priv)
4330{
05394f39 4331 void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
b47b30cc 4332 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
71acb5eb 4333
b47b30cc
CW
4334 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
4335 unsigned long unwritten;
4336
4337 /* The physical object once assigned is fixed for the lifetime
4338 * of the obj, so we can safely drop the lock and continue
4339 * to access vaddr.
4340 */
4341 mutex_unlock(&dev->struct_mutex);
4342 unwritten = copy_from_user(vaddr, user_data, args->size);
4343 mutex_lock(&dev->struct_mutex);
4344 if (unwritten)
4345 return -EFAULT;
4346 }
71acb5eb 4347
e76e9aeb 4348 i915_gem_chipset_flush(dev);
71acb5eb
DA
4349 return 0;
4350}
b962442e 4351
f787a5f5 4352void i915_gem_release(struct drm_device *dev, struct drm_file *file)
b962442e 4353{
f787a5f5 4354 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e
EA
4355
4356 /* Clean up our request list when the client is going away, so that
4357 * later retire_requests won't dereference our soon-to-be-gone
4358 * file_priv.
4359 */
1c25595f 4360 spin_lock(&file_priv->mm.lock);
f787a5f5
CW
4361 while (!list_empty(&file_priv->mm.request_list)) {
4362 struct drm_i915_gem_request *request;
4363
4364 request = list_first_entry(&file_priv->mm.request_list,
4365 struct drm_i915_gem_request,
4366 client_list);
4367 list_del(&request->client_list);
4368 request->file_priv = NULL;
4369 }
1c25595f 4370 spin_unlock(&file_priv->mm.lock);
b962442e 4371}
31169714 4372
5774506f
CW
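/*
 * Best-effort check whether @task currently owns @mutex.  Only reliable
 * when mutex owner tracking is compiled in (SMP or mutex debugging);
 * otherwise we conservatively report false.
 */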
4373static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
4374{
4375 if (!mutex_is_locked(mutex))
4376 return false;
4377
4378#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
4379 return mutex->owner == task;
4380#else
4381 /* Since UP may be pre-empted, we cannot assume that we own the lock */
4382 return false;
4383#endif
4384}
4385
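/*
 * Shrinker callback.  If struct_mutex cannot be taken we only continue
 * when the current task already holds it (direct reclaim recursing from
 * one of our own allocations).  With a nonzero nr_to_scan we first purge
 * easily freed objects, falling back to i915_gem_shrink_all() if that is
 * not enough; the return value is the number of pages still reclaimable.
 */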
31169714 4386static int
1495f230 4387i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
31169714 4388{
17250b71
CW
4389 struct drm_i915_private *dev_priv =
4390 container_of(shrinker,
4391 struct drm_i915_private,
4392 mm.inactive_shrinker);
4393 struct drm_device *dev = dev_priv->dev;
6c085a72 4394 struct drm_i915_gem_object *obj;
1495f230 4395 int nr_to_scan = sc->nr_to_scan;
5774506f 4396 bool unlock = true;
17250b71
CW
4397 int cnt;
4398
5774506f
CW
4399 if (!mutex_trylock(&dev->struct_mutex)) {
4400 if (!mutex_is_locked_by(&dev->struct_mutex, current))
4401 return 0;
4402
4403 unlock = false;
4404 }
31169714 4405
6c085a72
CW
4406 if (nr_to_scan) {
4407 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
4408 if (nr_to_scan > 0)
4409 i915_gem_shrink_all(dev_priv);
31169714
CW
4410 }
4411
17250b71 4412 cnt = 0;
6c085a72 4413 list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
a5570178
CW
4414 if (obj->pages_pin_count == 0)
4415 cnt += obj->base.size >> PAGE_SHIFT;
6c085a72 4416 list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
a5570178 4417 if (obj->pin_count == 0 && obj->pages_pin_count == 0)
6c085a72 4418 cnt += obj->base.size >> PAGE_SHIFT;
17250b71 4419
5774506f
CW
4420 if (unlock)
4421 mutex_unlock(&dev->struct_mutex);
6c085a72 4422 return cnt;
31169714 4423}