drm/i915: Defer reporting EIO until we try to use the GPU
[linux-2.6/x86.git] / drivers / gpu / drm / i915 / i915_gem.c
blob52dd77b1bb7ceddaf34bd1a145aa51d6520de953
1 /*
2 * Copyright © 2008 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
38 static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
39 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
40 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
41 static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
42 bool write);
43 static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
44 uint64_t offset,
45 uint64_t size);
46 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
47 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
48 unsigned alignment,
49 bool map_and_fenceable);
50 static void i915_gem_clear_fence_reg(struct drm_device *dev,
51 struct drm_i915_fence_reg *reg);
52 static int i915_gem_phys_pwrite(struct drm_device *dev,
53 struct drm_i915_gem_object *obj,
54 struct drm_i915_gem_pwrite *args,
55 struct drm_file *file);
56 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
58 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
59 int nr_to_scan,
60 gfp_t gfp_mask);
63 /* some bookkeeping */
64 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
65 size_t size)
67 dev_priv->mm.object_count++;
68 dev_priv->mm.object_memory += size;
71 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
72 size_t size)
74 dev_priv->mm.object_count--;
75 dev_priv->mm.object_memory -= size;
78 static int
79 i915_gem_wait_for_error(struct drm_device *dev)
81 struct drm_i915_private *dev_priv = dev->dev_private;
82 struct completion *x = &dev_priv->error_completion;
83 unsigned long flags;
84 int ret;
86 if (!atomic_read(&dev_priv->mm.wedged))
87 return 0;
89 ret = wait_for_completion_interruptible(x);
90 if (ret)
91 return ret;
93 if (atomic_read(&dev_priv->mm.wedged)) {
94 /* GPU is hung, bump the completion count to account for
95 * the token we just consumed so that we never hit zero and
96 * end up waiting upon a subsequent completion event that
97 * will never happen.
99 spin_lock_irqsave(&x->wait.lock, flags);
100 x->done++;
101 spin_unlock_irqrestore(&x->wait.lock, flags);
103 return 0;
106 int i915_mutex_lock_interruptible(struct drm_device *dev)
108 int ret;
110 ret = i915_gem_wait_for_error(dev);
111 if (ret)
112 return ret;
114 ret = mutex_lock_interruptible(&dev->struct_mutex);
115 if (ret)
116 return ret;
118 WARN_ON(i915_verify_lists(dev));
119 return 0;
122 static inline bool
123 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
125 return obj->gtt_space && !obj->active && obj->pin_count == 0;
128 void i915_gem_do_init(struct drm_device *dev,
129 unsigned long start,
130 unsigned long mappable_end,
131 unsigned long end)
133 drm_i915_private_t *dev_priv = dev->dev_private;
135 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
137 dev_priv->mm.gtt_start = start;
138 dev_priv->mm.gtt_mappable_end = mappable_end;
139 dev_priv->mm.gtt_end = end;
140 dev_priv->mm.gtt_total = end - start;
141 dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
143 /* Take over this portion of the GTT */
144 intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
148 i915_gem_init_ioctl(struct drm_device *dev, void *data,
149 struct drm_file *file)
151 struct drm_i915_gem_init *args = data;
153 if (args->gtt_start >= args->gtt_end ||
154 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
155 return -EINVAL;
157 mutex_lock(&dev->struct_mutex);
158 i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
159 mutex_unlock(&dev->struct_mutex);
161 return 0;
165 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
166 struct drm_file *file)
168 struct drm_i915_private *dev_priv = dev->dev_private;
169 struct drm_i915_gem_get_aperture *args = data;
170 struct drm_i915_gem_object *obj;
171 size_t pinned;
173 if (!(dev->driver->driver_features & DRIVER_GEM))
174 return -ENODEV;
176 pinned = 0;
177 mutex_lock(&dev->struct_mutex);
178 list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
179 pinned += obj->gtt_space->size;
180 mutex_unlock(&dev->struct_mutex);
182 args->aper_size = dev_priv->mm.gtt_total;
183 args->aper_available_size = args->aper_size -pinned;
185 return 0;
189 * Creates a new mm object and returns a handle to it.
192 i915_gem_create_ioctl(struct drm_device *dev, void *data,
193 struct drm_file *file)
195 struct drm_i915_gem_create *args = data;
196 struct drm_i915_gem_object *obj;
197 int ret;
198 u32 handle;
200 args->size = roundup(args->size, PAGE_SIZE);
202 /* Allocate the new object */
203 obj = i915_gem_alloc_object(dev, args->size);
204 if (obj == NULL)
205 return -ENOMEM;
207 ret = drm_gem_handle_create(file, &obj->base, &handle);
208 if (ret) {
209 drm_gem_object_release(&obj->base);
210 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
211 kfree(obj);
212 return ret;
215 /* drop reference from allocate - handle holds it now */
216 drm_gem_object_unreference(&obj->base);
217 trace_i915_gem_object_create(obj);
219 args->handle = handle;
220 return 0;
223 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
225 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
227 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
228 obj->tiling_mode != I915_TILING_NONE;
/*
 * Copy @length bytes from @src_page + @src_offset to @dst_page + @dst_offset,
 * mapping both pages with kmap() for the duration of the copy.
 */
static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *to = kmap(dst_page);
	char *from = kmap(src_page);

	memcpy(to + dst_offset, from + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}

/*
 * Copy @length bytes between a GPU-side page and a CPU-side page.
 *
 * @is_read selects the direction: non-zero copies GPU page -> CPU page,
 * zero copies CPU page -> GPU page. If bit 17 of the GPU page's physical
 * address is clear, this is a plain slow_shmem_copy(); otherwise the copy is
 * done in pieces that never cross a 64-byte boundary of the GPU offset, with
 * bit 6 of the GPU offset flipped.
 */
static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			slow_shmem_copy(cpu_page, cpu_offset,
					gpu_page, gpu_offset, length);
		else
			slow_shmem_copy(gpu_page, gpu_offset,
					cpu_page, cpu_offset, length);
		return;
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int chunk = min(cacheline_end - gpu_offset, length);
		char *gpu = gpu_vaddr + (gpu_offset ^ 64);
		char *cpu = cpu_vaddr + cpu_offset;

		if (is_read)
			memcpy(cpu, gpu, chunk);
		else
			memcpy(gpu, cpu, chunk);

		cpu_offset += chunk;
		gpu_offset += chunk;
		length -= chunk;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}

299 * This is the fast shmem pread path, which attempts to copy_from_user directly
300 * from the backing pages of the object to the user's address space. On a
301 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
303 static int
304 i915_gem_shmem_pread_fast(struct drm_device *dev,
305 struct drm_i915_gem_object *obj,
306 struct drm_i915_gem_pread *args,
307 struct drm_file *file)
309 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
310 ssize_t remain;
311 loff_t offset;
312 char __user *user_data;
313 int page_offset, page_length;
315 user_data = (char __user *) (uintptr_t) args->data_ptr;
316 remain = args->size;
318 offset = args->offset;
320 while (remain > 0) {
321 struct page *page;
322 char *vaddr;
323 int ret;
325 /* Operation in this page
327 * page_offset = offset within page
328 * page_length = bytes to copy for this page
330 page_offset = offset & (PAGE_SIZE-1);
331 page_length = remain;
332 if ((page_offset + remain) > PAGE_SIZE)
333 page_length = PAGE_SIZE - page_offset;
335 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
336 GFP_HIGHUSER | __GFP_RECLAIMABLE);
337 if (IS_ERR(page))
338 return PTR_ERR(page);
340 vaddr = kmap_atomic(page);
341 ret = __copy_to_user_inatomic(user_data,
342 vaddr + page_offset,
343 page_length);
344 kunmap_atomic(vaddr);
346 mark_page_accessed(page);
347 page_cache_release(page);
348 if (ret)
349 return -EFAULT;
351 remain -= page_length;
352 user_data += page_length;
353 offset += page_length;
356 return 0;
360 * This is the fallback shmem pread path, which allocates temporary storage
361 * in kernel space to copy_to_user into outside of the struct_mutex, so we
362 * can copy out of the object's backing pages while holding the struct mutex
363 * and not take page faults.
365 static int
366 i915_gem_shmem_pread_slow(struct drm_device *dev,
367 struct drm_i915_gem_object *obj,
368 struct drm_i915_gem_pread *args,
369 struct drm_file *file)
371 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
372 struct mm_struct *mm = current->mm;
373 struct page **user_pages;
374 ssize_t remain;
375 loff_t offset, pinned_pages, i;
376 loff_t first_data_page, last_data_page, num_pages;
377 int shmem_page_offset;
378 int data_page_index, data_page_offset;
379 int page_length;
380 int ret;
381 uint64_t data_ptr = args->data_ptr;
382 int do_bit17_swizzling;
384 remain = args->size;
386 /* Pin the user pages containing the data. We can't fault while
387 * holding the struct mutex, yet we want to hold it while
388 * dereferencing the user data.
390 first_data_page = data_ptr / PAGE_SIZE;
391 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
392 num_pages = last_data_page - first_data_page + 1;
394 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
395 if (user_pages == NULL)
396 return -ENOMEM;
398 mutex_unlock(&dev->struct_mutex);
399 down_read(&mm->mmap_sem);
400 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
401 num_pages, 1, 0, user_pages, NULL);
402 up_read(&mm->mmap_sem);
403 mutex_lock(&dev->struct_mutex);
404 if (pinned_pages < num_pages) {
405 ret = -EFAULT;
406 goto out;
409 ret = i915_gem_object_set_cpu_read_domain_range(obj,
410 args->offset,
411 args->size);
412 if (ret)
413 goto out;
415 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
417 offset = args->offset;
419 while (remain > 0) {
420 struct page *page;
422 /* Operation in this page
424 * shmem_page_offset = offset within page in shmem file
425 * data_page_index = page number in get_user_pages return
426 * data_page_offset = offset with data_page_index page.
427 * page_length = bytes to copy for this page
429 shmem_page_offset = offset & ~PAGE_MASK;
430 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
431 data_page_offset = data_ptr & ~PAGE_MASK;
433 page_length = remain;
434 if ((shmem_page_offset + page_length) > PAGE_SIZE)
435 page_length = PAGE_SIZE - shmem_page_offset;
436 if ((data_page_offset + page_length) > PAGE_SIZE)
437 page_length = PAGE_SIZE - data_page_offset;
439 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
440 GFP_HIGHUSER | __GFP_RECLAIMABLE);
441 if (IS_ERR(page))
442 return PTR_ERR(page);
444 if (do_bit17_swizzling) {
445 slow_shmem_bit17_copy(page,
446 shmem_page_offset,
447 user_pages[data_page_index],
448 data_page_offset,
449 page_length,
451 } else {
452 slow_shmem_copy(user_pages[data_page_index],
453 data_page_offset,
454 page,
455 shmem_page_offset,
456 page_length);
459 mark_page_accessed(page);
460 page_cache_release(page);
462 remain -= page_length;
463 data_ptr += page_length;
464 offset += page_length;
467 out:
468 for (i = 0; i < pinned_pages; i++) {
469 SetPageDirty(user_pages[i]);
470 mark_page_accessed(user_pages[i]);
471 page_cache_release(user_pages[i]);
473 drm_free_large(user_pages);
475 return ret;
479 * Reads data from the object referenced by handle.
481 * On error, the contents of *data are undefined.
484 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
485 struct drm_file *file)
487 struct drm_i915_gem_pread *args = data;
488 struct drm_i915_gem_object *obj;
489 int ret = 0;
491 if (args->size == 0)
492 return 0;
494 if (!access_ok(VERIFY_WRITE,
495 (char __user *)(uintptr_t)args->data_ptr,
496 args->size))
497 return -EFAULT;
499 ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
500 args->size);
501 if (ret)
502 return -EFAULT;
504 ret = i915_mutex_lock_interruptible(dev);
505 if (ret)
506 return ret;
508 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
509 if (obj == NULL) {
510 ret = -ENOENT;
511 goto unlock;
514 /* Bounds check source. */
515 if (args->offset > obj->base.size ||
516 args->size > obj->base.size - args->offset) {
517 ret = -EINVAL;
518 goto out;
521 ret = i915_gem_object_set_cpu_read_domain_range(obj,
522 args->offset,
523 args->size);
524 if (ret)
525 goto out;
527 ret = -EFAULT;
528 if (!i915_gem_object_needs_bit17_swizzle(obj))
529 ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
530 if (ret == -EFAULT)
531 ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
533 out:
534 drm_gem_object_unreference(&obj->base);
535 unlock:
536 mutex_unlock(&dev->struct_mutex);
537 return ret;
540 /* This is the fast write path which cannot handle
541 * page faults in the source data
544 static inline int
545 fast_user_write(struct io_mapping *mapping,
546 loff_t page_base, int page_offset,
547 char __user *user_data,
548 int length)
550 char *vaddr_atomic;
551 unsigned long unwritten;
553 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
554 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
555 user_data, length);
556 io_mapping_unmap_atomic(vaddr_atomic);
557 return unwritten;
560 /* Here's the write path which can sleep for
561 * page faults
564 static inline void
565 slow_kernel_write(struct io_mapping *mapping,
566 loff_t gtt_base, int gtt_offset,
567 struct page *user_page, int user_offset,
568 int length)
570 char __iomem *dst_vaddr;
571 char *src_vaddr;
573 dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
574 src_vaddr = kmap(user_page);
576 memcpy_toio(dst_vaddr + gtt_offset,
577 src_vaddr + user_offset,
578 length);
580 kunmap(user_page);
581 io_mapping_unmap(dst_vaddr);
585 * This is the fast pwrite path, where we copy the data directly from the
586 * user into the GTT, uncached.
588 static int
589 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
590 struct drm_i915_gem_object *obj,
591 struct drm_i915_gem_pwrite *args,
592 struct drm_file *file)
594 drm_i915_private_t *dev_priv = dev->dev_private;
595 ssize_t remain;
596 loff_t offset, page_base;
597 char __user *user_data;
598 int page_offset, page_length;
600 user_data = (char __user *) (uintptr_t) args->data_ptr;
601 remain = args->size;
603 offset = obj->gtt_offset + args->offset;
605 while (remain > 0) {
606 /* Operation in this page
608 * page_base = page offset within aperture
609 * page_offset = offset within page
610 * page_length = bytes to copy for this page
612 page_base = (offset & ~(PAGE_SIZE-1));
613 page_offset = offset & (PAGE_SIZE-1);
614 page_length = remain;
615 if ((page_offset + remain) > PAGE_SIZE)
616 page_length = PAGE_SIZE - page_offset;
618 /* If we get a fault while copying data, then (presumably) our
619 * source page isn't available. Return the error and we'll
620 * retry in the slow path.
622 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
623 page_offset, user_data, page_length))
625 return -EFAULT;
627 remain -= page_length;
628 user_data += page_length;
629 offset += page_length;
632 return 0;
636 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
637 * the memory and maps it using kmap_atomic for copying.
639 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
640 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
642 static int
643 i915_gem_gtt_pwrite_slow(struct drm_device *dev,
644 struct drm_i915_gem_object *obj,
645 struct drm_i915_gem_pwrite *args,
646 struct drm_file *file)
648 drm_i915_private_t *dev_priv = dev->dev_private;
649 ssize_t remain;
650 loff_t gtt_page_base, offset;
651 loff_t first_data_page, last_data_page, num_pages;
652 loff_t pinned_pages, i;
653 struct page **user_pages;
654 struct mm_struct *mm = current->mm;
655 int gtt_page_offset, data_page_offset, data_page_index, page_length;
656 int ret;
657 uint64_t data_ptr = args->data_ptr;
659 remain = args->size;
661 /* Pin the user pages containing the data. We can't fault while
662 * holding the struct mutex, and all of the pwrite implementations
663 * want to hold it while dereferencing the user data.
665 first_data_page = data_ptr / PAGE_SIZE;
666 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
667 num_pages = last_data_page - first_data_page + 1;
669 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
670 if (user_pages == NULL)
671 return -ENOMEM;
673 mutex_unlock(&dev->struct_mutex);
674 down_read(&mm->mmap_sem);
675 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
676 num_pages, 0, 0, user_pages, NULL);
677 up_read(&mm->mmap_sem);
678 mutex_lock(&dev->struct_mutex);
679 if (pinned_pages < num_pages) {
680 ret = -EFAULT;
681 goto out_unpin_pages;
684 ret = i915_gem_object_set_to_gtt_domain(obj, true);
685 if (ret)
686 goto out_unpin_pages;
688 ret = i915_gem_object_put_fence(obj);
689 if (ret)
690 goto out_unpin_pages;
692 offset = obj->gtt_offset + args->offset;
694 while (remain > 0) {
695 /* Operation in this page
697 * gtt_page_base = page offset within aperture
698 * gtt_page_offset = offset within page in aperture
699 * data_page_index = page number in get_user_pages return
700 * data_page_offset = offset with data_page_index page.
701 * page_length = bytes to copy for this page
703 gtt_page_base = offset & PAGE_MASK;
704 gtt_page_offset = offset & ~PAGE_MASK;
705 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
706 data_page_offset = data_ptr & ~PAGE_MASK;
708 page_length = remain;
709 if ((gtt_page_offset + page_length) > PAGE_SIZE)
710 page_length = PAGE_SIZE - gtt_page_offset;
711 if ((data_page_offset + page_length) > PAGE_SIZE)
712 page_length = PAGE_SIZE - data_page_offset;
714 slow_kernel_write(dev_priv->mm.gtt_mapping,
715 gtt_page_base, gtt_page_offset,
716 user_pages[data_page_index],
717 data_page_offset,
718 page_length);
720 remain -= page_length;
721 offset += page_length;
722 data_ptr += page_length;
725 out_unpin_pages:
726 for (i = 0; i < pinned_pages; i++)
727 page_cache_release(user_pages[i]);
728 drm_free_large(user_pages);
730 return ret;
734 * This is the fast shmem pwrite path, which attempts to directly
735 * copy_from_user into the kmapped pages backing the object.
737 static int
738 i915_gem_shmem_pwrite_fast(struct drm_device *dev,
739 struct drm_i915_gem_object *obj,
740 struct drm_i915_gem_pwrite *args,
741 struct drm_file *file)
743 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
744 ssize_t remain;
745 loff_t offset;
746 char __user *user_data;
747 int page_offset, page_length;
749 user_data = (char __user *) (uintptr_t) args->data_ptr;
750 remain = args->size;
752 offset = args->offset;
753 obj->dirty = 1;
755 while (remain > 0) {
756 struct page *page;
757 char *vaddr;
758 int ret;
760 /* Operation in this page
762 * page_offset = offset within page
763 * page_length = bytes to copy for this page
765 page_offset = offset & (PAGE_SIZE-1);
766 page_length = remain;
767 if ((page_offset + remain) > PAGE_SIZE)
768 page_length = PAGE_SIZE - page_offset;
770 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
771 GFP_HIGHUSER | __GFP_RECLAIMABLE);
772 if (IS_ERR(page))
773 return PTR_ERR(page);
775 vaddr = kmap_atomic(page, KM_USER0);
776 ret = __copy_from_user_inatomic(vaddr + page_offset,
777 user_data,
778 page_length);
779 kunmap_atomic(vaddr, KM_USER0);
781 set_page_dirty(page);
782 mark_page_accessed(page);
783 page_cache_release(page);
785 /* If we get a fault while copying data, then (presumably) our
786 * source page isn't available. Return the error and we'll
787 * retry in the slow path.
789 if (ret)
790 return -EFAULT;
792 remain -= page_length;
793 user_data += page_length;
794 offset += page_length;
797 return 0;
801 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
802 * the memory and maps it using kmap_atomic for copying.
804 * This avoids taking mmap_sem for faulting on the user's address while the
805 * struct_mutex is held.
807 static int
808 i915_gem_shmem_pwrite_slow(struct drm_device *dev,
809 struct drm_i915_gem_object *obj,
810 struct drm_i915_gem_pwrite *args,
811 struct drm_file *file)
813 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
814 struct mm_struct *mm = current->mm;
815 struct page **user_pages;
816 ssize_t remain;
817 loff_t offset, pinned_pages, i;
818 loff_t first_data_page, last_data_page, num_pages;
819 int shmem_page_offset;
820 int data_page_index, data_page_offset;
821 int page_length;
822 int ret;
823 uint64_t data_ptr = args->data_ptr;
824 int do_bit17_swizzling;
826 remain = args->size;
828 /* Pin the user pages containing the data. We can't fault while
829 * holding the struct mutex, and all of the pwrite implementations
830 * want to hold it while dereferencing the user data.
832 first_data_page = data_ptr / PAGE_SIZE;
833 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
834 num_pages = last_data_page - first_data_page + 1;
836 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
837 if (user_pages == NULL)
838 return -ENOMEM;
840 mutex_unlock(&dev->struct_mutex);
841 down_read(&mm->mmap_sem);
842 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
843 num_pages, 0, 0, user_pages, NULL);
844 up_read(&mm->mmap_sem);
845 mutex_lock(&dev->struct_mutex);
846 if (pinned_pages < num_pages) {
847 ret = -EFAULT;
848 goto out;
851 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
852 if (ret)
853 goto out;
855 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
857 offset = args->offset;
858 obj->dirty = 1;
860 while (remain > 0) {
861 struct page *page;
863 /* Operation in this page
865 * shmem_page_offset = offset within page in shmem file
866 * data_page_index = page number in get_user_pages return
867 * data_page_offset = offset with data_page_index page.
868 * page_length = bytes to copy for this page
870 shmem_page_offset = offset & ~PAGE_MASK;
871 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
872 data_page_offset = data_ptr & ~PAGE_MASK;
874 page_length = remain;
875 if ((shmem_page_offset + page_length) > PAGE_SIZE)
876 page_length = PAGE_SIZE - shmem_page_offset;
877 if ((data_page_offset + page_length) > PAGE_SIZE)
878 page_length = PAGE_SIZE - data_page_offset;
880 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
881 GFP_HIGHUSER | __GFP_RECLAIMABLE);
882 if (IS_ERR(page)) {
883 ret = PTR_ERR(page);
884 goto out;
887 if (do_bit17_swizzling) {
888 slow_shmem_bit17_copy(page,
889 shmem_page_offset,
890 user_pages[data_page_index],
891 data_page_offset,
892 page_length,
894 } else {
895 slow_shmem_copy(page,
896 shmem_page_offset,
897 user_pages[data_page_index],
898 data_page_offset,
899 page_length);
902 set_page_dirty(page);
903 mark_page_accessed(page);
904 page_cache_release(page);
906 remain -= page_length;
907 data_ptr += page_length;
908 offset += page_length;
911 out:
912 for (i = 0; i < pinned_pages; i++)
913 page_cache_release(user_pages[i]);
914 drm_free_large(user_pages);
916 return ret;
920 * Writes data to the object referenced by handle.
922 * On error, the contents of the buffer that were to be modified are undefined.
925 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
926 struct drm_file *file)
928 struct drm_i915_gem_pwrite *args = data;
929 struct drm_i915_gem_object *obj;
930 int ret;
932 if (args->size == 0)
933 return 0;
935 if (!access_ok(VERIFY_READ,
936 (char __user *)(uintptr_t)args->data_ptr,
937 args->size))
938 return -EFAULT;
940 ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
941 args->size);
942 if (ret)
943 return -EFAULT;
945 ret = i915_mutex_lock_interruptible(dev);
946 if (ret)
947 return ret;
949 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
950 if (obj == NULL) {
951 ret = -ENOENT;
952 goto unlock;
955 /* Bounds check destination. */
956 if (args->offset > obj->base.size ||
957 args->size > obj->base.size - args->offset) {
958 ret = -EINVAL;
959 goto out;
962 /* We can only do the GTT pwrite on untiled buffers, as otherwise
963 * it would end up going through the fenced access, and we'll get
964 * different detiling behavior between reading and writing.
965 * pread/pwrite currently are reading and writing from the CPU
966 * perspective, requiring manual detiling by the client.
968 if (obj->phys_obj)
969 ret = i915_gem_phys_pwrite(dev, obj, args, file);
970 else if (obj->gtt_space &&
971 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
972 ret = i915_gem_object_pin(obj, 0, true);
973 if (ret)
974 goto out;
976 ret = i915_gem_object_set_to_gtt_domain(obj, true);
977 if (ret)
978 goto out_unpin;
980 ret = i915_gem_object_put_fence(obj);
981 if (ret)
982 goto out_unpin;
984 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
985 if (ret == -EFAULT)
986 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
988 out_unpin:
989 i915_gem_object_unpin(obj);
990 } else {
991 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
992 if (ret)
993 goto out;
995 ret = -EFAULT;
996 if (!i915_gem_object_needs_bit17_swizzle(obj))
997 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
998 if (ret == -EFAULT)
999 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
1002 out:
1003 drm_gem_object_unreference(&obj->base);
1004 unlock:
1005 mutex_unlock(&dev->struct_mutex);
1006 return ret;
1010 * Called when user space prepares to use an object with the CPU, either
1011 * through the mmap ioctl's mapping or a GTT mapping.
1014 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1015 struct drm_file *file)
1017 struct drm_i915_gem_set_domain *args = data;
1018 struct drm_i915_gem_object *obj;
1019 uint32_t read_domains = args->read_domains;
1020 uint32_t write_domain = args->write_domain;
1021 int ret;
1023 if (!(dev->driver->driver_features & DRIVER_GEM))
1024 return -ENODEV;
1026 /* Only handle setting domains to types used by the CPU. */
1027 if (write_domain & I915_GEM_GPU_DOMAINS)
1028 return -EINVAL;
1030 if (read_domains & I915_GEM_GPU_DOMAINS)
1031 return -EINVAL;
1033 /* Having something in the write domain implies it's in the read
1034 * domain, and only that read domain. Enforce that in the request.
1036 if (write_domain != 0 && read_domains != write_domain)
1037 return -EINVAL;
1039 ret = i915_mutex_lock_interruptible(dev);
1040 if (ret)
1041 return ret;
1043 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1044 if (obj == NULL) {
1045 ret = -ENOENT;
1046 goto unlock;
1049 if (read_domains & I915_GEM_DOMAIN_GTT) {
1050 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1052 /* Silently promote "you're not bound, there was nothing to do"
1053 * to success, since the client was just asking us to
1054 * make sure everything was done.
1056 if (ret == -EINVAL)
1057 ret = 0;
1058 } else {
1059 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1062 drm_gem_object_unreference(&obj->base);
1063 unlock:
1064 mutex_unlock(&dev->struct_mutex);
1065 return ret;
1069 * Called when user space has done writes to this buffer
1072 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1073 struct drm_file *file)
1075 struct drm_i915_gem_sw_finish *args = data;
1076 struct drm_i915_gem_object *obj;
1077 int ret = 0;
1079 if (!(dev->driver->driver_features & DRIVER_GEM))
1080 return -ENODEV;
1082 ret = i915_mutex_lock_interruptible(dev);
1083 if (ret)
1084 return ret;
1086 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1087 if (obj == NULL) {
1088 ret = -ENOENT;
1089 goto unlock;
1092 /* Pinned buffers may be scanout, so flush the cache */
1093 if (obj->pin_count)
1094 i915_gem_object_flush_cpu_write_domain(obj);
1096 drm_gem_object_unreference(&obj->base);
1097 unlock:
1098 mutex_unlock(&dev->struct_mutex);
1099 return ret;
1103 * Maps the contents of an object, returning the address it is mapped
1104 * into.
1106 * While the mapping holds a reference on the contents of the object, it doesn't
1107 * imply a ref on the object itself.
1110 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1111 struct drm_file *file)
1113 struct drm_i915_private *dev_priv = dev->dev_private;
1114 struct drm_i915_gem_mmap *args = data;
1115 struct drm_gem_object *obj;
1116 unsigned long addr;
1118 if (!(dev->driver->driver_features & DRIVER_GEM))
1119 return -ENODEV;
1121 obj = drm_gem_object_lookup(dev, file, args->handle);
1122 if (obj == NULL)
1123 return -ENOENT;
1125 if (obj->size > dev_priv->mm.gtt_mappable_end) {
1126 drm_gem_object_unreference_unlocked(obj);
1127 return -E2BIG;
1130 down_write(&current->mm->mmap_sem);
1131 addr = do_mmap(obj->filp, 0, args->size,
1132 PROT_READ | PROT_WRITE, MAP_SHARED,
1133 args->offset);
1134 up_write(&current->mm->mmap_sem);
1135 drm_gem_object_unreference_unlocked(obj);
1136 if (IS_ERR((void *)addr))
1137 return addr;
1139 args->addr_ptr = (uint64_t) addr;
1141 return 0;
1145 * i915_gem_fault - fault a page into the GTT
1146 * vma: VMA in question
1147 * vmf: fault info
1149 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1150 * from userspace. The fault handler takes care of binding the object to
1151 * the GTT (if needed), allocating and programming a fence register (again,
1152 * only if needed based on whether the old reg is still valid or the object
1153 * is tiled) and inserting a new PTE into the faulting process.
1155 * Note that the faulting process may involve evicting existing objects
1156 * from the GTT and/or fence registers to make room. So performance may
1157 * suffer if the GTT working set is large or there are few fence registers
1158 * left.
1160 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1162 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1163 struct drm_device *dev = obj->base.dev;
1164 drm_i915_private_t *dev_priv = dev->dev_private;
1165 pgoff_t page_offset;
1166 unsigned long pfn;
1167 int ret = 0;
1168 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1170 /* We don't use vmf->pgoff since that has the fake offset */
1171 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1172 PAGE_SHIFT;
1174 /* Now bind it into the GTT if needed */
1175 mutex_lock(&dev->struct_mutex);
1177 if (!obj->map_and_fenceable) {
1178 ret = i915_gem_object_unbind(obj);
1179 if (ret)
1180 goto unlock;
1182 if (!obj->gtt_space) {
1183 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1184 if (ret)
1185 goto unlock;
1188 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1189 if (ret)
1190 goto unlock;
1192 if (obj->tiling_mode == I915_TILING_NONE)
1193 ret = i915_gem_object_put_fence(obj);
1194 else
1195 ret = i915_gem_object_get_fence(obj, NULL, true);
1196 if (ret)
1197 goto unlock;
1199 if (i915_gem_object_is_inactive(obj))
1200 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1202 obj->fault_mappable = true;
1204 pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
1205 page_offset;
1207 /* Finally, remap it using the new GTT offset */
1208 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1209 unlock:
1210 mutex_unlock(&dev->struct_mutex);
1212 switch (ret) {
1213 case -EAGAIN:
1214 set_need_resched();
1215 case 0:
1216 case -ERESTARTSYS:
1217 return VM_FAULT_NOPAGE;
1218 case -ENOMEM:
1219 return VM_FAULT_OOM;
1220 default:
1221 return VM_FAULT_SIGBUS;
1226 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1227 * @obj: obj in question
1229 * GEM memory mapping works by handing back to userspace a fake mmap offset
1230 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1231 * up the object based on the offset and sets up the various memory mapping
1232 * structures.
1234 * This routine allocates and attaches a fake offset for @obj.
1236 static int
1237 i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
1239 struct drm_device *dev = obj->base.dev;
1240 struct drm_gem_mm *mm = dev->mm_private;
1241 struct drm_map_list *list;
1242 struct drm_local_map *map;
1243 int ret = 0;
1245 /* Set the object up for mmap'ing */
1246 list = &obj->base.map_list;
1247 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1248 if (!list->map)
1249 return -ENOMEM;
1251 map = list->map;
1252 map->type = _DRM_GEM;
1253 map->size = obj->base.size;
1254 map->handle = obj;
1256 /* Get a DRM GEM mmap offset allocated... */
1257 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1258 obj->base.size / PAGE_SIZE,
1259 0, 0);
1260 if (!list->file_offset_node) {
1261 DRM_ERROR("failed to allocate offset for bo %d\n",
1262 obj->base.name);
1263 ret = -ENOSPC;
1264 goto out_free_list;
1267 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1268 obj->base.size / PAGE_SIZE,
1270 if (!list->file_offset_node) {
1271 ret = -ENOMEM;
1272 goto out_free_list;
1275 list->hash.key = list->file_offset_node->start;
1276 ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
1277 if (ret) {
1278 DRM_ERROR("failed to add to map hash\n");
1279 goto out_free_mm;
1282 return 0;
1284 out_free_mm:
1285 drm_mm_put_block(list->file_offset_node);
1286 out_free_list:
1287 kfree(list->map);
1288 list->map = NULL;
1290 return ret;
1294 * i915_gem_release_mmap - remove physical page mappings
1295 * @obj: obj in question
1297 * Preserve the reservation of the mmapping with the DRM core code, but
1298 * relinquish ownership of the pages back to the system.
1300 * It is vital that we remove the page mapping if we have mapped a tiled
1301 * object through the GTT and then lose the fence register due to
1302 * resource pressure. Similarly if the object has been moved out of the
1303 * aperture, than pages mapped into userspace must be revoked. Removing the
1304 * mapping will then trigger a page fault on the next user access, allowing
1305 * fixup by i915_gem_fault().
1307 void
1308 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1310 if (!obj->fault_mappable)
1311 return;
1313 unmap_mapping_range(obj->base.dev->dev_mapping,
1314 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1315 obj->base.size, 1);
1317 obj->fault_mappable = false;
1320 static void
1321 i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
1323 struct drm_device *dev = obj->base.dev;
1324 struct drm_gem_mm *mm = dev->mm_private;
1325 struct drm_map_list *list = &obj->base.map_list;
1327 drm_ht_remove_item(&mm->offset_hash, &list->hash);
1328 drm_mm_put_block(list->file_offset_node);
1329 kfree(list->map);
1330 list->map = NULL;
1333 static uint32_t
1334 i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
1336 struct drm_device *dev = obj->base.dev;
1337 uint32_t size;
1339 if (INTEL_INFO(dev)->gen >= 4 ||
1340 obj->tiling_mode == I915_TILING_NONE)
1341 return obj->base.size;
1343 /* Previous chips need a power-of-two fence region when tiling */
1344 if (INTEL_INFO(dev)->gen == 3)
1345 size = 1024*1024;
1346 else
1347 size = 512*1024;
1349 while (size < obj->base.size)
1350 size <<= 1;
1352 return size;
1356 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1357 * @obj: object to check
1359 * Return the required GTT alignment for an object, taking into account
1360 * potential fence register mapping.
1362 static uint32_t
1363 i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
1365 struct drm_device *dev = obj->base.dev;
1368 * Minimum alignment is 4k (GTT page size), but might be greater
1369 * if a fence register is needed for the object.
1371 if (INTEL_INFO(dev)->gen >= 4 ||
1372 obj->tiling_mode == I915_TILING_NONE)
1373 return 4096;
1376 * Previous chips need to be aligned to the size of the smallest
1377 * fence register that can contain the object.
1379 return i915_gem_get_gtt_size(obj);
1383 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1384 * unfenced object
1385 * @obj: object to check
1387 * Return the required GTT alignment for an object, only taking into account
1388 * unfenced tiled surface requirements.
1390 static uint32_t
1391 i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
1393 struct drm_device *dev = obj->base.dev;
1394 int tile_height;
1397 * Minimum alignment is 4k (GTT page size) for sane hw.
1399 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1400 obj->tiling_mode == I915_TILING_NONE)
1401 return 4096;
1404 * Older chips need unfenced tiled buffers to be aligned to the left
1405 * edge of an even tile row (where tile rows are counted as if the bo is
1406 * placed in a fenced gtt region).
1408 if (IS_GEN2(dev) ||
1409 (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
1410 tile_height = 32;
1411 else
1412 tile_height = 8;
1414 return tile_height * obj->stride * 2;
1418 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1419 * @dev: DRM device
1420 * @data: GTT mapping ioctl data
1421 * @file: GEM object info
1423 * Simply returns the fake offset to userspace so it can mmap it.
1424 * The mmap call will end up in drm_gem_mmap(), which will set things
1425 * up so we can get faults in the handler above.
1427 * The fault handler will take care of binding the object into the GTT
1428 * (since it may have been evicted to make room for something), allocating
1429 * a fence register, and mapping the appropriate aperture address into
1430 * userspace.
1433 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1434 struct drm_file *file)
1436 struct drm_i915_private *dev_priv = dev->dev_private;
1437 struct drm_i915_gem_mmap_gtt *args = data;
1438 struct drm_i915_gem_object *obj;
1439 int ret;
1441 if (!(dev->driver->driver_features & DRIVER_GEM))
1442 return -ENODEV;
1444 ret = i915_mutex_lock_interruptible(dev);
1445 if (ret)
1446 return ret;
1448 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1449 if (obj == NULL) {
1450 ret = -ENOENT;
1451 goto unlock;
1454 if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1455 ret = -E2BIG;
1456 goto unlock;
1459 if (obj->madv != I915_MADV_WILLNEED) {
1460 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1461 ret = -EINVAL;
1462 goto out;
1465 if (!obj->base.map_list.map) {
1466 ret = i915_gem_create_mmap_offset(obj);
1467 if (ret)
1468 goto out;
1471 args->offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1473 out:
1474 drm_gem_object_unreference(&obj->base);
1475 unlock:
1476 mutex_unlock(&dev->struct_mutex);
1477 return ret;
1480 static int
1481 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1482 gfp_t gfpmask)
1484 int page_count, i;
1485 struct address_space *mapping;
1486 struct inode *inode;
1487 struct page *page;
1489 /* Get the list of pages out of our struct file. They'll be pinned
1490 * at this point until we release them.
1492 page_count = obj->base.size / PAGE_SIZE;
1493 BUG_ON(obj->pages != NULL);
1494 obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1495 if (obj->pages == NULL)
1496 return -ENOMEM;
1498 inode = obj->base.filp->f_path.dentry->d_inode;
1499 mapping = inode->i_mapping;
1500 for (i = 0; i < page_count; i++) {
1501 page = read_cache_page_gfp(mapping, i,
1502 GFP_HIGHUSER |
1503 __GFP_COLD |
1504 __GFP_RECLAIMABLE |
1505 gfpmask);
1506 if (IS_ERR(page))
1507 goto err_pages;
1509 obj->pages[i] = page;
1512 if (obj->tiling_mode != I915_TILING_NONE)
1513 i915_gem_object_do_bit_17_swizzle(obj);
1515 return 0;
1517 err_pages:
1518 while (i--)
1519 page_cache_release(obj->pages[i]);
1521 drm_free_large(obj->pages);
1522 obj->pages = NULL;
1523 return PTR_ERR(page);
1526 static void
1527 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1529 int page_count = obj->base.size / PAGE_SIZE;
1530 int i;
1532 BUG_ON(obj->madv == __I915_MADV_PURGED);
1534 if (obj->tiling_mode != I915_TILING_NONE)
1535 i915_gem_object_save_bit_17_swizzle(obj);
1537 if (obj->madv == I915_MADV_DONTNEED)
1538 obj->dirty = 0;
1540 for (i = 0; i < page_count; i++) {
1541 if (obj->dirty)
1542 set_page_dirty(obj->pages[i]);
1544 if (obj->madv == I915_MADV_WILLNEED)
1545 mark_page_accessed(obj->pages[i]);
1547 page_cache_release(obj->pages[i]);
1549 obj->dirty = 0;
1551 drm_free_large(obj->pages);
1552 obj->pages = NULL;
1555 void
1556 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1557 struct intel_ring_buffer *ring,
1558 u32 seqno)
1560 struct drm_device *dev = obj->base.dev;
1561 struct drm_i915_private *dev_priv = dev->dev_private;
1563 BUG_ON(ring == NULL);
1564 obj->ring = ring;
1566 /* Add a reference if we're newly entering the active list. */
1567 if (!obj->active) {
1568 drm_gem_object_reference(&obj->base);
1569 obj->active = 1;
1572 /* Move from whatever list we were on to the tail of execution. */
1573 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1574 list_move_tail(&obj->ring_list, &ring->active_list);
1576 obj->last_rendering_seqno = seqno;
1577 if (obj->fenced_gpu_access) {
1578 struct drm_i915_fence_reg *reg;
1580 BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);
1582 obj->last_fenced_seqno = seqno;
1583 obj->last_fenced_ring = ring;
1585 reg = &dev_priv->fence_regs[obj->fence_reg];
1586 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
1590 static void
1591 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1593 list_del_init(&obj->ring_list);
1594 obj->last_rendering_seqno = 0;
1597 static void
1598 i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
1600 struct drm_device *dev = obj->base.dev;
1601 drm_i915_private_t *dev_priv = dev->dev_private;
1603 BUG_ON(!obj->active);
1604 list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
1606 i915_gem_object_move_off_active(obj);
1609 static void
1610 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1612 struct drm_device *dev = obj->base.dev;
1613 struct drm_i915_private *dev_priv = dev->dev_private;
1615 if (obj->pin_count != 0)
1616 list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
1617 else
1618 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1620 BUG_ON(!list_empty(&obj->gpu_write_list));
1621 BUG_ON(!obj->active);
1622 obj->ring = NULL;
1624 i915_gem_object_move_off_active(obj);
1625 obj->fenced_gpu_access = false;
1627 obj->active = 0;
1628 obj->pending_gpu_write = false;
1629 drm_gem_object_unreference(&obj->base);
1631 WARN_ON(i915_verify_lists(dev));
1634 /* Immediately discard the backing storage */
1635 static void
1636 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1638 struct inode *inode;
1640 /* Our goal here is to return as much of the memory as
1641 * is possible back to the system as we are called from OOM.
1642 * To do this we must instruct the shmfs to drop all of its
1643 * backing pages, *now*. Here we mirror the actions taken
1644 * when by shmem_delete_inode() to release the backing store.
1646 inode = obj->base.filp->f_path.dentry->d_inode;
1647 truncate_inode_pages(inode->i_mapping, 0);
1648 if (inode->i_op->truncate_range)
1649 inode->i_op->truncate_range(inode, 0, (loff_t)-1);
1651 obj->madv = __I915_MADV_PURGED;
1654 static inline int
1655 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1657 return obj->madv == I915_MADV_DONTNEED;
1660 static void
1661 i915_gem_process_flushing_list(struct drm_device *dev,
1662 uint32_t flush_domains,
1663 struct intel_ring_buffer *ring)
1665 struct drm_i915_gem_object *obj, *next;
1667 list_for_each_entry_safe(obj, next,
1668 &ring->gpu_write_list,
1669 gpu_write_list) {
1670 if (obj->base.write_domain & flush_domains) {
1671 uint32_t old_write_domain = obj->base.write_domain;
1673 obj->base.write_domain = 0;
1674 list_del_init(&obj->gpu_write_list);
1675 i915_gem_object_move_to_active(obj, ring,
1676 i915_gem_next_request_seqno(dev, ring));
1678 trace_i915_gem_object_change_domain(obj,
1679 obj->base.read_domains,
1680 old_write_domain);
1686 i915_add_request(struct drm_device *dev,
1687 struct drm_file *file,
1688 struct drm_i915_gem_request *request,
1689 struct intel_ring_buffer *ring)
1691 drm_i915_private_t *dev_priv = dev->dev_private;
1692 struct drm_i915_file_private *file_priv = NULL;
1693 uint32_t seqno;
1694 int was_empty;
1695 int ret;
1697 BUG_ON(request == NULL);
1699 if (file != NULL)
1700 file_priv = file->driver_priv;
1702 ret = ring->add_request(ring, &seqno);
1703 if (ret)
1704 return ret;
1706 ring->outstanding_lazy_request = false;
1708 request->seqno = seqno;
1709 request->ring = ring;
1710 request->emitted_jiffies = jiffies;
1711 was_empty = list_empty(&ring->request_list);
1712 list_add_tail(&request->list, &ring->request_list);
1714 if (file_priv) {
1715 spin_lock(&file_priv->mm.lock);
1716 request->file_priv = file_priv;
1717 list_add_tail(&request->client_list,
1718 &file_priv->mm.request_list);
1719 spin_unlock(&file_priv->mm.lock);
1722 if (!dev_priv->mm.suspended) {
1723 mod_timer(&dev_priv->hangcheck_timer,
1724 jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1725 if (was_empty)
1726 queue_delayed_work(dev_priv->wq,
1727 &dev_priv->mm.retire_work, HZ);
1729 return 0;
1732 static inline void
1733 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1735 struct drm_i915_file_private *file_priv = request->file_priv;
1737 if (!file_priv)
1738 return;
1740 spin_lock(&file_priv->mm.lock);
1741 list_del(&request->client_list);
1742 request->file_priv = NULL;
1743 spin_unlock(&file_priv->mm.lock);
1746 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1747 struct intel_ring_buffer *ring)
1749 while (!list_empty(&ring->request_list)) {
1750 struct drm_i915_gem_request *request;
1752 request = list_first_entry(&ring->request_list,
1753 struct drm_i915_gem_request,
1754 list);
1756 list_del(&request->list);
1757 i915_gem_request_remove_from_client(request);
1758 kfree(request);
1761 while (!list_empty(&ring->active_list)) {
1762 struct drm_i915_gem_object *obj;
1764 obj = list_first_entry(&ring->active_list,
1765 struct drm_i915_gem_object,
1766 ring_list);
1768 obj->base.write_domain = 0;
1769 list_del_init(&obj->gpu_write_list);
1770 i915_gem_object_move_to_inactive(obj);
1774 static void i915_gem_reset_fences(struct drm_device *dev)
1776 struct drm_i915_private *dev_priv = dev->dev_private;
1777 int i;
1779 for (i = 0; i < 16; i++) {
1780 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1781 struct drm_i915_gem_object *obj = reg->obj;
1783 if (!obj)
1784 continue;
1786 if (obj->tiling_mode)
1787 i915_gem_release_mmap(obj);
1789 reg->obj->fence_reg = I915_FENCE_REG_NONE;
1790 reg->obj->fenced_gpu_access = false;
1791 reg->obj->last_fenced_seqno = 0;
1792 reg->obj->last_fenced_ring = NULL;
1793 i915_gem_clear_fence_reg(dev, reg);
1797 void i915_gem_reset(struct drm_device *dev)
1799 struct drm_i915_private *dev_priv = dev->dev_private;
1800 struct drm_i915_gem_object *obj;
1801 int i;
1803 for (i = 0; i < I915_NUM_RINGS; i++)
1804 i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);
1806 /* Remove anything from the flushing lists. The GPU cache is likely
1807 * to be lost on reset along with the data, so simply move the
1808 * lost bo to the inactive list.
1810 while (!list_empty(&dev_priv->mm.flushing_list)) {
1811 obj= list_first_entry(&dev_priv->mm.flushing_list,
1812 struct drm_i915_gem_object,
1813 mm_list);
1815 obj->base.write_domain = 0;
1816 list_del_init(&obj->gpu_write_list);
1817 i915_gem_object_move_to_inactive(obj);
1820 /* Move everything out of the GPU domains to ensure we do any
1821 * necessary invalidation upon reuse.
1823 list_for_each_entry(obj,
1824 &dev_priv->mm.inactive_list,
1825 mm_list)
1827 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1830 /* The fence registers are invalidated so clear them out */
1831 i915_gem_reset_fences(dev);
1835 * This function clears the request list as sequence numbers are passed.
1837 static void
1838 i915_gem_retire_requests_ring(struct drm_device *dev,
1839 struct intel_ring_buffer *ring)
1841 drm_i915_private_t *dev_priv = dev->dev_private;
1842 uint32_t seqno;
1843 int i;
1845 if (!ring->status_page.page_addr ||
1846 list_empty(&ring->request_list))
1847 return;
1849 WARN_ON(i915_verify_lists(dev));
1851 seqno = ring->get_seqno(ring);
1853 for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1854 if (seqno >= ring->sync_seqno[i])
1855 ring->sync_seqno[i] = 0;
1857 while (!list_empty(&ring->request_list)) {
1858 struct drm_i915_gem_request *request;
1860 request = list_first_entry(&ring->request_list,
1861 struct drm_i915_gem_request,
1862 list);
1864 if (!i915_seqno_passed(seqno, request->seqno))
1865 break;
1867 trace_i915_gem_request_retire(dev, request->seqno);
1869 list_del(&request->list);
1870 i915_gem_request_remove_from_client(request);
1871 kfree(request);
1874 /* Move any buffers on the active list that are no longer referenced
1875 * by the ringbuffer to the flushing/inactive lists as appropriate.
1877 while (!list_empty(&ring->active_list)) {
1878 struct drm_i915_gem_object *obj;
1880 obj= list_first_entry(&ring->active_list,
1881 struct drm_i915_gem_object,
1882 ring_list);
1884 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
1885 break;
1887 if (obj->base.write_domain != 0)
1888 i915_gem_object_move_to_flushing(obj);
1889 else
1890 i915_gem_object_move_to_inactive(obj);
1893 if (unlikely (dev_priv->trace_irq_seqno &&
1894 i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1895 ring->irq_put(ring);
1896 dev_priv->trace_irq_seqno = 0;
1899 WARN_ON(i915_verify_lists(dev));
1902 void
1903 i915_gem_retire_requests(struct drm_device *dev)
1905 drm_i915_private_t *dev_priv = dev->dev_private;
1906 int i;
1908 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1909 struct drm_i915_gem_object *obj, *next;
1911 /* We must be careful that during unbind() we do not
1912 * accidentally infinitely recurse into retire requests.
1913 * Currently:
1914 * retire -> free -> unbind -> wait -> retire_ring
1916 list_for_each_entry_safe(obj, next,
1917 &dev_priv->mm.deferred_free_list,
1918 mm_list)
1919 i915_gem_free_object_tail(obj);
1922 for (i = 0; i < I915_NUM_RINGS; i++)
1923 i915_gem_retire_requests_ring(dev, &dev_priv->ring[i]);
1926 static void
1927 i915_gem_retire_work_handler(struct work_struct *work)
1929 drm_i915_private_t *dev_priv;
1930 struct drm_device *dev;
1931 bool idle;
1932 int i;
1934 dev_priv = container_of(work, drm_i915_private_t,
1935 mm.retire_work.work);
1936 dev = dev_priv->dev;
1938 /* Come back later if the device is busy... */
1939 if (!mutex_trylock(&dev->struct_mutex)) {
1940 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1941 return;
1944 i915_gem_retire_requests(dev);
1946 /* Send a periodic flush down the ring so we don't hold onto GEM
1947 * objects indefinitely.
1949 idle = true;
1950 for (i = 0; i < I915_NUM_RINGS; i++) {
1951 struct intel_ring_buffer *ring = &dev_priv->ring[i];
1953 if (!list_empty(&ring->gpu_write_list)) {
1954 struct drm_i915_gem_request *request;
1955 int ret;
1957 ret = i915_gem_flush_ring(dev, ring, 0,
1958 I915_GEM_GPU_DOMAINS);
1959 request = kzalloc(sizeof(*request), GFP_KERNEL);
1960 if (ret || request == NULL ||
1961 i915_add_request(dev, NULL, request, ring))
1962 kfree(request);
1965 idle &= list_empty(&ring->request_list);
1968 if (!dev_priv->mm.suspended && !idle)
1969 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1971 mutex_unlock(&dev->struct_mutex);
1975 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1976 bool interruptible, struct intel_ring_buffer *ring)
1978 drm_i915_private_t *dev_priv = dev->dev_private;
1979 u32 ier;
1980 int ret = 0;
1982 BUG_ON(seqno == 0);
1984 if (atomic_read(&dev_priv->mm.wedged))
1985 return -EAGAIN;
1987 if (seqno == ring->outstanding_lazy_request) {
1988 struct drm_i915_gem_request *request;
1990 request = kzalloc(sizeof(*request), GFP_KERNEL);
1991 if (request == NULL)
1992 return -ENOMEM;
1994 ret = i915_add_request(dev, NULL, request, ring);
1995 if (ret) {
1996 kfree(request);
1997 return ret;
2000 seqno = request->seqno;
2003 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
2004 if (HAS_PCH_SPLIT(dev))
2005 ier = I915_READ(DEIER) | I915_READ(GTIER);
2006 else
2007 ier = I915_READ(IER);
2008 if (!ier) {
2009 DRM_ERROR("something (likely vbetool) disabled "
2010 "interrupts, re-enabling\n");
2011 i915_driver_irq_preinstall(dev);
2012 i915_driver_irq_postinstall(dev);
2015 trace_i915_gem_request_wait_begin(dev, seqno);
2017 ring->waiting_seqno = seqno;
2018 if (ring->irq_get(ring)) {
2019 if (interruptible)
2020 ret = wait_event_interruptible(ring->irq_queue,
2021 i915_seqno_passed(ring->get_seqno(ring), seqno)
2022 || atomic_read(&dev_priv->mm.wedged));
2023 else
2024 wait_event(ring->irq_queue,
2025 i915_seqno_passed(ring->get_seqno(ring), seqno)
2026 || atomic_read(&dev_priv->mm.wedged));
2028 ring->irq_put(ring);
2029 } else if (wait_for(i915_seqno_passed(ring->get_seqno(ring),
2030 seqno) ||
2031 atomic_read(&dev_priv->mm.wedged), 3000))
2032 ret = -EBUSY;
2033 ring->waiting_seqno = 0;
2035 trace_i915_gem_request_wait_end(dev, seqno);
2037 if (atomic_read(&dev_priv->mm.wedged))
2038 ret = -EAGAIN;
2040 if (ret && ret != -ERESTARTSYS)
2041 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
2042 __func__, ret, seqno, ring->get_seqno(ring),
2043 dev_priv->next_seqno);
2045 /* Directly dispatch request retiring. While we have the work queue
2046 * to handle this, the waiter on a request often wants an associated
2047 * buffer to have made it to the inactive list, and we would need
2048 * a separate wait queue to handle that.
2050 if (ret == 0)
2051 i915_gem_retire_requests_ring(dev, ring);
2053 return ret;
2057 * Waits for a sequence number to be signaled, and cleans up the
2058 * request and object lists appropriately for that event.
2060 static int
2061 i915_wait_request(struct drm_device *dev, uint32_t seqno,
2062 struct intel_ring_buffer *ring)
2064 return i915_do_wait_request(dev, seqno, 1, ring);
2068 * Ensures that all rendering to the object has completed and the object is
2069 * safe to unbind from the GTT or access from the CPU.
2072 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
2073 bool interruptible)
2075 struct drm_device *dev = obj->base.dev;
2076 int ret;
2078 /* This function only exists to support waiting for existing rendering,
2079 * not for emitting required flushes.
2081 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
2083 /* If there is rendering queued on the buffer being evicted, wait for
2084 * it.
2086 if (obj->active) {
2087 ret = i915_do_wait_request(dev,
2088 obj->last_rendering_seqno,
2089 interruptible,
2090 obj->ring);
2091 if (ret)
2092 return ret;
2095 return 0;
2099 * Unbinds an object from the GTT aperture.
2102 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2104 int ret = 0;
2106 if (obj->gtt_space == NULL)
2107 return 0;
2109 if (obj->pin_count != 0) {
2110 DRM_ERROR("Attempting to unbind pinned buffer\n");
2111 return -EINVAL;
2114 /* blow away mappings if mapped through GTT */
2115 i915_gem_release_mmap(obj);
2117 /* Move the object to the CPU domain to ensure that
2118 * any possible CPU writes while it's not in the GTT
2119 * are flushed when we go to remap it. This will
2120 * also ensure that all pending GPU writes are finished
2121 * before we unbind.
2123 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2124 if (ret == -ERESTARTSYS)
2125 return ret;
2126 /* Continue on if we fail due to EIO, the GPU is hung so we
2127 * should be safe and we need to cleanup or else we might
2128 * cause memory corruption through use-after-free.
2130 if (ret) {
2131 i915_gem_clflush_object(obj);
2132 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2135 /* release the fence reg _after_ flushing */
2136 ret = i915_gem_object_put_fence(obj);
2137 if (ret == -ERESTARTSYS)
2138 return ret;
2140 i915_gem_gtt_unbind_object(obj);
2141 i915_gem_object_put_pages_gtt(obj);
2143 list_del_init(&obj->gtt_list);
2144 list_del_init(&obj->mm_list);
2145 /* Avoid an unnecessary call to unbind on rebind. */
2146 obj->map_and_fenceable = true;
2148 drm_mm_put_block(obj->gtt_space);
2149 obj->gtt_space = NULL;
2150 obj->gtt_offset = 0;
2152 if (i915_gem_object_is_purgeable(obj))
2153 i915_gem_object_truncate(obj);
2155 trace_i915_gem_object_unbind(obj);
2157 return ret;
2161 i915_gem_flush_ring(struct drm_device *dev,
2162 struct intel_ring_buffer *ring,
2163 uint32_t invalidate_domains,
2164 uint32_t flush_domains)
2166 int ret;
2168 ret = ring->flush(ring, invalidate_domains, flush_domains);
2169 if (ret)
2170 return ret;
2172 i915_gem_process_flushing_list(dev, flush_domains, ring);
2173 return 0;
2176 static int i915_ring_idle(struct drm_device *dev,
2177 struct intel_ring_buffer *ring)
2179 int ret;
2181 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
2182 return 0;
2184 if (!list_empty(&ring->gpu_write_list)) {
2185 ret = i915_gem_flush_ring(dev, ring,
2186 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2187 if (ret)
2188 return ret;
2191 return i915_wait_request(dev,
2192 i915_gem_next_request_seqno(dev, ring),
2193 ring);
2197 i915_gpu_idle(struct drm_device *dev)
2199 drm_i915_private_t *dev_priv = dev->dev_private;
2200 bool lists_empty;
2201 int ret, i;
2203 lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2204 list_empty(&dev_priv->mm.active_list));
2205 if (lists_empty)
2206 return 0;
2208 /* Flush everything onto the inactive list. */
2209 for (i = 0; i < I915_NUM_RINGS; i++) {
2210 ret = i915_ring_idle(dev, &dev_priv->ring[i]);
2211 if (ret)
2212 return ret;
2215 return 0;
2218 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
2219 struct intel_ring_buffer *pipelined)
2221 struct drm_device *dev = obj->base.dev;
2222 drm_i915_private_t *dev_priv = dev->dev_private;
2223 u32 size = obj->gtt_space->size;
2224 int regnum = obj->fence_reg;
2225 uint64_t val;
2227 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2228 0xfffff000) << 32;
2229 val |= obj->gtt_offset & 0xfffff000;
2230 val |= (uint64_t)((obj->stride / 128) - 1) <<
2231 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2233 if (obj->tiling_mode == I915_TILING_Y)
2234 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2235 val |= I965_FENCE_REG_VALID;
2237 if (pipelined) {
2238 int ret = intel_ring_begin(pipelined, 6);
2239 if (ret)
2240 return ret;
2242 intel_ring_emit(pipelined, MI_NOOP);
2243 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2244 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2245 intel_ring_emit(pipelined, (u32)val);
2246 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2247 intel_ring_emit(pipelined, (u32)(val >> 32));
2248 intel_ring_advance(pipelined);
2249 } else
2250 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
2252 return 0;
2255 static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
2256 struct intel_ring_buffer *pipelined)
2258 struct drm_device *dev = obj->base.dev;
2259 drm_i915_private_t *dev_priv = dev->dev_private;
2260 u32 size = obj->gtt_space->size;
2261 int regnum = obj->fence_reg;
2262 uint64_t val;
2264 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2265 0xfffff000) << 32;
2266 val |= obj->gtt_offset & 0xfffff000;
2267 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2268 if (obj->tiling_mode == I915_TILING_Y)
2269 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2270 val |= I965_FENCE_REG_VALID;
2272 if (pipelined) {
2273 int ret = intel_ring_begin(pipelined, 6);
2274 if (ret)
2275 return ret;
2277 intel_ring_emit(pipelined, MI_NOOP);
2278 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2279 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2280 intel_ring_emit(pipelined, (u32)val);
2281 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2282 intel_ring_emit(pipelined, (u32)(val >> 32));
2283 intel_ring_advance(pipelined);
2284 } else
2285 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2287 return 0;
2290 static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
2291 struct intel_ring_buffer *pipelined)
2293 struct drm_device *dev = obj->base.dev;
2294 drm_i915_private_t *dev_priv = dev->dev_private;
2295 u32 size = obj->gtt_space->size;
2296 u32 fence_reg, val, pitch_val;
2297 int tile_width;
2299 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2300 (size & -size) != size ||
2301 (obj->gtt_offset & (size - 1)),
2302 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2303 obj->gtt_offset, obj->map_and_fenceable, size))
2304 return -EINVAL;
2306 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2307 tile_width = 128;
2308 else
2309 tile_width = 512;
2311 /* Note: pitch better be a power of two tile widths */
2312 pitch_val = obj->stride / tile_width;
2313 pitch_val = ffs(pitch_val) - 1;
2315 val = obj->gtt_offset;
2316 if (obj->tiling_mode == I915_TILING_Y)
2317 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2318 val |= I915_FENCE_SIZE_BITS(size);
2319 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2320 val |= I830_FENCE_REG_VALID;
2322 fence_reg = obj->fence_reg;
2323 if (fence_reg < 8)
2324 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2325 else
2326 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2328 if (pipelined) {
2329 int ret = intel_ring_begin(pipelined, 4);
2330 if (ret)
2331 return ret;
2333 intel_ring_emit(pipelined, MI_NOOP);
2334 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2335 intel_ring_emit(pipelined, fence_reg);
2336 intel_ring_emit(pipelined, val);
2337 intel_ring_advance(pipelined);
2338 } else
2339 I915_WRITE(fence_reg, val);
2341 return 0;
2344 static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
2345 struct intel_ring_buffer *pipelined)
2347 struct drm_device *dev = obj->base.dev;
2348 drm_i915_private_t *dev_priv = dev->dev_private;
2349 u32 size = obj->gtt_space->size;
2350 int regnum = obj->fence_reg;
2351 uint32_t val;
2352 uint32_t pitch_val;
2354 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2355 (size & -size) != size ||
2356 (obj->gtt_offset & (size - 1)),
2357 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2358 obj->gtt_offset, size))
2359 return -EINVAL;
2361 pitch_val = obj->stride / 128;
2362 pitch_val = ffs(pitch_val) - 1;
2364 val = obj->gtt_offset;
2365 if (obj->tiling_mode == I915_TILING_Y)
2366 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2367 val |= I830_FENCE_SIZE_BITS(size);
2368 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2369 val |= I830_FENCE_REG_VALID;
2371 if (pipelined) {
2372 int ret = intel_ring_begin(pipelined, 4);
2373 if (ret)
2374 return ret;
2376 intel_ring_emit(pipelined, MI_NOOP);
2377 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2378 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
2379 intel_ring_emit(pipelined, val);
2380 intel_ring_advance(pipelined);
2381 } else
2382 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
2384 return 0;
2387 static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno)
2389 return i915_seqno_passed(ring->get_seqno(ring), seqno);
2392 static int
2393 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
2394 struct intel_ring_buffer *pipelined,
2395 bool interruptible)
2397 int ret;
2399 if (obj->fenced_gpu_access) {
2400 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2401 ret = i915_gem_flush_ring(obj->base.dev,
2402 obj->last_fenced_ring,
2403 0, obj->base.write_domain);
2404 if (ret)
2405 return ret;
2408 obj->fenced_gpu_access = false;
2411 if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
2412 if (!ring_passed_seqno(obj->last_fenced_ring,
2413 obj->last_fenced_seqno)) {
2414 ret = i915_do_wait_request(obj->base.dev,
2415 obj->last_fenced_seqno,
2416 interruptible,
2417 obj->last_fenced_ring);
2418 if (ret)
2419 return ret;
2422 obj->last_fenced_seqno = 0;
2423 obj->last_fenced_ring = NULL;
2426 /* Ensure that all CPU reads are completed before installing a fence
2427 * and all writes before removing the fence.
2429 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2430 mb();
2432 return 0;
2436 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2438 int ret;
2440 if (obj->tiling_mode)
2441 i915_gem_release_mmap(obj);
2443 ret = i915_gem_object_flush_fence(obj, NULL, true);
2444 if (ret)
2445 return ret;
2447 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2448 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2449 i915_gem_clear_fence_reg(obj->base.dev,
2450 &dev_priv->fence_regs[obj->fence_reg]);
2452 obj->fence_reg = I915_FENCE_REG_NONE;
2455 return 0;
2458 static struct drm_i915_fence_reg *
2459 i915_find_fence_reg(struct drm_device *dev,
2460 struct intel_ring_buffer *pipelined)
2462 struct drm_i915_private *dev_priv = dev->dev_private;
2463 struct drm_i915_fence_reg *reg, *first, *avail;
2464 int i;
2466 /* First try to find a free reg */
2467 avail = NULL;
2468 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2469 reg = &dev_priv->fence_regs[i];
2470 if (!reg->obj)
2471 return reg;
2473 if (!reg->obj->pin_count)
2474 avail = reg;
2477 if (avail == NULL)
2478 return NULL;
2480 /* None available, try to steal one or wait for a user to finish */
2481 avail = first = NULL;
2482 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2483 if (reg->obj->pin_count)
2484 continue;
2486 if (first == NULL)
2487 first = reg;
2489 if (!pipelined ||
2490 !reg->obj->last_fenced_ring ||
2491 reg->obj->last_fenced_ring == pipelined) {
2492 avail = reg;
2493 break;
2497 if (avail == NULL)
2498 avail = first;
2500 return avail;
2504 * i915_gem_object_get_fence - set up a fence reg for an object
2505 * @obj: object to map through a fence reg
2506 * @pipelined: ring on which to queue the change, or NULL for CPU access
2507 * @interruptible: must we wait uninterruptibly for the register to retire?
2509 * When mapping objects through the GTT, userspace wants to be able to write
2510 * to them without having to worry about swizzling if the object is tiled.
2512 * This function walks the fence regs looking for a free one for @obj,
2513 * stealing one if it can't find any.
2515 * It then sets up the reg based on the object's properties: address, pitch
2516 * and tiling format.
/* Returns 0 on success, -ENOSPC when i915_find_fence_reg() finds no usable
 * register, or the error from a flush, wait or register-write helper.
 */
2519 i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
2520 struct intel_ring_buffer *pipelined,
2521 bool interruptible)
2523 struct drm_device *dev = obj->base.dev;
2524 struct drm_i915_private *dev_priv = dev->dev_private;
2525 struct drm_i915_fence_reg *reg;
2526 int ret;
2528 /* XXX disable pipelining. There are bugs. Shocking. */
2529 pipelined = NULL;
2531 /* Just update our place in the LRU if our fence is getting reused. */
2532 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2533 reg = &dev_priv->fence_regs[obj->fence_reg];
2534 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
/* With no fenced GPU access and no fenced seqno outstanding there is
 * nothing to order against, so the update is done without a ring.
 */
2536 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
2537 pipelined = NULL;
/* Non-pipelined: if the register's setup request (setup_seqno) has not
 * yet passed on last_fenced_ring, wait for it, then clear setup_seqno.
 */
2539 if (!pipelined) {
2540 if (reg->setup_seqno) {
2541 if (!ring_passed_seqno(obj->last_fenced_ring,
2542 reg->setup_seqno)) {
2543 ret = i915_do_wait_request(obj->base.dev,
2544 reg->setup_seqno,
2545 interruptible,
2546 obj->last_fenced_ring);
2547 if (ret)
2548 return ret;
2551 reg->setup_seqno = 0;
/* Pipelined on a ring other than the last fenced one: flush/wait for
 * the previous fenced access first.
 */
2553 } else if (obj->last_fenced_ring &&
2554 obj->last_fenced_ring != pipelined) {
2555 ret = i915_gem_object_flush_fence(obj,
2556 pipelined,
2557 interruptible);
2558 if (ret)
2559 return ret;
/* Same ring but the tiling changed: queue a flush of GPU writes made
 * through the old fence settings and clear fenced_gpu_access.
 */
2560 } else if (obj->tiling_changed) {
2561 if (obj->fenced_gpu_access) {
2562 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2563 ret = i915_gem_flush_ring(obj->base.dev, obj->ring,
2564 0, obj->base.write_domain);
2565 if (ret)
2566 return ret;
2569 obj->fenced_gpu_access = false;
/* Re-check after the flushes above, which may have cleared the state. */
2573 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
2574 pipelined = NULL;
2575 BUG_ON(!pipelined && reg->setup_seqno);
/* The register is kept, but its contents must be rewritten when the
 * tiling changed; otherwise there is nothing left to do.
 */
2577 if (obj->tiling_changed) {
2578 if (pipelined) {
2579 reg->setup_seqno =
2580 i915_gem_next_request_seqno(dev, pipelined);
2581 obj->last_fenced_seqno = reg->setup_seqno;
2582 obj->last_fenced_ring = pipelined;
2584 goto update;
2587 return 0;
/* No register assigned yet: find a free one or a victim to steal from. */
2590 reg = i915_find_fence_reg(dev, pipelined);
2591 if (reg == NULL)
2592 return -ENOSPC;
2594 ret = i915_gem_object_flush_fence(obj, pipelined, interruptible);
2595 if (ret)
2596 return ret;
/* Stealing: detach the previous owner. A reference is taken on the old
 * object for the duration of the hand-over and dropped on every exit.
 */
2598 if (reg->obj) {
2599 struct drm_i915_gem_object *old = reg->obj;
2601 drm_gem_object_reference(&old->base);
2603 if (old->tiling_mode)
2604 i915_gem_release_mmap(old);
2606 ret = i915_gem_object_flush_fence(old,
2607 pipelined,
2608 interruptible);
2609 if (ret) {
2610 drm_gem_object_unreference(&old->base);
2611 return ret;
2614 if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
2615 pipelined = NULL;
2617 old->fence_reg = I915_FENCE_REG_NONE;
2618 old->last_fenced_ring = pipelined;
2619 old->last_fenced_seqno =
2620 pipelined ? i915_gem_next_request_seqno(dev, pipelined) : 0;
2622 drm_gem_object_unreference(&old->base);
2623 } else if (obj->last_fenced_seqno == 0)
2624 pipelined = NULL;
/* Record the new owner and move the register to the LRU tail. */
2626 reg->obj = obj;
2627 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2628 obj->fence_reg = reg - dev_priv->fence_regs;
2629 obj->last_fenced_ring = pipelined;
2631 reg->setup_seqno =
2632 pipelined ? i915_gem_next_request_seqno(dev, pipelined) : 0;
2633 obj->last_fenced_seqno = reg->setup_seqno;
/* Write the register using the layout for this hardware generation.
 * NOTE(review): the switch has no default, so ret is not assigned here
 * for a gen outside 2..6 - confirm that cannot occur.
 */
2635 update:
2636 obj->tiling_changed = false;
2637 switch (INTEL_INFO(dev)->gen) {
2638 case 6:
2639 ret = sandybridge_write_fence_reg(obj, pipelined);
2640 break;
2641 case 5:
2642 case 4:
2643 ret = i965_write_fence_reg(obj, pipelined);
2644 break;
2645 case 3:
2646 ret = i915_write_fence_reg(obj, pipelined);
2647 break;
2648 case 2:
2649 ret = i830_write_fence_reg(obj, pipelined);
2650 break;
2653 return ret;
2657 * i915_gem_clear_fence_reg - clear out fence register info
2658 * @obj: object to clear
2660 * Zeroes out the fence register itself and clears out the associated
2661 * data structures in dev_priv and obj.
2663 static void
2664 i915_gem_clear_fence_reg(struct drm_device *dev,
2665 struct drm_i915_fence_reg *reg)
2667 drm_i915_private_t *dev_priv = dev->dev_private;
2668 uint32_t fence_reg = reg - dev_priv->fence_regs;
2670 switch (INTEL_INFO(dev)->gen) {
2671 case 6:
2672 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
2673 break;
2674 case 5:
2675 case 4:
2676 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
2677 break;
2678 case 3:
2679 if (fence_reg >= 8)
2680 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2681 else
2682 case 2:
2683 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2685 I915_WRITE(fence_reg, 0);
2686 break;
2689 list_del_init(&reg->lru_list);
2690 reg->obj = NULL;
2691 reg->setup_seqno = 0;
2695 * Finds free space in the GTT aperture and binds the object there.
/* @alignment: required GTT alignment, or 0 to use the default for the
 *             requested mapping type
 * @map_and_fenceable: if true, use the fence size/alignment and restrict the
 *             search to the range ending at mm.gtt_mappable_end
 *
 * Returns 0 on success; -EINVAL for a purgeable object or bad alignment,
 * -E2BIG if the object cannot fit, -ENOMEM or another helper error otherwise.
 */
2697 static int
2698 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2699 unsigned alignment,
2700 bool map_and_fenceable)
2702 struct drm_device *dev = obj->base.dev;
2703 drm_i915_private_t *dev_priv = dev->dev_private;
2704 struct drm_mm_node *free_space;
/* Page allocation is first attempted with these flags; they are dropped
 * (gfpmask = 0) for the final retry below.
 */
2705 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2706 u32 size, fence_size, fence_alignment, unfenced_alignment;
2707 bool mappable, fenceable;
2708 int ret;
2710 if (obj->madv != I915_MADV_WILLNEED) {
2711 DRM_ERROR("Attempting to bind a purgeable object\n");
2712 return -EINVAL;
2715 fence_size = i915_gem_get_gtt_size(obj);
2716 fence_alignment = i915_gem_get_gtt_alignment(obj);
2717 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
/* A zero alignment selects the default for the requested mapping type. */
2719 if (alignment == 0)
2720 alignment = map_and_fenceable ? fence_alignment :
2721 unfenced_alignment;
2722 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2723 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2724 return -EINVAL;
2727 size = map_and_fenceable ? fence_size : obj->base.size;
2729 /* If the object is bigger than the entire aperture, reject it early
2730 * before evicting everything in a vain attempt to find space.
2732 if (obj->base.size >
2733 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2734 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2735 return -E2BIG;
/* Retry point: re-entered after every eviction or allocation retry. */
2738 search_free:
2739 if (map_and_fenceable)
2740 free_space =
2741 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2742 size, alignment, 0,
2743 dev_priv->mm.gtt_mappable_end,
2745 else
2746 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2747 size, alignment, 0);
2749 if (free_space != NULL) {
2750 if (map_and_fenceable)
2751 obj->gtt_space =
2752 drm_mm_get_block_range_generic(free_space,
2753 size, alignment, 0,
2754 dev_priv->mm.gtt_mappable_end,
2756 else
2757 obj->gtt_space =
2758 drm_mm_get_block(free_space, size, alignment);
/* No node obtained: evict something suitable and search again. */
2760 if (obj->gtt_space == NULL) {
2761 /* If the gtt is empty and we're still having trouble
2762 * fitting our object in, we're out of memory.
2764 ret = i915_gem_evict_something(dev, size, alignment,
2765 map_and_fenceable);
2766 if (ret)
2767 return ret;
2769 goto search_free;
/* Obtain backing pages; on failure give the GTT node back before retrying. */
2772 ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
2773 if (ret) {
2774 drm_mm_put_block(obj->gtt_space);
2775 obj->gtt_space = NULL;
2777 if (ret == -ENOMEM) {
2778 /* first try to reclaim some memory by clearing the GTT */
2779 ret = i915_gem_evict_everything(dev, false);
2780 if (ret) {
2781 /* now try to shrink everyone else */
2782 if (gfpmask) {
2783 gfpmask = 0;
2784 goto search_free;
2787 return -ENOMEM;
2790 goto search_free;
2793 return ret;
/* Bind into the GTT; on failure undo both the pages and the node, then
 * retry only if evicting everything succeeds.
 */
2796 ret = i915_gem_gtt_bind_object(obj);
2797 if (ret) {
2798 i915_gem_object_put_pages_gtt(obj);
2799 drm_mm_put_block(obj->gtt_space);
2800 obj->gtt_space = NULL;
2802 if (i915_gem_evict_everything(dev, false))
2803 return ret;
2805 goto search_free;
2808 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2809 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2811 /* Assert that the object is not currently in any GPU domain. As it
2812 * wasn't in the GTT, there shouldn't be any way it could have been in
2813 * a GPU cache
2815 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2816 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2818 obj->gtt_offset = obj->gtt_space->start;
/* Record whether the final placement is usable for fenced, mappable
 * access, independent of what the caller asked for.
 */
2820 fenceable =
2821 obj->gtt_space->size == fence_size &&
2822 (obj->gtt_space->start & (fence_alignment -1)) == 0;
2824 mappable =
2825 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2827 obj->map_and_fenceable = mappable && fenceable;
2829 trace_i915_gem_object_bind(obj, obj->gtt_offset, map_and_fenceable);
2830 return 0;
2833 void
2834 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2836 /* If we don't have a page list set up, then we're not pinned
2837 * to GPU, and we can ignore the cache flush because it'll happen
2838 * again at bind time.
2840 if (obj->pages == NULL)
2841 return;
2843 trace_i915_gem_object_clflush(obj);
2845 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2848 /** Flushes any GPU write domain for the object if it's dirty. */
2849 static int
2850 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
2852 struct drm_device *dev = obj->base.dev;
2854 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2855 return 0;
2857 /* Queue the GPU write cache flushing we need. */
2858 return i915_gem_flush_ring(dev, obj->ring, 0, obj->base.write_domain);
2861 /** Flushes the GTT write domain for the object if it's dirty. */
2862 static void
2863 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2865 uint32_t old_write_domain;
2867 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2868 return;
2870 /* No actual flushing is required for the GTT write domain. Writes
2871 * to it immediately go to main memory as far as we know, so there's
2872 * no chipset flush. It also doesn't land in render cache.
2874 * However, we do have to enforce the order so that all writes through
2875 * the GTT land before any writes to the device, such as updates to
2876 * the GATT itself.
2878 wmb();
2880 i915_gem_release_mmap(obj);
2882 old_write_domain = obj->base.write_domain;
2883 obj->base.write_domain = 0;
2885 trace_i915_gem_object_change_domain(obj,
2886 obj->base.read_domains,
2887 old_write_domain);
2890 /** Flushes the CPU write domain for the object if it's dirty. */
2891 static void
2892 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2894 uint32_t old_write_domain;
2896 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2897 return;
2899 i915_gem_clflush_object(obj);
2900 intel_gtt_chipset_flush();
2901 old_write_domain = obj->base.write_domain;
2902 obj->base.write_domain = 0;
2904 trace_i915_gem_object_change_domain(obj,
2905 obj->base.read_domains,
2906 old_write_domain);
2910 * Moves a single object to the GTT read, and possibly write domain.
2912 * This function returns when the move is complete, including waiting on
2913 * flushes to occur.
2916 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2918 uint32_t old_write_domain, old_read_domains;
2919 int ret;
2921 /* Not valid to be called on unbound objects. */
2922 if (obj->gtt_space == NULL)
2923 return -EINVAL;
2925 ret = i915_gem_object_flush_gpu_write_domain(obj);
2926 if (ret)
2927 return ret;
2929 if (obj->pending_gpu_write || write) {
2930 ret = i915_gem_object_wait_rendering(obj, true);
2931 if (ret)
2932 return ret;
2935 i915_gem_object_flush_cpu_write_domain(obj);
2937 old_write_domain = obj->base.write_domain;
2938 old_read_domains = obj->base.read_domains;
2940 /* It should now be out of any other write domains, and we can update
2941 * the domain values for our changes.
2943 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2944 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2945 if (write) {
2946 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2947 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2948 obj->dirty = 1;
2951 trace_i915_gem_object_change_domain(obj,
2952 old_read_domains,
2953 old_write_domain);
2955 return 0;
2959 * Prepare buffer for display plane. Use uninterruptible for possible flush
2960 * wait, as in modesetting process we're not supposed to be interrupted.
2963 i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
2964 struct intel_ring_buffer *pipelined)
2966 uint32_t old_read_domains;
2967 int ret;
2969 /* Not valid to be called on unbound objects. */
2970 if (obj->gtt_space == NULL)
2971 return -EINVAL;
2973 ret = i915_gem_object_flush_gpu_write_domain(obj);
2974 if (ret)
2975 return ret;
2978 /* Currently, we are always called from an non-interruptible context. */
2979 if (pipelined != obj->ring) {
2980 ret = i915_gem_object_wait_rendering(obj, false);
2981 if (ret)
2982 return ret;
2985 i915_gem_object_flush_cpu_write_domain(obj);
2987 old_read_domains = obj->base.read_domains;
2988 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2990 trace_i915_gem_object_change_domain(obj,
2991 old_read_domains,
2992 obj->base.write_domain);
2994 return 0;
2998 i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj,
2999 bool interruptible)
3001 int ret;
3003 if (!obj->active)
3004 return 0;
3006 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3007 ret = i915_gem_flush_ring(obj->base.dev, obj->ring,
3008 0, obj->base.write_domain);
3009 if (ret)
3010 return ret;
3013 return i915_gem_object_wait_rendering(obj, interruptible);
3017 * Moves a single object to the CPU read, and possibly write domain.
3019 * This function returns when the move is complete, including waiting on
3020 * flushes to occur.
3022 static int
3023 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3025 uint32_t old_write_domain, old_read_domains;
3026 int ret;
3028 ret = i915_gem_object_flush_gpu_write_domain(obj);
3029 if (ret)
3030 return ret;
3032 ret = i915_gem_object_wait_rendering(obj, true);
3033 if (ret)
3034 return ret;
3036 i915_gem_object_flush_gtt_write_domain(obj);
3038 /* If we have a partially-valid cache of the object in the CPU,
3039 * finish invalidating it and free the per-page flags.
3041 i915_gem_object_set_to_full_cpu_read_domain(obj);
3043 old_write_domain = obj->base.write_domain;
3044 old_read_domains = obj->base.read_domains;
3046 /* Flush the CPU cache if it's still invalid. */
3047 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3048 i915_gem_clflush_object(obj);
3050 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3053 /* It should now be out of any other write domains, and we can update
3054 * the domain values for our changes.
3056 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3058 /* If we're writing through the CPU, then the GPU read domains will
3059 * need to be invalidated at next use.
3061 if (write) {
3062 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3063 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3066 trace_i915_gem_object_change_domain(obj,
3067 old_read_domains,
3068 old_write_domain);
3070 return 0;
3074 * Moves the object from a partially CPU read to a full one.
3076 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3077 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3079 static void
3080 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
3082 if (!obj->page_cpu_valid)
3083 return;
3085 /* If we're partially in the CPU read domain, finish moving it in.
3087 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
3088 int i;
3090 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
3091 if (obj->page_cpu_valid[i])
3092 continue;
3093 drm_clflush_pages(obj->pages + i, 1);
3097 /* Free the page_cpu_valid mappings which are now stale, whether
3098 * or not we've got I915_GEM_DOMAIN_CPU.
3100 kfree(obj->page_cpu_valid);
3101 obj->page_cpu_valid = NULL;
3105 * Set the CPU read domain on a range of the object.
3107 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3108 * not entirely valid. The page_cpu_valid member of the object flags which
3109 * pages have been flushed, and will be respected by
3110 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3111 * of the whole object.
3113 * This function returns when the move is complete, including waiting on
3114 * flushes to occur.
3116 static int
3117 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
3118 uint64_t offset, uint64_t size)
3120 uint32_t old_read_domains;
3121 int i, ret;
3123 if (offset == 0 && size == obj->base.size)
3124 return i915_gem_object_set_to_cpu_domain(obj, 0);
3126 ret = i915_gem_object_flush_gpu_write_domain(obj);
3127 if (ret)
3128 return ret;
3130 ret = i915_gem_object_wait_rendering(obj, true);
3131 if (ret)
3132 return ret;
3134 i915_gem_object_flush_gtt_write_domain(obj);
3136 /* If we're already fully in the CPU read domain, we're done. */
3137 if (obj->page_cpu_valid == NULL &&
3138 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
3139 return 0;
3141 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3142 * newly adding I915_GEM_DOMAIN_CPU
3144 if (obj->page_cpu_valid == NULL) {
3145 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
3146 GFP_KERNEL);
3147 if (obj->page_cpu_valid == NULL)
3148 return -ENOMEM;
3149 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
3150 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
3152 /* Flush the cache on any pages that are still invalid from the CPU's
3153 * perspective.
3155 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3156 i++) {
3157 if (obj->page_cpu_valid[i])
3158 continue;
3160 drm_clflush_pages(obj->pages + i, 1);
3162 obj->page_cpu_valid[i] = 1;
3165 /* It should now be out of any other write domains, and we can update
3166 * the domain values for our changes.
3168 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3170 old_read_domains = obj->base.read_domains;
3171 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3173 trace_i915_gem_object_change_domain(obj,
3174 old_read_domains,
3175 obj->base.write_domain);
3177 return 0;
3180 /* Throttle our rendering by waiting until the ring has completed our requests
3181 * emitted over 20 msec ago.
3183 * Note that if we were to use the current jiffies each time around the loop,
3184 * we wouldn't escape the function with any frames outstanding if the time to
3185 * render a frame was over 20ms.
3187 * This should get us reasonable parallelism between CPU and GPU but also
3188 * relatively low latency when blocking on a particular request to finish.
3190 static int
3191 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3193 struct drm_i915_private *dev_priv = dev->dev_private;
3194 struct drm_i915_file_private *file_priv = file->driver_priv;
3195 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3196 struct drm_i915_gem_request *request;
3197 struct intel_ring_buffer *ring = NULL;
3198 u32 seqno = 0;
3199 int ret;
3201 if (atomic_read(&dev_priv->mm.wedged))
3202 return -EIO;
3204 spin_lock(&file_priv->mm.lock);
3205 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3206 if (time_after_eq(request->emitted_jiffies, recent_enough))
3207 break;
3209 ring = request->ring;
3210 seqno = request->seqno;
3212 spin_unlock(&file_priv->mm.lock);
3214 if (seqno == 0)
3215 return 0;
3217 ret = 0;
3218 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3219 /* And wait for the seqno passing without holding any locks and
3220 * causing extra latency for others. This is safe as the irq
3221 * generation is designed to be run atomically and so is
3222 * lockless.
3224 if (ring->irq_get(ring)) {
3225 ret = wait_event_interruptible(ring->irq_queue,
3226 i915_seqno_passed(ring->get_seqno(ring), seqno)
3227 || atomic_read(&dev_priv->mm.wedged));
3228 ring->irq_put(ring);
3230 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3231 ret = -EIO;
3235 if (ret == 0)
3236 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3238 return ret;
3242 i915_gem_object_pin(struct drm_i915_gem_object *obj,
3243 uint32_t alignment,
3244 bool map_and_fenceable)
3246 struct drm_device *dev = obj->base.dev;
3247 struct drm_i915_private *dev_priv = dev->dev_private;
3248 int ret;
3250 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
3251 WARN_ON(i915_verify_lists(dev));
3253 if (obj->gtt_space != NULL) {
3254 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3255 (map_and_fenceable && !obj->map_and_fenceable)) {
3256 WARN(obj->pin_count,
3257 "bo is already pinned with incorrect alignment:"
3258 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3259 " obj->map_and_fenceable=%d\n",
3260 obj->gtt_offset, alignment,
3261 map_and_fenceable,
3262 obj->map_and_fenceable);
3263 ret = i915_gem_object_unbind(obj);
3264 if (ret)
3265 return ret;
3269 if (obj->gtt_space == NULL) {
3270 ret = i915_gem_object_bind_to_gtt(obj, alignment,
3271 map_and_fenceable);
3272 if (ret)
3273 return ret;
3276 if (obj->pin_count++ == 0) {
3277 if (!obj->active)
3278 list_move_tail(&obj->mm_list,
3279 &dev_priv->mm.pinned_list);
3281 obj->pin_mappable |= map_and_fenceable;
3283 WARN_ON(i915_verify_lists(dev));
3284 return 0;
3287 void
3288 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3290 struct drm_device *dev = obj->base.dev;
3291 drm_i915_private_t *dev_priv = dev->dev_private;
3293 WARN_ON(i915_verify_lists(dev));
3294 BUG_ON(obj->pin_count == 0);
3295 BUG_ON(obj->gtt_space == NULL);
3297 if (--obj->pin_count == 0) {
3298 if (!obj->active)
3299 list_move_tail(&obj->mm_list,
3300 &dev_priv->mm.inactive_list);
3301 obj->pin_mappable = false;
3303 WARN_ON(i915_verify_lists(dev));
3307 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3308 struct drm_file *file)
3310 struct drm_i915_gem_pin *args = data;
3311 struct drm_i915_gem_object *obj;
3312 int ret;
3314 ret = i915_mutex_lock_interruptible(dev);
3315 if (ret)
3316 return ret;
3318 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3319 if (obj == NULL) {
3320 ret = -ENOENT;
3321 goto unlock;
3324 if (obj->madv != I915_MADV_WILLNEED) {
3325 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3326 ret = -EINVAL;
3327 goto out;
3330 if (obj->pin_filp != NULL && obj->pin_filp != file) {
3331 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3332 args->handle);
3333 ret = -EINVAL;
3334 goto out;
3337 obj->user_pin_count++;
3338 obj->pin_filp = file;
3339 if (obj->user_pin_count == 1) {
3340 ret = i915_gem_object_pin(obj, args->alignment, true);
3341 if (ret)
3342 goto out;
3345 /* XXX - flush the CPU caches for pinned objects
3346 * as the X server doesn't manage domains yet
3348 i915_gem_object_flush_cpu_write_domain(obj);
3349 args->offset = obj->gtt_offset;
3350 out:
3351 drm_gem_object_unreference(&obj->base);
3352 unlock:
3353 mutex_unlock(&dev->struct_mutex);
3354 return ret;
3358 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3359 struct drm_file *file)
3361 struct drm_i915_gem_pin *args = data;
3362 struct drm_i915_gem_object *obj;
3363 int ret;
3365 ret = i915_mutex_lock_interruptible(dev);
3366 if (ret)
3367 return ret;
3369 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3370 if (obj == NULL) {
3371 ret = -ENOENT;
3372 goto unlock;
3375 if (obj->pin_filp != file) {
3376 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3377 args->handle);
3378 ret = -EINVAL;
3379 goto out;
3381 obj->user_pin_count--;
3382 if (obj->user_pin_count == 0) {
3383 obj->pin_filp = NULL;
3384 i915_gem_object_unpin(obj);
3387 out:
3388 drm_gem_object_unreference(&obj->base);
3389 unlock:
3390 mutex_unlock(&dev->struct_mutex);
3391 return ret;
3395 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3396 struct drm_file *file)
3398 struct drm_i915_gem_busy *args = data;
3399 struct drm_i915_gem_object *obj;
3400 int ret;
3402 ret = i915_mutex_lock_interruptible(dev);
3403 if (ret)
3404 return ret;
3406 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3407 if (obj == NULL) {
3408 ret = -ENOENT;
3409 goto unlock;
3412 /* Count all active objects as busy, even if they are currently not used
3413 * by the gpu. Users of this interface expect objects to eventually
3414 * become non-busy without any further actions, therefore emit any
3415 * necessary flushes here.
3417 args->busy = obj->active;
3418 if (args->busy) {
3419 /* Unconditionally flush objects, even when the gpu still uses this
3420 * object. Userspace calling this function indicates that it wants to
3421 * use this buffer rather sooner than later, so issuing the required
3422 * flush earlier is beneficial.
3424 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3425 ret = i915_gem_flush_ring(dev, obj->ring,
3426 0, obj->base.write_domain);
3427 } else if (obj->ring->outstanding_lazy_request ==
3428 obj->last_rendering_seqno) {
3429 struct drm_i915_gem_request *request;
3431 /* This ring is not being cleared by active usage,
3432 * so emit a request to do so.
3434 request = kzalloc(sizeof(*request), GFP_KERNEL);
3435 if (request)
3436 ret = i915_add_request(dev,
3437 NULL, request,
3438 obj->ring);
3439 else
3440 ret = -ENOMEM;
3443 /* Update the active list for the hardware's current position.
3444 * Otherwise this only updates on a delayed timer or when irqs
3445 * are actually unmasked, and our working set ends up being
3446 * larger than required.
3448 i915_gem_retire_requests_ring(dev, obj->ring);
3450 args->busy = obj->active;
3453 drm_gem_object_unreference(&obj->base);
3454 unlock:
3455 mutex_unlock(&dev->struct_mutex);
3456 return ret;
/*
 * Throttle ioctl: thin wrapper that throttles the calling file's rendering
 * via i915_gem_ring_throttle(). @data is unused.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}
3467 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3468 struct drm_file *file_priv)
3470 struct drm_i915_gem_madvise *args = data;
3471 struct drm_i915_gem_object *obj;
3472 int ret;
3474 switch (args->madv) {
3475 case I915_MADV_DONTNEED:
3476 case I915_MADV_WILLNEED:
3477 break;
3478 default:
3479 return -EINVAL;
3482 ret = i915_mutex_lock_interruptible(dev);
3483 if (ret)
3484 return ret;
3486 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3487 if (obj == NULL) {
3488 ret = -ENOENT;
3489 goto unlock;
3492 if (obj->pin_count) {
3493 ret = -EINVAL;
3494 goto out;
3497 if (obj->madv != __I915_MADV_PURGED)
3498 obj->madv = args->madv;
3500 /* if the object is no longer bound, discard its backing storage */
3501 if (i915_gem_object_is_purgeable(obj) &&
3502 obj->gtt_space == NULL)
3503 i915_gem_object_truncate(obj);
3505 args->retained = obj->madv != __I915_MADV_PURGED;
3507 out:
3508 drm_gem_object_unreference(&obj->base);
3509 unlock:
3510 mutex_unlock(&dev->struct_mutex);
3511 return ret;
3514 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3515 size_t size)
3517 struct drm_i915_private *dev_priv = dev->dev_private;
3518 struct drm_i915_gem_object *obj;
3520 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
3521 if (obj == NULL)
3522 return NULL;
3524 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3525 kfree(obj);
3526 return NULL;
3529 i915_gem_info_add_obj(dev_priv, size);
3531 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3532 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3534 obj->agp_type = AGP_USER_MEMORY;
3535 obj->base.driver_private = NULL;
3536 obj->fence_reg = I915_FENCE_REG_NONE;
3537 INIT_LIST_HEAD(&obj->mm_list);
3538 INIT_LIST_HEAD(&obj->gtt_list);
3539 INIT_LIST_HEAD(&obj->ring_list);
3540 INIT_LIST_HEAD(&obj->exec_list);
3541 INIT_LIST_HEAD(&obj->gpu_write_list);
3542 obj->madv = I915_MADV_WILLNEED;
3543 /* Avoid an unnecessary call to unbind on the first bind. */
3544 obj->map_and_fenceable = true;
3546 return obj;
/*
 * Object-init hook that must never run: reaching it triggers BUG().
 * The return statement only satisfies the function's int signature.
 */
int i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();

	return 0;
}
3556 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
3558 struct drm_device *dev = obj->base.dev;
3559 drm_i915_private_t *dev_priv = dev->dev_private;
3560 int ret;
3562 ret = i915_gem_object_unbind(obj);
3563 if (ret == -ERESTARTSYS) {
3564 list_move(&obj->mm_list,
3565 &dev_priv->mm.deferred_free_list);
3566 return;
3569 if (obj->base.map_list.map)
3570 i915_gem_free_mmap_offset(obj);
3572 drm_gem_object_release(&obj->base);
3573 i915_gem_info_remove_obj(dev_priv, obj->base.size);
3575 kfree(obj->page_cpu_valid);
3576 kfree(obj->bit_17);
3577 kfree(obj);
3580 void i915_gem_free_object(struct drm_gem_object *gem_obj)
3582 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3583 struct drm_device *dev = obj->base.dev;
3585 trace_i915_gem_object_destroy(obj);
3587 while (obj->pin_count > 0)
3588 i915_gem_object_unpin(obj);
3590 if (obj->phys_obj)
3591 i915_gem_detach_phys_object(dev, obj);
3593 i915_gem_free_object_tail(obj);
3597 i915_gem_idle(struct drm_device *dev)
3599 drm_i915_private_t *dev_priv = dev->dev_private;
3600 int ret;
3602 mutex_lock(&dev->struct_mutex);
3604 if (dev_priv->mm.suspended) {
3605 mutex_unlock(&dev->struct_mutex);
3606 return 0;
3609 ret = i915_gpu_idle(dev);
3610 if (ret) {
3611 mutex_unlock(&dev->struct_mutex);
3612 return ret;
3615 /* Under UMS, be paranoid and evict. */
3616 if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
3617 ret = i915_gem_evict_inactive(dev, false);
3618 if (ret) {
3619 mutex_unlock(&dev->struct_mutex);
3620 return ret;
3624 i915_gem_reset_fences(dev);
3626 /* Hack! Don't let anybody do execbuf while we don't control the chip.
3627 * We need to replace this with a semaphore, or something.
3628 * And not confound mm.suspended!
3630 dev_priv->mm.suspended = 1;
3631 del_timer_sync(&dev_priv->hangcheck_timer);
3633 i915_kernel_lost_context(dev);
3634 i915_gem_cleanup_ringbuffer(dev);
3636 mutex_unlock(&dev->struct_mutex);
3638 /* Cancel the retire work handler, which should be idle now. */
3639 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3641 return 0;
3645 i915_gem_init_ringbuffer(struct drm_device *dev)
3647 drm_i915_private_t *dev_priv = dev->dev_private;
3648 int ret;
3650 ret = intel_init_render_ring_buffer(dev);
3651 if (ret)
3652 return ret;
3654 if (HAS_BSD(dev)) {
3655 ret = intel_init_bsd_ring_buffer(dev);
3656 if (ret)
3657 goto cleanup_render_ring;
3660 if (HAS_BLT(dev)) {
3661 ret = intel_init_blt_ring_buffer(dev);
3662 if (ret)
3663 goto cleanup_bsd_ring;
3666 dev_priv->next_seqno = 1;
3668 return 0;
3670 cleanup_bsd_ring:
3671 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3672 cleanup_render_ring:
3673 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3674 return ret;
3677 void
3678 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3680 drm_i915_private_t *dev_priv = dev->dev_private;
3681 int i;
3683 for (i = 0; i < I915_NUM_RINGS; i++)
3684 intel_cleanup_ring_buffer(&dev_priv->ring[i]);
3688 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3689 struct drm_file *file_priv)
3691 drm_i915_private_t *dev_priv = dev->dev_private;
3692 int ret, i;
3694 if (drm_core_check_feature(dev, DRIVER_MODESET))
3695 return 0;
3697 if (atomic_read(&dev_priv->mm.wedged)) {
3698 DRM_ERROR("Reenabling wedged hardware, good luck\n");
3699 atomic_set(&dev_priv->mm.wedged, 0);
3702 mutex_lock(&dev->struct_mutex);
3703 dev_priv->mm.suspended = 0;
3705 ret = i915_gem_init_ringbuffer(dev);
3706 if (ret != 0) {
3707 mutex_unlock(&dev->struct_mutex);
3708 return ret;
3711 BUG_ON(!list_empty(&dev_priv->mm.active_list));
3712 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3713 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
3714 for (i = 0; i < I915_NUM_RINGS; i++) {
3715 BUG_ON(!list_empty(&dev_priv->ring[i].active_list));
3716 BUG_ON(!list_empty(&dev_priv->ring[i].request_list));
3718 mutex_unlock(&dev->struct_mutex);
3720 ret = drm_irq_install(dev);
3721 if (ret)
3722 goto cleanup_ringbuffer;
3724 return 0;
3726 cleanup_ringbuffer:
3727 mutex_lock(&dev->struct_mutex);
3728 i915_gem_cleanup_ringbuffer(dev);
3729 dev_priv->mm.suspended = 1;
3730 mutex_unlock(&dev->struct_mutex);
3732 return ret;
3736 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3737 struct drm_file *file_priv)
3739 if (drm_core_check_feature(dev, DRIVER_MODESET))
3740 return 0;
3742 drm_irq_uninstall(dev);
3743 return i915_gem_idle(dev);
3746 void
3747 i915_gem_lastclose(struct drm_device *dev)
3749 int ret;
3751 if (drm_core_check_feature(dev, DRIVER_MODESET))
3752 return;
3754 ret = i915_gem_idle(dev);
3755 if (ret)
3756 DRM_ERROR("failed to idle hardware: %d\n", ret);
3759 static void
3760 init_ring_lists(struct intel_ring_buffer *ring)
3762 INIT_LIST_HEAD(&ring->active_list);
3763 INIT_LIST_HEAD(&ring->request_list);
3764 INIT_LIST_HEAD(&ring->gpu_write_list);
3767 void
3768 i915_gem_load(struct drm_device *dev)
3770 int i;
3771 drm_i915_private_t *dev_priv = dev->dev_private;
3773 INIT_LIST_HEAD(&dev_priv->mm.active_list);
3774 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
3775 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3776 INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
3777 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3778 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
3779 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
3780 for (i = 0; i < I915_NUM_RINGS; i++)
3781 init_ring_lists(&dev_priv->ring[i]);
3782 for (i = 0; i < 16; i++)
3783 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
3784 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3785 i915_gem_retire_work_handler);
3786 init_completion(&dev_priv->error_completion);
3788 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3789 if (IS_GEN3(dev)) {
3790 u32 tmp = I915_READ(MI_ARB_STATE);
3791 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
3792 /* arb state is a masked write, so set bit + bit in mask */
3793 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
3794 I915_WRITE(MI_ARB_STATE, tmp);
3798 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3800 /* Old X drivers will take 0-2 for front, back, depth buffers */
3801 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3802 dev_priv->fence_reg_start = 3;
3804 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3805 dev_priv->num_fence_regs = 16;
3806 else
3807 dev_priv->num_fence_regs = 8;
3809 /* Initialize fence registers to zero */
3810 switch (INTEL_INFO(dev)->gen) {
3811 case 6:
3812 for (i = 0; i < 16; i++)
3813 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
3814 break;
3815 case 5:
3816 case 4:
3817 for (i = 0; i < 16; i++)
3818 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
3819 break;
3820 case 3:
3821 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3822 for (i = 0; i < 8; i++)
3823 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
3824 case 2:
3825 for (i = 0; i < 8; i++)
3826 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
3827 break;
3829 i915_gem_detect_bit_6_swizzle(dev);
3830 init_waitqueue_head(&dev_priv->pending_flip_queue);
3832 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3833 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3834 register_shrinker(&dev_priv->mm.inactive_shrinker);
3838 * Create a physically contiguous memory object for this object
3839 * e.g. for cursor + overlay regs
3841 static int i915_gem_init_phys_object(struct drm_device *dev,
3842 int id, int size, int align)
3844 drm_i915_private_t *dev_priv = dev->dev_private;
3845 struct drm_i915_gem_phys_object *phys_obj;
3846 int ret;
3848 if (dev_priv->mm.phys_objs[id - 1] || !size)
3849 return 0;
3851 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
3852 if (!phys_obj)
3853 return -ENOMEM;
3855 phys_obj->id = id;
3857 phys_obj->handle = drm_pci_alloc(dev, size, align);
3858 if (!phys_obj->handle) {
3859 ret = -ENOMEM;
3860 goto kfree_obj;
3862 #ifdef CONFIG_X86
3863 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3864 #endif
3866 dev_priv->mm.phys_objs[id - 1] = phys_obj;
3868 return 0;
3869 kfree_obj:
3870 kfree(phys_obj);
3871 return ret;
3874 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
3876 drm_i915_private_t *dev_priv = dev->dev_private;
3877 struct drm_i915_gem_phys_object *phys_obj;
3879 if (!dev_priv->mm.phys_objs[id - 1])
3880 return;
3882 phys_obj = dev_priv->mm.phys_objs[id - 1];
3883 if (phys_obj->cur_obj) {
3884 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3887 #ifdef CONFIG_X86
3888 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3889 #endif
3890 drm_pci_free(dev, phys_obj->handle);
3891 kfree(phys_obj);
3892 dev_priv->mm.phys_objs[id - 1] = NULL;
3895 void i915_gem_free_all_phys_object(struct drm_device *dev)
3897 int i;
3899 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
3900 i915_gem_free_phys_object(dev, i);
3903 void i915_gem_detach_phys_object(struct drm_device *dev,
3904 struct drm_i915_gem_object *obj)
3906 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3907 char *vaddr;
3908 int i;
3909 int page_count;
3911 if (!obj->phys_obj)
3912 return;
3913 vaddr = obj->phys_obj->handle->vaddr;
3915 page_count = obj->base.size / PAGE_SIZE;
3916 for (i = 0; i < page_count; i++) {
3917 struct page *page = read_cache_page_gfp(mapping, i,
3918 GFP_HIGHUSER | __GFP_RECLAIMABLE);
3919 if (!IS_ERR(page)) {
3920 char *dst = kmap_atomic(page);
3921 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
3922 kunmap_atomic(dst);
3924 drm_clflush_pages(&page, 1);
3926 set_page_dirty(page);
3927 mark_page_accessed(page);
3928 page_cache_release(page);
3931 intel_gtt_chipset_flush();
3933 obj->phys_obj->cur_obj = NULL;
3934 obj->phys_obj = NULL;
3938 i915_gem_attach_phys_object(struct drm_device *dev,
3939 struct drm_i915_gem_object *obj,
3940 int id,
3941 int align)
3943 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3944 drm_i915_private_t *dev_priv = dev->dev_private;
3945 int ret = 0;
3946 int page_count;
3947 int i;
3949 if (id > I915_MAX_PHYS_OBJECT)
3950 return -EINVAL;
3952 if (obj->phys_obj) {
3953 if (obj->phys_obj->id == id)
3954 return 0;
3955 i915_gem_detach_phys_object(dev, obj);
3958 /* create a new object */
3959 if (!dev_priv->mm.phys_objs[id - 1]) {
3960 ret = i915_gem_init_phys_object(dev, id,
3961 obj->base.size, align);
3962 if (ret) {
3963 DRM_ERROR("failed to init phys object %d size: %zu\n",
3964 id, obj->base.size);
3965 return ret;
3969 /* bind to the object */
3970 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
3971 obj->phys_obj->cur_obj = obj;
3973 page_count = obj->base.size / PAGE_SIZE;
3975 for (i = 0; i < page_count; i++) {
3976 struct page *page;
3977 char *dst, *src;
3979 page = read_cache_page_gfp(mapping, i,
3980 GFP_HIGHUSER | __GFP_RECLAIMABLE);
3981 if (IS_ERR(page))
3982 return PTR_ERR(page);
3984 src = kmap_atomic(page);
3985 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
3986 memcpy(dst, src, PAGE_SIZE);
3987 kunmap_atomic(src);
3989 mark_page_accessed(page);
3990 page_cache_release(page);
3993 return 0;
3996 static int
3997 i915_gem_phys_pwrite(struct drm_device *dev,
3998 struct drm_i915_gem_object *obj,
3999 struct drm_i915_gem_pwrite *args,
4000 struct drm_file *file_priv)
4002 void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
4003 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
4005 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
4006 unsigned long unwritten;
4008 /* The physical object once assigned is fixed for the lifetime
4009 * of the obj, so we can safely drop the lock and continue
4010 * to access vaddr.
4012 mutex_unlock(&dev->struct_mutex);
4013 unwritten = copy_from_user(vaddr, user_data, args->size);
4014 mutex_lock(&dev->struct_mutex);
4015 if (unwritten)
4016 return -EFAULT;
4019 intel_gtt_chipset_flush();
4020 return 0;
4023 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4025 struct drm_i915_file_private *file_priv = file->driver_priv;
4027 /* Clean up our request list when the client is going away, so that
4028 * later retire_requests won't dereference our soon-to-be-gone
4029 * file_priv.
4031 spin_lock(&file_priv->mm.lock);
4032 while (!list_empty(&file_priv->mm.request_list)) {
4033 struct drm_i915_gem_request *request;
4035 request = list_first_entry(&file_priv->mm.request_list,
4036 struct drm_i915_gem_request,
4037 client_list);
4038 list_del(&request->client_list);
4039 request->file_priv = NULL;
4041 spin_unlock(&file_priv->mm.lock);
4044 static int
4045 i915_gpu_is_active(struct drm_device *dev)
4047 drm_i915_private_t *dev_priv = dev->dev_private;
4048 int lists_empty;
4050 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
4051 list_empty(&dev_priv->mm.active_list);
4053 return !lists_empty;
4056 static int
4057 i915_gem_inactive_shrink(struct shrinker *shrinker,
4058 int nr_to_scan,
4059 gfp_t gfp_mask)
4061 struct drm_i915_private *dev_priv =
4062 container_of(shrinker,
4063 struct drm_i915_private,
4064 mm.inactive_shrinker);
4065 struct drm_device *dev = dev_priv->dev;
4066 struct drm_i915_gem_object *obj, *next;
4067 int cnt;
4069 if (!mutex_trylock(&dev->struct_mutex))
4070 return 0;
4072 /* "fast-path" to count number of available objects */
4073 if (nr_to_scan == 0) {
4074 cnt = 0;
4075 list_for_each_entry(obj,
4076 &dev_priv->mm.inactive_list,
4077 mm_list)
4078 cnt++;
4079 mutex_unlock(&dev->struct_mutex);
4080 return cnt / 100 * sysctl_vfs_cache_pressure;
4083 rescan:
4084 /* first scan for clean buffers */
4085 i915_gem_retire_requests(dev);
4087 list_for_each_entry_safe(obj, next,
4088 &dev_priv->mm.inactive_list,
4089 mm_list) {
4090 if (i915_gem_object_is_purgeable(obj)) {
4091 if (i915_gem_object_unbind(obj) == 0 &&
4092 --nr_to_scan == 0)
4093 break;
4097 /* second pass, evict/count anything still on the inactive list */
4098 cnt = 0;
4099 list_for_each_entry_safe(obj, next,
4100 &dev_priv->mm.inactive_list,
4101 mm_list) {
4102 if (nr_to_scan &&
4103 i915_gem_object_unbind(obj) == 0)
4104 nr_to_scan--;
4105 else
4106 cnt++;
4109 if (nr_to_scan && i915_gpu_is_active(dev)) {
4111 * We are desperate for pages, so as a last resort, wait
4112 * for the GPU to finish and discard whatever we can.
4113 * This has a dramatic impact to reduce the number of
4114 * OOM-killer events whilst running the GPU aggressively.
4116 if (i915_gpu_idle(dev) == 0)
4117 goto rescan;
4119 mutex_unlock(&dev->struct_mutex);
4120 return cnt / 100 * sysctl_vfs_cache_pressure;