/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 */
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
							   bool write);
static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
								   uint64_t offset,
								   uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						     unsigned alignment,
						     bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_device *dev,
				     struct drm_i915_fence_reg *reg);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}
i915_gem_wait_for_error(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;

	if (!atomic_read(&dev_priv->mm.wedged))

	ret = wait_for_completion_interruptible(x);

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		spin_unlock_irqrestore(&x->wait.lock, flags);
int i915_mutex_lock_interruptible(struct drm_device *dev)
	ret = i915_gem_wait_for_error(dev);

	ret = mutex_lock_interruptible(&dev->struct_mutex);

	WARN_ON(i915_verify_lists(dev));
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
void i915_gem_do_init(struct drm_device *dev,
		      unsigned long mappable_end,
	drm_i915_private_t *dev_priv = dev->dev_private;

	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);

	dev_priv->mm.gtt_start = start;
	dev_priv->mm.gtt_mappable_end = mappable_end;
	dev_priv->mm.gtt_end = end;
	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

	/* Take over this portion of the GTT */
	intel_gtt_clear_range(start / PAGE_SIZE, (end - start) / PAGE_SIZE);
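/*
 * Bookkeeping example (hypothetical numbers): with start = 256MB, end = 2GB
 * and mappable_end = 512MB, gtt_total covers the 2GB - 256MB of managed GTT
 * space, while mappable_gtt_total = min(2GB, 512MB) - 256MB = 256MB, i.e.
 * the slice of that range the CPU can reach through the aperture.
 */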
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))

	mutex_lock(&dev->struct_mutex);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
		pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
	struct drm_i915_gem_object *obj;

	size = roundup(size, PAGE_SIZE);

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
	return drm_gem_handle_delete(file, handle);
/**
 * Creates a new mm object and returns a handle to it.
 */
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
	struct drm_i915_gem_create *args = data;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
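/*
 * With the 9/10/17 swizzle mode the hardware folds physical address bit 17
 * into the tiling swizzle, but bit 17 of a page can change under the CPU
 * (pages are swapped and reallocated), so the kernel has to compensate when
 * copying to or from tiled objects; hence the slow bit-17 copy paths below.
 */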
slow_shmem_copy(struct page *dst_page,
		struct page *src_page,
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
slow_shmem_bit17_copy(struct page *gpu_page,
		      struct page *cpu_page,
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
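/*
 * Worked example: for a page whose physical bit 17 is set, a copy aimed at
 * gpu_offset 0x00 actually touches 0x40 (0x00 ^ 64) and vice versa, so
 * adjacent 64-byte cachelines swap places (0x80 and 0xc0 swap the same way).
 * Advancing in cacheline-sized chunks keeps each memcpy inside a single
 * swizzled cacheline.
 */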
/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
i915_gem_shmem_pread_fast(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;

	offset = args->offset;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_to_user_inatomic(user_data,
					      vaddr + page_offset,
					      page_length);
		kunmap_atomic(vaddr);

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
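/*
 * Note: the copy above runs under kmap_atomic and therefore must not sleep;
 * a fault on the user address leaves __copy_to_user_inatomic reporting
 * uncopied bytes and the caller falls back to the pread_slow path.
 */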
/**
 * This is the fallback shmem pread path, which allocates temporary storage
 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
i915_gem_shmem_pread_slow(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
			return PTR_ERR(page);

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      user_pages[data_page_index],
			slow_shmem_copy(user_pages[data_page_index],

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;

	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		mark_page_accessed(user_pages[i]);
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))

	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
				       args->size);

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);

	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
	else
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/* This is the fast write path which cannot handle
 * page faults in the source data
 */
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);

/* Here's the write path which can sleep for
 * page faults
 */
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
	char __iomem *dst_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	io_mapping_unmap(dst_vaddr);
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
	drm_i915_private_t *dev_priv = dev->dev_private;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;

	offset = obj->gtt_offset + args->offset;

		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
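/*
 * Example of the split above (hypothetical offset): for aperture offset
 * 0x12345, page_base = 0x12000 and page_offset = 0x345, so at most
 * PAGE_SIZE - 0x345 bytes are written before the loop moves to the next page.
 */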
/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
	drm_i915_private_t *dev_priv = dev->dev_private;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	uint64_t data_ptr = args->data_ptr;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		goto out_unpin_pages;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
		goto out_unpin_pages;

	ret = i915_gem_object_put_fence(obj);
		goto out_unpin_pages;

	offset = obj->gtt_offset + args->offset;

		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;

	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);
/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;

	offset = args->offset;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
			return PTR_ERR(page);

		vaddr = kmap_atomic(page, KM_USER0);
		ret = __copy_from_user_inatomic(vaddr + page_offset,
						user_data, page_length);
		kunmap_atomic(vaddr, KM_USER0);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      user_pages[data_page_index],
			slow_shmem_copy(page,
					user_pages[data_page_index],

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;

	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))

	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
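	/* Path selection below, in outline: objects with physical-memory
	 * backing are handled by i915_gem_phys_pwrite(); objects already
	 * bound into the GTT whose writes are not being tracked in the CPU
	 * domain take the GTT pwrite path (fast first, slow on a fault);
	 * everything else goes through the shmem paths, with the slow copy
	 * used when bit-17 swizzling applies.
	 */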
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
	else if (obj->gtt_space &&
		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true);

		ret = i915_gem_object_set_to_gtt_domain(obj, true);

		ret = i915_gem_object_put_fence(obj);

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);

			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

		i915_gem_object_unpin(obj);

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);

		if (!i915_gem_object_needs_bit17_swizzle(obj))
			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
		else
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)

	if (read_domains & I915_GEM_GPU_DOMAINS)

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/**
 * Called when user space has done writes to this buffer
 */
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {

	/* Pinned buffers may be scanout, so flush the cache */
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	obj = drm_gem_object_lookup(dev, file, args->handle);

	if (obj->size > dev_priv->mm.gtt_mappable_end) {
		drm_gem_object_unreference_unlocked(obj);

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))

	args->addr_ptr = (uint64_t) addr;
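/*
 * This ioctl maps the object's shmem backing store directly into the
 * process (a CPU-domain mapping); GTT mappings instead go through the
 * separate mmap_gtt path and the fault handler below.
 */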
/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Now bind it into the GTT if needed */
	if (!obj->map_and_fenceable) {
		ret = i915_gem_object_unbind(obj);

	if (!obj->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);

		ret = i915_gem_object_set_to_gtt_domain(obj, write);

	if (obj->tiling_mode == I915_TILING_NONE)
		ret = i915_gem_object_put_fence(obj);
	else
		ret = i915_gem_object_get_fence(obj, NULL);

	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->fault_mappable = true;

	pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);

	mutex_unlock(&dev->struct_mutex);

		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
		return VM_FAULT_NOPAGE;
		return VM_FAULT_OOM;
		return VM_FAULT_SIGBUS;
/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call. The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;
	struct drm_local_map *map;

	/* Set the object up for mmap'ing */
	list = &obj->base.map_list;
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);

	map->type = _DRM_GEM;
	map->size = obj->base.size;

	/* Get a DRM GEM mmap offset allocated... */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->base.size / PAGE_SIZE,
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n",

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->base.size / PAGE_SIZE,
	if (!list->file_offset_node) {

	list->hash.key = list->file_offset_node->start;
	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
		DRM_ERROR("failed to add to map hash\n");

	drm_mm_put_block(list->file_offset_node);
/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
	if (!obj->fault_mappable)

	if (obj->base.dev->dev_mapping)
		unmap_mapping_range(obj->base.dev->dev_mapping,
				    (loff_t)obj->base.map_list.hash.key << PAGE_SHIFT,

	obj->fault_mappable = false;
i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list = &obj->base.map_list;

	drm_ht_remove_item(&mm->offset_hash, &list->hash);
	drm_mm_put_block(list->file_offset_node);
i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    obj->tiling_mode == I915_TILING_NONE)
		return obj->base.size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)

	while (size < obj->base.size)
/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    obj->tiling_mode == I915_TILING_NONE)

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(obj);
/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;

	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    obj->tiling_mode == I915_TILING_NONE)

	/*
	 * Older chips need unfenced tiled buffers to be aligned to the left
	 * edge of an even tile row (where tile rows are counted as if the bo is
	 * placed in a fenced gtt region).
	 */
	else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))

	return tile_height * obj->stride * 2;
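/*
 * That return value is two tile rows' worth of bytes: tile_height scanlines
 * per tile row times the object's stride, doubled so the object starts on an
 * even tile row as the comment above requires for unfenced tiled access.
 */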
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");

	if (!obj->base.map_list.map) {
		ret = i915_gem_create_mmap_offset(obj);

	*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
	struct drm_i915_gem_mmap_gtt *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
	struct address_space *mapping;
	struct inode *inode;

	/* Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 */
	page_count = obj->base.size / PAGE_SIZE;
	BUG_ON(obj->pages != NULL);
	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
	if (obj->pages == NULL)

	inode = obj->base.filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	for (i = 0; i < page_count; i++) {
		page = read_cache_page_gfp(mapping, i,

		obj->pages[i] = page;

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_do_bit_17_swizzle(obj);

		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);

	return PTR_ERR(page);
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
	int page_count = obj->base.size / PAGE_SIZE;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)

	for (i = 0; i < page_count; i++) {
			set_page_dirty(obj->pages[i]);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj->pages[i]);

		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring,
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	BUG_ON(ring == NULL);

	/* Add a reference if we're newly entering the active list. */
		drm_gem_object_reference(&obj->base);

	/* Move from whatever list we were on to the tail of execution. */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_rendering_seqno = seqno;
	if (obj->fenced_gpu_access) {
		struct drm_i915_fence_reg *reg;

		BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);

		obj->last_fenced_seqno = seqno;
		obj->last_fenced_ring = ring;

		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);

i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);

	i915_gem_object_move_off_active(obj);

i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
	else
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(!obj->active);

	i915_gem_object_move_off_active(obj);
	obj->fenced_gpu_access = false;

	obj->pending_gpu_write = false;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
/* Immediately discard the backing storage */
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
	struct inode *inode;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*. Here we mirror the actions taken
	 * by shmem_delete_inode() to release the backing store.
	 */
	inode = obj->base.filp->f_path.dentry->d_inode;
	truncate_inode_pages(inode->i_mapping, 0);
	if (inode->i_op->truncate_range)
		inode->i_op->truncate_range(inode, 0, (loff_t)-1);

	obj->madv = __I915_MADV_PURGED;

i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
	return obj->madv == I915_MADV_DONTNEED;
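/*
 * DONTNEED is set by userspace (via the madvise ioctl) on caches it can
 * recreate; such objects may have their backing storage truncated, as in
 * i915_gem_object_truncate() above, whenever memory is needed.
 */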
i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
			       uint32_t flush_domains)
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next,
				 &ring->gpu_write_list,
		if (obj->base.write_domain & flush_domains) {
			uint32_t old_write_domain = obj->base.write_domain;

			obj->base.write_domain = 0;
			list_del_init(&obj->gpu_write_list);
			i915_gem_object_move_to_active(obj, ring,
						       i915_gem_next_request_seqno(ring));

			trace_i915_gem_object_change_domain(obj,
							    obj->base.read_domains,
							    old_write_domain);
i915_add_request(struct intel_ring_buffer *ring,
		 struct drm_file *file,
		 struct drm_i915_gem_request *request)
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	BUG_ON(request == NULL);

	ret = ring->add_request(ring, &seqno);

	trace_i915_gem_request_add(ring, seqno);

	request->seqno = seqno;
	request->ring = ring;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);

	ring->outstanding_lazy_request = false;

	if (!dev_priv->mm.suspended) {
		mod_timer(&dev_priv->hangcheck_timer,
			  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
	struct drm_i915_file_private *file_priv = request->file_priv;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_list);
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
static void i915_gem_reset_fences(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;

	for (i = 0; i < 16; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		struct drm_i915_gem_object *obj = reg->obj;

		if (obj->tiling_mode)
			i915_gem_release_mmap(obj);

		reg->obj->fence_reg = I915_FENCE_REG_NONE;
		reg->obj->fenced_gpu_access = false;
		reg->obj->last_fenced_seqno = 0;
		reg->obj->last_fenced_ring = NULL;
		i915_gem_clear_fence_reg(dev, reg);
void i915_gem_reset(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
		obj = list_first_entry(&dev_priv->mm.flushing_list,
				       struct drm_i915_gem_object,
				       mm_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;

	/* The fence registers are invalidated so clear them out */
	i915_gem_reset_fences(dev);
/**
 * This function clears the request list as sequence numbers are passed.
 */
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
	if (list_empty(&ring->request_list))

	WARN_ON(i915_verify_lists(ring->dev));

	seqno = ring->get_seqno(ring);

	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
		if (seqno >= ring->sync_seqno[i])
			ring->sync_seqno[i] = 0;

	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		if (!i915_seqno_passed(seqno, request->seqno))

		trace_i915_gem_request_retire(ring, request->seqno);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))

		if (obj->base.write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else
			i915_gem_object_move_to_inactive(obj);

	if (unlikely(ring->trace_irq_seqno &&
		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
		ring->irq_put(ring);
		ring->trace_irq_seqno = 0;

	WARN_ON(i915_verify_lists(ring->dev));
i915_gem_retire_requests(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
		struct drm_i915_gem_object *obj, *next;

		/* We must be careful that during unbind() we do not
		 * accidentally infinitely recurse into retire requests.
		 *
		 *   retire -> free -> unbind -> wait -> retire_ring
		 */
		list_for_each_entry_safe(obj, next,
					 &dev_priv->mm.deferred_free_list,
			i915_gem_free_object_tail(obj);

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_retire_requests_ring(&dev_priv->ring[i]);
i915_gem_retire_work_handler(struct work_struct *work)
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	/* Come back later if the device is busy... */
	if (!mutex_trylock(&dev->struct_mutex)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);

	i915_gem_retire_requests(dev);

	/* Send a periodic flush down the ring so we don't hold onto GEM
	 * objects indefinitely.
	 */
	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct intel_ring_buffer *ring = &dev_priv->ring[i];

		if (!list_empty(&ring->gpu_write_list)) {
			struct drm_i915_gem_request *request;

			ret = i915_gem_flush_ring(ring,
						  0, I915_GEM_GPU_DOMAINS);
			request = kzalloc(sizeof(*request), GFP_KERNEL);
			if (ret || request == NULL ||
			    i915_add_request(ring, NULL, request))

		idle &= list_empty(&ring->request_list);

	if (!dev_priv->mm.suspended && !idle)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);

	mutex_unlock(&dev->struct_mutex);
/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
i915_wait_request(struct intel_ring_buffer *ring,
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		return recovery_complete ? -EIO : -EAGAIN;
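		/* -EAGAIN asks the caller to retry while the reset handler is
		 * still running; once recovery has completed but the GPU
		 * stayed wedged there is nothing left to wait for, so -EIO.
		 */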
	if (seqno == ring->outstanding_lazy_request) {
		struct drm_i915_gem_request *request;

		request = kzalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)

		ret = i915_add_request(ring, NULL, request);

		seqno = request->seqno;

	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
		if (HAS_PCH_SPLIT(ring->dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			i915_driver_irq_preinstall(ring->dev);
			i915_driver_irq_postinstall(ring->dev);

		trace_i915_gem_request_wait_begin(ring, seqno);

		ring->waiting_seqno = seqno;
		if (ring->irq_get(ring)) {
			if (dev_priv->mm.interruptible)
				ret = wait_event_interruptible(ring->irq_queue,
							       i915_seqno_passed(ring->get_seqno(ring), seqno)
							       || atomic_read(&dev_priv->mm.wedged));
			else
				wait_event(ring->irq_queue,
					   i915_seqno_passed(ring->get_seqno(ring), seqno)
					   || atomic_read(&dev_priv->mm.wedged));

			ring->irq_put(ring);
		} else if (wait_for(i915_seqno_passed(ring->get_seqno(ring),
						      seqno) ||
				    atomic_read(&dev_priv->mm.wedged), 3000))

		ring->waiting_seqno = 0;

		trace_i915_gem_request_wait_end(ring, seqno);

	if (atomic_read(&dev_priv->mm.wedged))

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
			  __func__, ret, seqno, ring->get_seqno(ring),
			  dev_priv->next_seqno);

	/* Directly dispatch request retiring. While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
		i915_gem_retire_requests_ring(ring);
/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */
	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
	u32 old_write_domain, old_read_domains;

	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)

	/* Act as a barrier for all accesses through the GTT */

	/* Force a pagefault for domain tracking on next user access */
	i915_gem_release_mmap(obj);

	old_read_domains = obj->base.read_domains;
	old_write_domain = obj->base.write_domain;

	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);
/**
 * Unbinds an object from the GTT aperture.
 */
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
	if (obj->gtt_space == NULL)

	if (obj->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");

	ret = i915_gem_object_finish_gpu(obj);
	if (ret == -ERESTARTSYS)
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to cleanup or else we might
	 * cause memory corruption through use-after-free.
	 */

	i915_gem_object_finish_gtt(obj);

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret == -ERESTARTSYS)
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;

	/* release the fence reg _after_ flushing */
	ret = i915_gem_object_put_fence(obj);
	if (ret == -ERESTARTSYS)

	trace_i915_gem_object_unbind(obj);

	i915_gem_gtt_unbind_object(obj);
	i915_gem_object_put_pages_gtt(obj);

	list_del_init(&obj->gtt_list);
	list_del_init(&obj->mm_list);
	/* Avoid an unnecessary call to unbind on rebind. */
	obj->map_and_fenceable = true;

	drm_mm_put_block(obj->gtt_space);
	obj->gtt_space = NULL;
	obj->gtt_offset = 0;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);
i915_gem_flush_ring(struct intel_ring_buffer *ring,
		    uint32_t invalidate_domains,
		    uint32_t flush_domains)
	if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)

	trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);

	ret = ring->flush(ring, invalidate_domains, flush_domains);

	if (flush_domains & I915_GEM_GPU_DOMAINS)
		i915_gem_process_flushing_list(ring, flush_domains);
static int i915_ring_idle(struct intel_ring_buffer *ring)
	if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))

	if (!list_empty(&ring->gpu_write_list)) {
		ret = i915_gem_flush_ring(ring,
					  I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);

	return i915_wait_request(ring, i915_gem_next_request_seqno(ring));
i915_gpu_idle(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;

	lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
		       list_empty(&dev_priv->mm.active_list));

	/* Flush everything onto the inactive list. */
	for (i = 0; i < I915_NUM_RINGS; i++) {
		ret = i915_ring_idle(&dev_priv->ring[i]);
static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
				       struct intel_ring_buffer *pipelined)
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;

	val = (uint64_t)((obj->gtt_offset + size - 4096) &
			 0xfffff000) << 32;
	val |= obj->gtt_offset & 0xfffff000;
	val |= (uint64_t)((obj->stride / 128) - 1) <<
		SANDYBRIDGE_FENCE_PITCH_SHIFT;

	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

		int ret = intel_ring_begin(pipelined, 6);

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
		intel_ring_emit(pipelined, (u32)val);
		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
		intel_ring_emit(pipelined, (u32)(val >> 32));
		intel_ring_advance(pipelined);

		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
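/*
 * Fence layout as assembled above: the upper 32 bits hold the last page of
 * the fenced range, the lower 32 bits its start, with the pitch encoded as
 * stride/128 - 1, a Y-tiling flag and the valid bit OR'd in. The value is
 * either written directly to the register or emitted as LOAD_REGISTER_IMM
 * commands when the update has to be ordered against a ring (pipelined).
 */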
static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
				struct intel_ring_buffer *pipelined)
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;

	val = (uint64_t)((obj->gtt_offset + size - 4096) &
			 0xfffff000) << 32;
	val |= obj->gtt_offset & 0xfffff000;
	val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

		int ret = intel_ring_begin(pipelined, 6);

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
		intel_ring_emit(pipelined, (u32)val);
		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
		intel_ring_emit(pipelined, (u32)(val >> 32));
		intel_ring_advance(pipelined);

		I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2376 static int i915_write_fence_reg(struct drm_i915_gem_object
*obj
,
2377 struct intel_ring_buffer
*pipelined
)
2379 struct drm_device
*dev
= obj
->base
.dev
;
2380 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
2381 u32 size
= obj
->gtt_space
->size
;
2382 u32 fence_reg
, val
, pitch_val
;
2385 if (WARN((obj
->gtt_offset
& ~I915_FENCE_START_MASK
) ||
2386 (size
& -size
) != size
||
2387 (obj
->gtt_offset
& (size
- 1)),
2388 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2389 obj
->gtt_offset
, obj
->map_and_fenceable
, size
))
2392 if (obj
->tiling_mode
== I915_TILING_Y
&& HAS_128_BYTE_Y_TILING(dev
))
2397 /* Note: pitch better be a power of two tile widths */
2398 pitch_val
= obj
->stride
/ tile_width
;
2399 pitch_val
= ffs(pitch_val
) - 1;
2401 val
= obj
->gtt_offset
;
2402 if (obj
->tiling_mode
== I915_TILING_Y
)
2403 val
|= 1 << I830_FENCE_TILING_Y_SHIFT
;
2404 val
|= I915_FENCE_SIZE_BITS(size
);
2405 val
|= pitch_val
<< I830_FENCE_PITCH_SHIFT
;
2406 val
|= I830_FENCE_REG_VALID
;
2408 fence_reg
= obj
->fence_reg
;
2410 fence_reg
= FENCE_REG_830_0
+ fence_reg
* 4;
2412 fence_reg
= FENCE_REG_945_8
+ (fence_reg
- 8) * 4;
2415 int ret
= intel_ring_begin(pipelined
, 4);
2419 intel_ring_emit(pipelined
, MI_NOOP
);
2420 intel_ring_emit(pipelined
, MI_LOAD_REGISTER_IMM(1));
2421 intel_ring_emit(pipelined
, fence_reg
);
2422 intel_ring_emit(pipelined
, val
);
2423 intel_ring_advance(pipelined
);
2425 I915_WRITE(fence_reg
, val
);
2430 static int i830_write_fence_reg(struct drm_i915_gem_object
*obj
,
2431 struct intel_ring_buffer
*pipelined
)
2433 struct drm_device
*dev
= obj
->base
.dev
;
2434 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
2435 u32 size
= obj
->gtt_space
->size
;
2436 int regnum
= obj
->fence_reg
;
2440 if (WARN((obj
->gtt_offset
& ~I830_FENCE_START_MASK
) ||
2441 (size
& -size
) != size
||
2442 (obj
->gtt_offset
& (size
- 1)),
2443 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2444 obj
->gtt_offset
, size
))
2447 pitch_val
= obj
->stride
/ 128;
2448 pitch_val
= ffs(pitch_val
) - 1;
2450 val
= obj
->gtt_offset
;
2451 if (obj
->tiling_mode
== I915_TILING_Y
)
2452 val
|= 1 << I830_FENCE_TILING_Y_SHIFT
;
2453 val
|= I830_FENCE_SIZE_BITS(size
);
2454 val
|= pitch_val
<< I830_FENCE_PITCH_SHIFT
;
2455 val
|= I830_FENCE_REG_VALID
;
2458 int ret
= intel_ring_begin(pipelined
, 4);
2462 intel_ring_emit(pipelined
, MI_NOOP
);
2463 intel_ring_emit(pipelined
, MI_LOAD_REGISTER_IMM(1));
2464 intel_ring_emit(pipelined
, FENCE_REG_830_0
+ regnum
*4);
2465 intel_ring_emit(pipelined
, val
);
2466 intel_ring_advance(pipelined
);
2468 I915_WRITE(FENCE_REG_830_0
+ regnum
* 4, val
);
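/* Illustrative restatement (hypothetical helper, exposition only) of the gen3
 * packing performed by i915_write_fence_reg() above.  All of the
 * *_write_fence_reg() variants encode the same facts -- object start,
 * power-of-two size, pitch and tiling mode -- into a generation-specific
 * layout.  For example, an X-tiled object with a 2048-byte stride and
 * 512-byte tiles gives pitch_val = ffs(2048 / 512) - 1 = 2 in the pitch
 * field; the numbers are invented for illustration.
 */
#if 0
static u32 example_i915_fence_value(struct drm_i915_gem_object *obj,
				    u32 tile_width)
{
	u32 pitch_val = ffs(obj->stride / tile_width) - 1;
	u32 val = obj->gtt_offset;

	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	val |= I915_FENCE_SIZE_BITS(obj->gtt_space->size);
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	return val | I830_FENCE_REG_VALID;
}
#endif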
static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	return i915_seqno_passed(ring->get_seqno(ring), seqno);
}
2479 i915_gem_object_flush_fence(struct drm_i915_gem_object
*obj
,
2480 struct intel_ring_buffer
*pipelined
)
2484 if (obj
->fenced_gpu_access
) {
2485 if (obj
->base
.write_domain
& I915_GEM_GPU_DOMAINS
) {
2486 ret
= i915_gem_flush_ring(obj
->last_fenced_ring
,
2487 0, obj
->base
.write_domain
);
2492 obj
->fenced_gpu_access
= false;
2495 if (obj
->last_fenced_seqno
&& pipelined
!= obj
->last_fenced_ring
) {
2496 if (!ring_passed_seqno(obj
->last_fenced_ring
,
2497 obj
->last_fenced_seqno
)) {
2498 ret
= i915_wait_request(obj
->last_fenced_ring
,
2499 obj
->last_fenced_seqno
);
2504 obj
->last_fenced_seqno
= 0;
2505 obj
->last_fenced_ring
= NULL
;
2508 /* Ensure that all CPU reads are completed before installing a fence
2509 * and all writes before removing the fence.
2511 if (obj
->base
.read_domains
& I915_GEM_DOMAIN_GTT
)
int
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
{
	int ret;

	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	ret = i915_gem_object_flush_fence(obj, NULL);
	if (ret)
		return ret;

	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;

		i915_gem_clear_fence_reg(obj->base.dev,
					 &dev_priv->fence_regs[obj->fence_reg]);

		obj->fence_reg = I915_FENCE_REG_NONE;
	}

	return 0;
}
2540 static struct drm_i915_fence_reg
*
2541 i915_find_fence_reg(struct drm_device
*dev
,
2542 struct intel_ring_buffer
*pipelined
)
2544 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
2545 struct drm_i915_fence_reg
*reg
, *first
, *avail
;
2548 /* First try to find a free reg */
2550 for (i
= dev_priv
->fence_reg_start
; i
< dev_priv
->num_fence_regs
; i
++) {
2551 reg
= &dev_priv
->fence_regs
[i
];
2555 if (!reg
->obj
->pin_count
)
2562 /* None available, try to steal one or wait for a user to finish */
2563 avail
= first
= NULL
;
2564 list_for_each_entry(reg
, &dev_priv
->mm
.fence_list
, lru_list
) {
2565 if (reg
->obj
->pin_count
)
2572 !reg
->obj
->last_fenced_ring
||
2573 reg
->obj
->last_fenced_ring
== pipelined
) {
2586 * i915_gem_object_get_fence - set up a fence reg for an object
2587 * @obj: object to map through a fence reg
2588 * @pipelined: ring on which to queue the change, or NULL for CPU access
2589 * @interruptible: must we wait uninterruptibly for the register to retire?
2591 * When mapping objects through the GTT, userspace wants to be able to write
2592 * to them without having to worry about swizzling if the object is tiled.
2594 * This function walks the fence regs looking for a free one for @obj,
2595 * stealing one if it can't find any.
2597 * It then sets up the reg based on the object's properties: address, pitch
2598 * and tiling format.
2601 i915_gem_object_get_fence(struct drm_i915_gem_object
*obj
,
2602 struct intel_ring_buffer
*pipelined
)
2604 struct drm_device
*dev
= obj
->base
.dev
;
2605 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
2606 struct drm_i915_fence_reg
*reg
;
2609 /* XXX disable pipelining. There are bugs. Shocking. */
2612 /* Just update our place in the LRU if our fence is getting reused. */
2613 if (obj
->fence_reg
!= I915_FENCE_REG_NONE
) {
2614 reg
= &dev_priv
->fence_regs
[obj
->fence_reg
];
2615 list_move_tail(®
->lru_list
, &dev_priv
->mm
.fence_list
);
2617 if (obj
->tiling_changed
) {
2618 ret
= i915_gem_object_flush_fence(obj
, pipelined
);
2622 if (!obj
->fenced_gpu_access
&& !obj
->last_fenced_seqno
)
2627 i915_gem_next_request_seqno(pipelined
);
2628 obj
->last_fenced_seqno
= reg
->setup_seqno
;
2629 obj
->last_fenced_ring
= pipelined
;
2636 if (reg
->setup_seqno
) {
2637 if (!ring_passed_seqno(obj
->last_fenced_ring
,
2638 reg
->setup_seqno
)) {
2639 ret
= i915_wait_request(obj
->last_fenced_ring
,
2645 reg
->setup_seqno
= 0;
2647 } else if (obj
->last_fenced_ring
&&
2648 obj
->last_fenced_ring
!= pipelined
) {
2649 ret
= i915_gem_object_flush_fence(obj
, pipelined
);
2657 reg
= i915_find_fence_reg(dev
, pipelined
);
2661 ret
= i915_gem_object_flush_fence(obj
, pipelined
);
2666 struct drm_i915_gem_object
*old
= reg
->obj
;
2668 drm_gem_object_reference(&old
->base
);
2670 if (old
->tiling_mode
)
2671 i915_gem_release_mmap(old
);
2673 ret
= i915_gem_object_flush_fence(old
, pipelined
);
2675 drm_gem_object_unreference(&old
->base
);
2679 if (old
->last_fenced_seqno
== 0 && obj
->last_fenced_seqno
== 0)
2682 old
->fence_reg
= I915_FENCE_REG_NONE
;
2683 old
->last_fenced_ring
= pipelined
;
2684 old
->last_fenced_seqno
=
2685 pipelined
? i915_gem_next_request_seqno(pipelined
) : 0;
2687 drm_gem_object_unreference(&old
->base
);
2688 } else if (obj
->last_fenced_seqno
== 0)
2692 list_move_tail(®
->lru_list
, &dev_priv
->mm
.fence_list
);
2693 obj
->fence_reg
= reg
- dev_priv
->fence_regs
;
2694 obj
->last_fenced_ring
= pipelined
;
2697 pipelined
? i915_gem_next_request_seqno(pipelined
) : 0;
2698 obj
->last_fenced_seqno
= reg
->setup_seqno
;
2701 obj
->tiling_changed
= false;
2702 switch (INTEL_INFO(dev
)->gen
) {
2705 ret
= sandybridge_write_fence_reg(obj
, pipelined
);
2709 ret
= i965_write_fence_reg(obj
, pipelined
);
2712 ret
= i915_write_fence_reg(obj
, pipelined
);
2715 ret
= i830_write_fence_reg(obj
, pipelined
);
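/* Hedged usage sketch (not a real call site in this file): a caller about to
 * expose a tiled object through a GTT mapping would pin it into a mappable,
 * fenceable slot and then reserve a fence; a NULL ring asks for synchronous,
 * CPU-visible setup.  The zero alignment and the error handling are
 * assumptions made for the example.
 */
#if 0
static int example_prepare_tiled_mmap(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_pin(obj, 0, true);
	if (ret)
		return ret;

	ret = i915_gem_object_get_fence(obj, NULL);
	if (ret)
		i915_gem_object_unpin(obj);
	return ret;
}
#endif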
/**
 * i915_gem_clear_fence_reg - clear out fence register info
 * @obj: object to clear
 *
 * Zeroes out the fence register itself and clears out the associated
 * data structures in dev_priv and obj.
 */
static void
i915_gem_clear_fence_reg(struct drm_device *dev,
			 struct drm_i915_fence_reg *reg)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t fence_reg = reg - dev_priv->fence_regs;

	switch (INTEL_INFO(dev)->gen) {
	case 6:
		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
		break;
	case 5:
	case 4:
		I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
		break;
	case 3:
		if (fence_reg >= 8)
			fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
		else
	case 2:
			fence_reg = FENCE_REG_830_0 + fence_reg * 4;

		I915_WRITE(fence_reg, 0);
		break;
	}

	list_del_init(&reg->lru_list);
	reg->obj = NULL;
	reg->setup_seqno = 0;
}
2762 * Finds free space in the GTT aperture and binds the object there.
2765 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object
*obj
,
2767 bool map_and_fenceable
)
2769 struct drm_device
*dev
= obj
->base
.dev
;
2770 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
2771 struct drm_mm_node
*free_space
;
2772 gfp_t gfpmask
= __GFP_NORETRY
| __GFP_NOWARN
;
2773 u32 size
, fence_size
, fence_alignment
, unfenced_alignment
;
2774 bool mappable
, fenceable
;
2777 if (obj
->madv
!= I915_MADV_WILLNEED
) {
2778 DRM_ERROR("Attempting to bind a purgeable object\n");
2782 fence_size
= i915_gem_get_gtt_size(obj
);
2783 fence_alignment
= i915_gem_get_gtt_alignment(obj
);
2784 unfenced_alignment
= i915_gem_get_unfenced_gtt_alignment(obj
);
2787 alignment
= map_and_fenceable
? fence_alignment
:
2789 if (map_and_fenceable
&& alignment
& (fence_alignment
- 1)) {
2790 DRM_ERROR("Invalid object alignment requested %u\n", alignment
);
2794 size
= map_and_fenceable
? fence_size
: obj
->base
.size
;
2796 /* If the object is bigger than the entire aperture, reject it early
2797 * before evicting everything in a vain attempt to find space.
2799 if (obj
->base
.size
>
2800 (map_and_fenceable
? dev_priv
->mm
.gtt_mappable_end
: dev_priv
->mm
.gtt_total
)) {
2801 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2806 if (map_and_fenceable
)
2808 drm_mm_search_free_in_range(&dev_priv
->mm
.gtt_space
,
2810 dev_priv
->mm
.gtt_mappable_end
,
2813 free_space
= drm_mm_search_free(&dev_priv
->mm
.gtt_space
,
2814 size
, alignment
, 0);
2816 if (free_space
!= NULL
) {
2817 if (map_and_fenceable
)
2819 drm_mm_get_block_range_generic(free_space
,
2821 dev_priv
->mm
.gtt_mappable_end
,
2825 drm_mm_get_block(free_space
, size
, alignment
);
2827 if (obj
->gtt_space
== NULL
) {
2828 /* If the gtt is empty and we're still having trouble
2829 * fitting our object in, we're out of memory.
2831 ret
= i915_gem_evict_something(dev
, size
, alignment
,
2839 ret
= i915_gem_object_get_pages_gtt(obj
, gfpmask
);
2841 drm_mm_put_block(obj
->gtt_space
);
2842 obj
->gtt_space
= NULL
;
2844 if (ret
== -ENOMEM
) {
2845 /* first try to reclaim some memory by clearing the GTT */
2846 ret
= i915_gem_evict_everything(dev
, false);
2848 /* now try to shrink everyone else */
2863 ret
= i915_gem_gtt_bind_object(obj
);
2865 i915_gem_object_put_pages_gtt(obj
);
2866 drm_mm_put_block(obj
->gtt_space
);
2867 obj
->gtt_space
= NULL
;
2869 if (i915_gem_evict_everything(dev
, false))
2875 list_add_tail(&obj
->gtt_list
, &dev_priv
->mm
.gtt_list
);
2876 list_add_tail(&obj
->mm_list
, &dev_priv
->mm
.inactive_list
);
2878 /* Assert that the object is not currently in any GPU domain. As it
2879 * wasn't in the GTT, there shouldn't be any way it could have been in
2882 BUG_ON(obj
->base
.read_domains
& I915_GEM_GPU_DOMAINS
);
2883 BUG_ON(obj
->base
.write_domain
& I915_GEM_GPU_DOMAINS
);
2885 obj
->gtt_offset
= obj
->gtt_space
->start
;
2888 obj
->gtt_space
->size
== fence_size
&&
2889 (obj
->gtt_space
->start
& (fence_alignment
-1)) == 0;
2892 obj
->gtt_offset
+ obj
->base
.size
<= dev_priv
->mm
.gtt_mappable_end
;
2894 obj
->map_and_fenceable
= mappable
&& fenceable
;
2896 trace_i915_gem_object_bind(obj
, map_and_fenceable
);
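/* Illustrative restatement (hypothetical helper, for exposition only) of the
 * size/alignment choice made by i915_gem_object_bind_to_gtt() above: a
 * mappable and fenceable binding must honour the fence geometry, while an
 * unfenced binding only needs the object's own size and the weaker unfenced
 * alignment.
 */
#if 0
static void example_bind_request(struct drm_i915_gem_object *obj,
				 bool map_and_fenceable,
				 u32 *req_size, u32 *req_align)
{
	u32 fence_size = i915_gem_get_gtt_size(obj);
	u32 fence_alignment = i915_gem_get_gtt_alignment(obj);
	u32 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);

	*req_size = map_and_fenceable ? fence_size : obj->base.size;
	*req_align = map_and_fenceable ? fence_alignment : unfenced_alignment;
}
#endif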
void
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
{
	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
	if (obj->pages == NULL)
		return;

	/* If the GPU is snooping the contents of the CPU cache,
	 * we do not need to manually clear the CPU cache lines.  However,
	 * the caches are only snooped when the render cache is
	 * flushed/invalidated.  As we always have to emit invalidations
	 * and flushes when moving into and out of the RENDER domain, correct
	 * snooping behaviour occurs naturally as the result of our domain
	 * tracking.
	 */
	if (obj->cache_level != I915_CACHE_NONE)
		return;

	trace_i915_gem_object_clflush(obj);

	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
}
/** Flushes any GPU write domain for the object if it's dirty. */
static int
i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
{
	if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
		return 0;

	/* Queue the GPU write cache flushing we need. */
	return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
}
/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain.  Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush.  It also doesn't land in render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GTT itself.
	 */
	wmb();

	i915_gem_release_mmap(obj);

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}
/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
		return;

	i915_gem_clflush_object(obj);
	intel_gtt_chipset_flush();
	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}
/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	uint32_t old_write_domain, old_read_domains;
	int ret;

	/* Not valid to be called on unbound objects. */
	if (obj->gtt_space == NULL)
		return -EINVAL;

	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	ret = i915_gem_object_flush_gpu_write_domain(obj);
	if (ret)
		return ret;

	if (obj->pending_gpu_write || write) {
		ret = i915_gem_object_wait_rendering(obj);
		if (ret)
			return ret;
	}

	i915_gem_object_flush_cpu_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
		obj->dirty = 1;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}
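/* Hedged usage sketch (added for exposition; the helper name, zero alignment
 * and error handling are assumptions): a typical GTT write path pins the
 * object into a mappable slot and performs the domain move above before
 * touching the aperture mapping.  The caller is assumed to hold struct_mutex.
 */
#if 0
static int example_begin_gtt_write(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_pin(obj, 0, true);
	if (ret)
		return ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		i915_gem_object_unpin(obj);
	return ret;
}
#endif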
3037 int i915_gem_object_set_cache_level(struct drm_i915_gem_object
*obj
,
3038 enum i915_cache_level cache_level
)
3042 if (obj
->cache_level
== cache_level
)
3045 if (obj
->pin_count
) {
3046 DRM_DEBUG("can not change the cache level of pinned objects\n");
3050 if (obj
->gtt_space
) {
3051 ret
= i915_gem_object_finish_gpu(obj
);
3055 i915_gem_object_finish_gtt(obj
);
3057 /* Before SandyBridge, you could not use tiling or fence
3058 * registers with snooped memory, so relinquish any fences
3059 * currently pointing to our region in the aperture.
3061 if (INTEL_INFO(obj
->base
.dev
)->gen
< 6) {
3062 ret
= i915_gem_object_put_fence(obj
);
3067 i915_gem_gtt_rebind_object(obj
, cache_level
);
3070 if (cache_level
== I915_CACHE_NONE
) {
3071 u32 old_read_domains
, old_write_domain
;
3073 /* If we're coming from LLC cached, then we haven't
3074 * actually been tracking whether the data is in the
3075 * CPU cache or not, since we only allow one bit set
3076 * in obj->write_domain and have been skipping the clflushes.
3077 * Just set it to the CPU cache for now.
3079 WARN_ON(obj
->base
.write_domain
& ~I915_GEM_DOMAIN_CPU
);
3080 WARN_ON(obj
->base
.read_domains
& ~I915_GEM_DOMAIN_CPU
);
3082 old_read_domains
= obj
->base
.read_domains
;
3083 old_write_domain
= obj
->base
.write_domain
;
3085 obj
->base
.read_domains
= I915_GEM_DOMAIN_CPU
;
3086 obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
3088 trace_i915_gem_object_change_domain(obj
,
3093 obj
->cache_level
= cache_level
;
3098 * Prepare buffer for display plane (scanout, cursors, etc).
3099 * Can be called from an uninterruptible phase (modesetting) and allows
3100 * any flushes to be pipelined (for pageflips).
3102 * For the display plane, we want to be in the GTT but out of any write
3103 * domains. So in many ways this looks like set_to_gtt_domain() apart from the
3104 * ability to pipeline the waits, pinning and any additional subtleties
3105 * that may differentiate the display plane from ordinary buffers.
3108 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object
*obj
,
3110 struct intel_ring_buffer
*pipelined
)
3112 u32 old_read_domains
, old_write_domain
;
3115 ret
= i915_gem_object_flush_gpu_write_domain(obj
);
3119 if (pipelined
!= obj
->ring
) {
3120 ret
= i915_gem_object_wait_rendering(obj
);
3125 /* The display engine is not coherent with the LLC cache on gen6. As
3126 * a result, we make sure that the pinning that is about to occur is
3127 * done with uncached PTEs. This is lowest common denominator for all
3130 * However for gen6+, we could do better by using the GFDT bit instead
3131 * of uncaching, which would allow us to flush all the LLC-cached data
3132 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3134 ret
= i915_gem_object_set_cache_level(obj
, I915_CACHE_NONE
);
3138 /* As the user may map the buffer once pinned in the display plane
3139 * (e.g. libkms for the bootup splash), we have to ensure that we
3140 * always use map_and_fenceable for all scanout buffers.
3142 ret
= i915_gem_object_pin(obj
, alignment
, true);
3146 i915_gem_object_flush_cpu_write_domain(obj
);
3148 old_write_domain
= obj
->base
.write_domain
;
3149 old_read_domains
= obj
->base
.read_domains
;
3151 /* It should now be out of any other write domains, and we can update
3152 * the domain values for our changes.
3154 BUG_ON((obj
->base
.write_domain
& ~I915_GEM_DOMAIN_GTT
) != 0);
3155 obj
->base
.read_domains
|= I915_GEM_DOMAIN_GTT
;
3157 trace_i915_gem_object_change_domain(obj
,
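/* Hedged sketch of the expected modesetting caller of the helper above: pin
 * the framebuffer for scanout with the required alignment and no pipelined
 * ring, after which obj->gtt_offset can be programmed into the plane
 * registers.  The 256KiB alignment and the helper name are assumptions made
 * for the example.
 */
#if 0
static int example_pin_scanout(struct drm_i915_gem_object *obj)
{
	return i915_gem_object_pin_to_display_plane(obj, 256 * 1024, NULL);
}
#endif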
int
i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
{
	int ret;

	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
		return 0;

	if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
		ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
		if (ret)
			return ret;
	}

	/* Ensure that we invalidate the GPU's caches and TLBs. */
	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;

	return i915_gem_object_wait_rendering(obj);
}
3185 * Moves a single object to the CPU read, and possibly write domain.
3187 * This function returns when the move is complete, including waiting on
3191 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object
*obj
, bool write
)
3193 uint32_t old_write_domain
, old_read_domains
;
3196 if (obj
->base
.write_domain
== I915_GEM_DOMAIN_CPU
)
3199 ret
= i915_gem_object_flush_gpu_write_domain(obj
);
3203 ret
= i915_gem_object_wait_rendering(obj
);
3207 i915_gem_object_flush_gtt_write_domain(obj
);
3209 /* If we have a partially-valid cache of the object in the CPU,
3210 * finish invalidating it and free the per-page flags.
3212 i915_gem_object_set_to_full_cpu_read_domain(obj
);
3214 old_write_domain
= obj
->base
.write_domain
;
3215 old_read_domains
= obj
->base
.read_domains
;
3217 /* Flush the CPU cache if it's still invalid. */
3218 if ((obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) == 0) {
3219 i915_gem_clflush_object(obj
);
3221 obj
->base
.read_domains
|= I915_GEM_DOMAIN_CPU
;
3224 /* It should now be out of any other write domains, and we can update
3225 * the domain values for our changes.
3227 BUG_ON((obj
->base
.write_domain
& ~I915_GEM_DOMAIN_CPU
) != 0);
3229 /* If we're writing through the CPU, then the GPU read domains will
3230 * need to be invalidated at next use.
3233 obj
->base
.read_domains
= I915_GEM_DOMAIN_CPU
;
3234 obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
3237 trace_i915_gem_object_change_domain(obj
,
3245 * Moves the object from a partially CPU read to a full one.
3247 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3248 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3251 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object
*obj
)
3253 if (!obj
->page_cpu_valid
)
3256 /* If we're partially in the CPU read domain, finish moving it in.
3258 if (obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) {
3261 for (i
= 0; i
<= (obj
->base
.size
- 1) / PAGE_SIZE
; i
++) {
3262 if (obj
->page_cpu_valid
[i
])
3264 drm_clflush_pages(obj
->pages
+ i
, 1);
3268 /* Free the page_cpu_valid mappings which are now stale, whether
3269 * or not we've got I915_GEM_DOMAIN_CPU.
3271 kfree(obj
->page_cpu_valid
);
3272 obj
->page_cpu_valid
= NULL
;
3276 * Set the CPU read domain on a range of the object.
3278 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3279 * not entirely valid. The page_cpu_valid member of the object flags which
3280 * pages have been flushed, and will be respected by
3281 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3282 * of the whole object.
3284 * This function returns when the move is complete, including waiting on
3288 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object
*obj
,
3289 uint64_t offset
, uint64_t size
)
3291 uint32_t old_read_domains
;
3294 if (offset
== 0 && size
== obj
->base
.size
)
3295 return i915_gem_object_set_to_cpu_domain(obj
, 0);
3297 ret
= i915_gem_object_flush_gpu_write_domain(obj
);
3301 ret
= i915_gem_object_wait_rendering(obj
);
3305 i915_gem_object_flush_gtt_write_domain(obj
);
3307 /* If we're already fully in the CPU read domain, we're done. */
3308 if (obj
->page_cpu_valid
== NULL
&&
3309 (obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) != 0)
3312 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3313 * newly adding I915_GEM_DOMAIN_CPU
3315 if (obj
->page_cpu_valid
== NULL
) {
3316 obj
->page_cpu_valid
= kzalloc(obj
->base
.size
/ PAGE_SIZE
,
3318 if (obj
->page_cpu_valid
== NULL
)
3320 } else if ((obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) == 0)
3321 memset(obj
->page_cpu_valid
, 0, obj
->base
.size
/ PAGE_SIZE
);
3323 /* Flush the cache on any pages that are still invalid from the CPU's
3326 for (i
= offset
/ PAGE_SIZE
; i
<= (offset
+ size
- 1) / PAGE_SIZE
;
3328 if (obj
->page_cpu_valid
[i
])
3331 drm_clflush_pages(obj
->pages
+ i
, 1);
3333 obj
->page_cpu_valid
[i
] = 1;
3336 /* It should now be out of any other write domains, and we can update
3337 * the domain values for our changes.
3339 BUG_ON((obj
->base
.write_domain
& ~I915_GEM_DOMAIN_CPU
) != 0);
3341 old_read_domains
= obj
->base
.read_domains
;
3342 obj
->base
.read_domains
|= I915_GEM_DOMAIN_CPU
;
3344 trace_i915_gem_object_change_domain(obj
,
3346 obj
->base
.write_domain
);
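/* Hedged illustration of why the partial-validity path above exists: a small
 * pread only needs its own pages clflushed, so a read path can validate just
 * that range instead of moving the whole object to the CPU domain.  The
 * helper name and the 64KiB offset are invented for the example.
 */
#if 0
static int example_prepare_partial_pread(struct drm_i915_gem_object *obj)
{
	/* Make one page at offset 64KiB CPU-readable before copying it out. */
	return i915_gem_object_set_cpu_read_domain_range(obj, 64 * 1024,
							 PAGE_SIZE);
}
#endif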
3351 /* Throttle our rendering by waiting until the ring has completed our requests
3352 * emitted over 20 msec ago.
3354 * Note that if we were to use the current jiffies each time around the loop,
3355 * we wouldn't escape the function with any frames outstanding if the time to
3356 * render a frame was over 20ms.
3358 * This should get us reasonable parallelism between CPU and GPU but also
3359 * relatively low latency when blocking on a particular request to finish.
3362 i915_gem_ring_throttle(struct drm_device
*dev
, struct drm_file
*file
)
3364 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3365 struct drm_i915_file_private
*file_priv
= file
->driver_priv
;
3366 unsigned long recent_enough
= jiffies
- msecs_to_jiffies(20);
3367 struct drm_i915_gem_request
*request
;
3368 struct intel_ring_buffer
*ring
= NULL
;
3372 if (atomic_read(&dev_priv
->mm
.wedged
))
3375 spin_lock(&file_priv
->mm
.lock
);
3376 list_for_each_entry(request
, &file_priv
->mm
.request_list
, client_list
) {
3377 if (time_after_eq(request
->emitted_jiffies
, recent_enough
))
3380 ring
= request
->ring
;
3381 seqno
= request
->seqno
;
3383 spin_unlock(&file_priv
->mm
.lock
);
3389 if (!i915_seqno_passed(ring
->get_seqno(ring
), seqno
)) {
3390 /* And wait for the seqno passing without holding any locks and
3391 * causing extra latency for others. This is safe as the irq
3392 * generation is designed to be run atomically and so is
3395 if (ring
->irq_get(ring
)) {
3396 ret
= wait_event_interruptible(ring
->irq_queue
,
3397 i915_seqno_passed(ring
->get_seqno(ring
), seqno
)
3398 || atomic_read(&dev_priv
->mm
.wedged
));
3399 ring
->irq_put(ring
);
3401 if (ret
== 0 && atomic_read(&dev_priv
->mm
.wedged
))
3407 queue_delayed_work(dev_priv
->wq
, &dev_priv
->mm
.retire_work
, 0);
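/* Hypothetical userspace-style counterpart (libdrm, not kernel code), shown
 * only to illustrate the intent of the throttle path above: calling the
 * throttle ioctl once per frame keeps each client roughly 20ms ahead of the
 * GPU without busy-waiting.  Assumes <xf86drm.h> and <drm/i915_drm.h>.
 */
#if 0
static void example_frame_throttle(int drm_fd)
{
	/* Blocks until this client's requests older than ~20ms retire. */
	drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL);
}
#endif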
3413 i915_gem_object_pin(struct drm_i915_gem_object
*obj
,
3415 bool map_and_fenceable
)
3417 struct drm_device
*dev
= obj
->base
.dev
;
3418 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3421 BUG_ON(obj
->pin_count
== DRM_I915_GEM_OBJECT_MAX_PIN_COUNT
);
3422 WARN_ON(i915_verify_lists(dev
));
3424 if (obj
->gtt_space
!= NULL
) {
3425 if ((alignment
&& obj
->gtt_offset
& (alignment
- 1)) ||
3426 (map_and_fenceable
&& !obj
->map_and_fenceable
)) {
3427 WARN(obj
->pin_count
,
3428 "bo is already pinned with incorrect alignment:"
3429 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3430 " obj->map_and_fenceable=%d\n",
3431 obj
->gtt_offset
, alignment
,
3433 obj
->map_and_fenceable
);
3434 ret
= i915_gem_object_unbind(obj
);
3440 if (obj
->gtt_space
== NULL
) {
3441 ret
= i915_gem_object_bind_to_gtt(obj
, alignment
,
3447 if (obj
->pin_count
++ == 0) {
3449 list_move_tail(&obj
->mm_list
,
3450 &dev_priv
->mm
.pinned_list
);
3452 obj
->pin_mappable
|= map_and_fenceable
;
3454 WARN_ON(i915_verify_lists(dev
));
void
i915_gem_object_unpin(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	WARN_ON(i915_verify_lists(dev));
	BUG_ON(obj->pin_count == 0);
	BUG_ON(obj->gtt_space == NULL);

	if (--obj->pin_count == 0) {
		if (!obj->active)
			list_move_tail(&obj->mm_list,
				       &dev_priv->mm.inactive_list);
		obj->pin_mappable = false;
	}
	WARN_ON(i915_verify_lists(dev));
}
3478 i915_gem_pin_ioctl(struct drm_device
*dev
, void *data
,
3479 struct drm_file
*file
)
3481 struct drm_i915_gem_pin
*args
= data
;
3482 struct drm_i915_gem_object
*obj
;
3485 ret
= i915_mutex_lock_interruptible(dev
);
3489 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
3490 if (&obj
->base
== NULL
) {
3495 if (obj
->madv
!= I915_MADV_WILLNEED
) {
3496 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3501 if (obj
->pin_filp
!= NULL
&& obj
->pin_filp
!= file
) {
3502 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3508 obj
->user_pin_count
++;
3509 obj
->pin_filp
= file
;
3510 if (obj
->user_pin_count
== 1) {
3511 ret
= i915_gem_object_pin(obj
, args
->alignment
, true);
3516 /* XXX - flush the CPU caches for pinned objects
3517 * as the X server doesn't manage domains yet
3519 i915_gem_object_flush_cpu_write_domain(obj
);
3520 args
->offset
= obj
->gtt_offset
;
3522 drm_gem_object_unreference(&obj
->base
);
3524 mutex_unlock(&dev
->struct_mutex
);
3529 i915_gem_unpin_ioctl(struct drm_device
*dev
, void *data
,
3530 struct drm_file
*file
)
3532 struct drm_i915_gem_pin
*args
= data
;
3533 struct drm_i915_gem_object
*obj
;
3536 ret
= i915_mutex_lock_interruptible(dev
);
3540 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
3541 if (&obj
->base
== NULL
) {
3546 if (obj
->pin_filp
!= file
) {
3547 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3552 obj
->user_pin_count
--;
3553 if (obj
->user_pin_count
== 0) {
3554 obj
->pin_filp
= NULL
;
3555 i915_gem_object_unpin(obj
);
3559 drm_gem_object_unreference(&obj
->base
);
3561 mutex_unlock(&dev
->struct_mutex
);
3566 i915_gem_busy_ioctl(struct drm_device
*dev
, void *data
,
3567 struct drm_file
*file
)
3569 struct drm_i915_gem_busy
*args
= data
;
3570 struct drm_i915_gem_object
*obj
;
3573 ret
= i915_mutex_lock_interruptible(dev
);
3577 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
3578 if (&obj
->base
== NULL
) {
3583 /* Count all active objects as busy, even if they are currently not used
3584 * by the gpu. Users of this interface expect objects to eventually
3585 * become non-busy without any further actions, therefore emit any
3586 * necessary flushes here.
3588 args
->busy
= obj
->active
;
3590 /* Unconditionally flush objects, even when the gpu still uses this
3591 * object. Userspace calling this function indicates that it wants to
3592 * use this buffer rather sooner than later, so issuing the required
3593 * flush earlier is beneficial.
3595 if (obj
->base
.write_domain
& I915_GEM_GPU_DOMAINS
) {
3596 ret
= i915_gem_flush_ring(obj
->ring
,
3597 0, obj
->base
.write_domain
);
3598 } else if (obj
->ring
->outstanding_lazy_request
==
3599 obj
->last_rendering_seqno
) {
3600 struct drm_i915_gem_request
*request
;
3602 /* This ring is not being cleared by active usage,
3603 * so emit a request to do so.
3605 request
= kzalloc(sizeof(*request
), GFP_KERNEL
);
3607 ret
= i915_add_request(obj
->ring
, NULL
,request
);
3612 /* Update the active list for the hardware's current position.
3613 * Otherwise this only updates on a delayed timer or when irqs
3614 * are actually unmasked, and our working set ends up being
3615 * larger than required.
3617 i915_gem_retire_requests_ring(obj
->ring
);
3619 args
->busy
= obj
->active
;
3622 drm_gem_object_unreference(&obj
->base
);
3624 mutex_unlock(&dev
->struct_mutex
);
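/* Hypothetical userspace-style check (libdrm, not kernel code) illustrating
 * the busy-ioctl semantics documented above: after the call, busy.busy is
 * zero once the GPU is done with the buffer, and the kernel has already
 * emitted whatever flush was needed for that to eventually become true.
 * Assumes <xf86drm.h> and <drm/i915_drm.h>.
 */
#if 0
static int example_bo_is_busy(int drm_fd, uint32_t handle)
{
	struct drm_i915_gem_busy busy = { .handle = handle };

	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
		return -1;
	return busy.busy != 0;
}
#endif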
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	return i915_gem_ring_throttle(dev, file_priv);
}
3636 i915_gem_madvise_ioctl(struct drm_device
*dev
, void *data
,
3637 struct drm_file
*file_priv
)
3639 struct drm_i915_gem_madvise
*args
= data
;
3640 struct drm_i915_gem_object
*obj
;
3643 switch (args
->madv
) {
3644 case I915_MADV_DONTNEED
:
3645 case I915_MADV_WILLNEED
:
3651 ret
= i915_mutex_lock_interruptible(dev
);
3655 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file_priv
, args
->handle
));
3656 if (&obj
->base
== NULL
) {
3661 if (obj
->pin_count
) {
3666 if (obj
->madv
!= __I915_MADV_PURGED
)
3667 obj
->madv
= args
->madv
;
3669 /* if the object is no longer bound, discard its backing storage */
3670 if (i915_gem_object_is_purgeable(obj
) &&
3671 obj
->gtt_space
== NULL
)
3672 i915_gem_object_truncate(obj
);
3674 args
->retained
= obj
->madv
!= __I915_MADV_PURGED
;
3677 drm_gem_object_unreference(&obj
->base
);
3679 mutex_unlock(&dev
->struct_mutex
);
3683 struct drm_i915_gem_object
*i915_gem_alloc_object(struct drm_device
*dev
,
3686 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3687 struct drm_i915_gem_object
*obj
;
3689 obj
= kzalloc(sizeof(*obj
), GFP_KERNEL
);
3693 if (drm_gem_object_init(dev
, &obj
->base
, size
) != 0) {
3698 i915_gem_info_add_obj(dev_priv
, size
);
3700 obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
3701 obj
->base
.read_domains
= I915_GEM_DOMAIN_CPU
;
3703 obj
->cache_level
= I915_CACHE_NONE
;
3704 obj
->base
.driver_private
= NULL
;
3705 obj
->fence_reg
= I915_FENCE_REG_NONE
;
3706 INIT_LIST_HEAD(&obj
->mm_list
);
3707 INIT_LIST_HEAD(&obj
->gtt_list
);
3708 INIT_LIST_HEAD(&obj
->ring_list
);
3709 INIT_LIST_HEAD(&obj
->exec_list
);
3710 INIT_LIST_HEAD(&obj
->gpu_write_list
);
3711 obj
->madv
= I915_MADV_WILLNEED
;
3712 /* Avoid an unnecessary call to unbind on the first bind. */
3713 obj
->map_and_fenceable
= true;
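/* Hedged sketch of the usual in-kernel consumer of the allocator above (for
 * instance a status page or ring object): allocate a shmem-backed object and
 * pin it into the GTT.  The page-sized object and alignment are placeholders
 * for illustration.
 */
#if 0
static struct drm_i915_gem_object *example_alloc_pinned(struct drm_device *dev)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL)
		return NULL;

	if (i915_gem_object_pin(obj, 4096, true)) {
		drm_gem_object_unreference(&obj->base);
		return NULL;
	}
	return obj;
}
#endif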
3718 int i915_gem_init_object(struct drm_gem_object
*obj
)
3725 static void i915_gem_free_object_tail(struct drm_i915_gem_object
*obj
)
3727 struct drm_device
*dev
= obj
->base
.dev
;
3728 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
3731 ret
= i915_gem_object_unbind(obj
);
3732 if (ret
== -ERESTARTSYS
) {
3733 list_move(&obj
->mm_list
,
3734 &dev_priv
->mm
.deferred_free_list
);
3738 trace_i915_gem_object_destroy(obj
);
3740 if (obj
->base
.map_list
.map
)
3741 i915_gem_free_mmap_offset(obj
);
3743 drm_gem_object_release(&obj
->base
);
3744 i915_gem_info_remove_obj(dev_priv
, obj
->base
.size
);
3746 kfree(obj
->page_cpu_valid
);
void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;

	while (obj->pin_count > 0)
		i915_gem_object_unpin(obj);

	if (obj->phys_obj)
		i915_gem_detach_phys_object(dev, obj);

	i915_gem_free_object_tail(obj);
}
3766 i915_gem_idle(struct drm_device
*dev
)
3768 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
3771 mutex_lock(&dev
->struct_mutex
);
3773 if (dev_priv
->mm
.suspended
) {
3774 mutex_unlock(&dev
->struct_mutex
);
3778 ret
= i915_gpu_idle(dev
);
3780 mutex_unlock(&dev
->struct_mutex
);
3784 /* Under UMS, be paranoid and evict. */
3785 if (!drm_core_check_feature(dev
, DRIVER_MODESET
)) {
3786 ret
= i915_gem_evict_inactive(dev
, false);
3788 mutex_unlock(&dev
->struct_mutex
);
3793 i915_gem_reset_fences(dev
);
3795 /* Hack! Don't let anybody do execbuf while we don't control the chip.
3796 * We need to replace this with a semaphore, or something.
3797 * And not confound mm.suspended!
3799 dev_priv
->mm
.suspended
= 1;
3800 del_timer_sync(&dev_priv
->hangcheck_timer
);
3802 i915_kernel_lost_context(dev
);
3803 i915_gem_cleanup_ringbuffer(dev
);
3805 mutex_unlock(&dev
->struct_mutex
);
3807 /* Cancel the retire work handler, which should be idle now. */
3808 cancel_delayed_work_sync(&dev_priv
->mm
.retire_work
);
3814 i915_gem_init_ringbuffer(struct drm_device
*dev
)
3816 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
3819 ret
= intel_init_render_ring_buffer(dev
);
3824 ret
= intel_init_bsd_ring_buffer(dev
);
3826 goto cleanup_render_ring
;
3830 ret
= intel_init_blt_ring_buffer(dev
);
3832 goto cleanup_bsd_ring
;
3835 dev_priv
->next_seqno
= 1;
3840 intel_cleanup_ring_buffer(&dev_priv
->ring
[VCS
]);
3841 cleanup_render_ring
:
3842 intel_cleanup_ring_buffer(&dev_priv
->ring
[RCS
]);
void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i;

	for (i = 0; i < I915_NUM_RINGS; i++)
		intel_cleanup_ring_buffer(&dev_priv->ring[i]);
}
3857 i915_gem_entervt_ioctl(struct drm_device
*dev
, void *data
,
3858 struct drm_file
*file_priv
)
3860 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
3863 if (drm_core_check_feature(dev
, DRIVER_MODESET
))
3866 if (atomic_read(&dev_priv
->mm
.wedged
)) {
3867 DRM_ERROR("Reenabling wedged hardware, good luck\n");
3868 atomic_set(&dev_priv
->mm
.wedged
, 0);
3871 mutex_lock(&dev
->struct_mutex
);
3872 dev_priv
->mm
.suspended
= 0;
3874 ret
= i915_gem_init_ringbuffer(dev
);
3876 mutex_unlock(&dev
->struct_mutex
);
3880 BUG_ON(!list_empty(&dev_priv
->mm
.active_list
));
3881 BUG_ON(!list_empty(&dev_priv
->mm
.flushing_list
));
3882 BUG_ON(!list_empty(&dev_priv
->mm
.inactive_list
));
3883 for (i
= 0; i
< I915_NUM_RINGS
; i
++) {
3884 BUG_ON(!list_empty(&dev_priv
->ring
[i
].active_list
));
3885 BUG_ON(!list_empty(&dev_priv
->ring
[i
].request_list
));
3887 mutex_unlock(&dev
->struct_mutex
);
3889 ret
= drm_irq_install(dev
);
3891 goto cleanup_ringbuffer
;
3896 mutex_lock(&dev
->struct_mutex
);
3897 i915_gem_cleanup_ringbuffer(dev
);
3898 dev_priv
->mm
.suspended
= 1;
3899 mutex_unlock(&dev
->struct_mutex
);
int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	drm_irq_uninstall(dev);
	return i915_gem_idle(dev);
}
void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}
static void
init_ring_lists(struct intel_ring_buffer *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);
}
3937 i915_gem_load(struct drm_device
*dev
)
3940 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
3942 INIT_LIST_HEAD(&dev_priv
->mm
.active_list
);
3943 INIT_LIST_HEAD(&dev_priv
->mm
.flushing_list
);
3944 INIT_LIST_HEAD(&dev_priv
->mm
.inactive_list
);
3945 INIT_LIST_HEAD(&dev_priv
->mm
.pinned_list
);
3946 INIT_LIST_HEAD(&dev_priv
->mm
.fence_list
);
3947 INIT_LIST_HEAD(&dev_priv
->mm
.deferred_free_list
);
3948 INIT_LIST_HEAD(&dev_priv
->mm
.gtt_list
);
3949 for (i
= 0; i
< I915_NUM_RINGS
; i
++)
3950 init_ring_lists(&dev_priv
->ring
[i
]);
3951 for (i
= 0; i
< 16; i
++)
3952 INIT_LIST_HEAD(&dev_priv
->fence_regs
[i
].lru_list
);
3953 INIT_DELAYED_WORK(&dev_priv
->mm
.retire_work
,
3954 i915_gem_retire_work_handler
);
3955 init_completion(&dev_priv
->error_completion
);
3957 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3959 u32 tmp
= I915_READ(MI_ARB_STATE
);
3960 if (!(tmp
& MI_ARB_C3_LP_WRITE_ENABLE
)) {
3961 /* arb state is a masked write, so set bit + bit in mask */
3962 tmp
= MI_ARB_C3_LP_WRITE_ENABLE
| (MI_ARB_C3_LP_WRITE_ENABLE
<< MI_ARB_MASK_SHIFT
);
3963 I915_WRITE(MI_ARB_STATE
, tmp
);
3967 dev_priv
->relative_constants_mode
= I915_EXEC_CONSTANTS_REL_GENERAL
;
3969 /* Old X drivers will take 0-2 for front, back, depth buffers */
3970 if (!drm_core_check_feature(dev
, DRIVER_MODESET
))
3971 dev_priv
->fence_reg_start
= 3;
3973 if (INTEL_INFO(dev
)->gen
>= 4 || IS_I945G(dev
) || IS_I945GM(dev
) || IS_G33(dev
))
3974 dev_priv
->num_fence_regs
= 16;
3976 dev_priv
->num_fence_regs
= 8;
3978 /* Initialize fence registers to zero */
3979 for (i
= 0; i
< dev_priv
->num_fence_regs
; i
++) {
3980 i915_gem_clear_fence_reg(dev
, &dev_priv
->fence_regs
[i
]);
3983 i915_gem_detect_bit_6_swizzle(dev
);
3984 init_waitqueue_head(&dev_priv
->pending_flip_queue
);
3986 dev_priv
->mm
.interruptible
= true;
3988 dev_priv
->mm
.inactive_shrinker
.shrink
= i915_gem_inactive_shrink
;
3989 dev_priv
->mm
.inactive_shrinker
.seeks
= DEFAULT_SEEKS
;
3990 register_shrinker(&dev_priv
->mm
.inactive_shrinker
);
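/* Illustrative restatement of the masked-write idiom used for MI_ARB_STATE in
 * i915_gem_load() above: such registers only change the bits whose mask
 * counterpart (the same bit shifted by MI_ARB_MASK_SHIFT) is also set, so a
 * single write can flip one bit without a read-modify-write race.  The helper
 * is hypothetical, written only to show the pattern.
 */
#if 0
static u32 example_masked_enable(u32 bit)
{
	return bit | (bit << MI_ARB_MASK_SHIFT);
}
#endif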
3994 * Create a physically contiguous memory object for this object
3995 * e.g. for cursor + overlay regs
3997 static int i915_gem_init_phys_object(struct drm_device
*dev
,
3998 int id
, int size
, int align
)
4000 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
4001 struct drm_i915_gem_phys_object
*phys_obj
;
4004 if (dev_priv
->mm
.phys_objs
[id
- 1] || !size
)
4007 phys_obj
= kzalloc(sizeof(struct drm_i915_gem_phys_object
), GFP_KERNEL
);
4013 phys_obj
->handle
= drm_pci_alloc(dev
, size
, align
);
4014 if (!phys_obj
->handle
) {
4019 set_memory_wc((unsigned long)phys_obj
->handle
->vaddr
, phys_obj
->handle
->size
/ PAGE_SIZE
);
4022 dev_priv
->mm
.phys_objs
[id
- 1] = phys_obj
;
4030 static void i915_gem_free_phys_object(struct drm_device
*dev
, int id
)
4032 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
4033 struct drm_i915_gem_phys_object
*phys_obj
;
4035 if (!dev_priv
->mm
.phys_objs
[id
- 1])
4038 phys_obj
= dev_priv
->mm
.phys_objs
[id
- 1];
4039 if (phys_obj
->cur_obj
) {
4040 i915_gem_detach_phys_object(dev
, phys_obj
->cur_obj
);
4044 set_memory_wb((unsigned long)phys_obj
->handle
->vaddr
, phys_obj
->handle
->size
/ PAGE_SIZE
);
4046 drm_pci_free(dev
, phys_obj
->handle
);
4048 dev_priv
->mm
.phys_objs
[id
- 1] = NULL
;
void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}
4059 void i915_gem_detach_phys_object(struct drm_device
*dev
,
4060 struct drm_i915_gem_object
*obj
)
4062 struct address_space
*mapping
= obj
->base
.filp
->f_path
.dentry
->d_inode
->i_mapping
;
4069 vaddr
= obj
->phys_obj
->handle
->vaddr
;
4071 page_count
= obj
->base
.size
/ PAGE_SIZE
;
4072 for (i
= 0; i
< page_count
; i
++) {
4073 struct page
*page
= read_cache_page_gfp(mapping
, i
,
4074 GFP_HIGHUSER
| __GFP_RECLAIMABLE
);
4075 if (!IS_ERR(page
)) {
4076 char *dst
= kmap_atomic(page
);
4077 memcpy(dst
, vaddr
+ i
*PAGE_SIZE
, PAGE_SIZE
);
4080 drm_clflush_pages(&page
, 1);
4082 set_page_dirty(page
);
4083 mark_page_accessed(page
);
4084 page_cache_release(page
);
4087 intel_gtt_chipset_flush();
4089 obj
->phys_obj
->cur_obj
= NULL
;
4090 obj
->phys_obj
= NULL
;
4094 i915_gem_attach_phys_object(struct drm_device
*dev
,
4095 struct drm_i915_gem_object
*obj
,
4099 struct address_space
*mapping
= obj
->base
.filp
->f_path
.dentry
->d_inode
->i_mapping
;
4100 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
4105 if (id
> I915_MAX_PHYS_OBJECT
)
4108 if (obj
->phys_obj
) {
4109 if (obj
->phys_obj
->id
== id
)
4111 i915_gem_detach_phys_object(dev
, obj
);
4114 /* create a new object */
4115 if (!dev_priv
->mm
.phys_objs
[id
- 1]) {
4116 ret
= i915_gem_init_phys_object(dev
, id
,
4117 obj
->base
.size
, align
);
4119 DRM_ERROR("failed to init phys object %d size: %zu\n",
4120 id
, obj
->base
.size
);
4125 /* bind to the object */
4126 obj
->phys_obj
= dev_priv
->mm
.phys_objs
[id
- 1];
4127 obj
->phys_obj
->cur_obj
= obj
;
4129 page_count
= obj
->base
.size
/ PAGE_SIZE
;
4131 for (i
= 0; i
< page_count
; i
++) {
4135 page
= read_cache_page_gfp(mapping
, i
,
4136 GFP_HIGHUSER
| __GFP_RECLAIMABLE
);
4138 return PTR_ERR(page
);
4140 src
= kmap_atomic(page
);
4141 dst
= obj
->phys_obj
->handle
->vaddr
+ (i
* PAGE_SIZE
);
4142 memcpy(dst
, src
, PAGE_SIZE
);
4145 mark_page_accessed(page
);
4146 page_cache_release(page
);
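/* Hedged example of the expected caller of the attach path above: cursor code
 * places its buffer in one of the per-pipe physical-object slots on hardware
 * that cannot scan cursors out of the GTT.  The slot arithmetic and PAGE_SIZE
 * alignment are assumptions made for the example.
 */
#if 0
static int example_attach_cursor(struct drm_device *dev,
				 struct drm_i915_gem_object *obj, int pipe)
{
	return i915_gem_attach_phys_object(dev, obj,
					   I915_GEM_PHYS_CURSOR_0 + pipe,
					   PAGE_SIZE);
}
#endif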
static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
	char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	intel_gtt_chipset_flush();
	return 0;
}
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}
static int
i915_gpu_is_active(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int lists_empty;

	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
		      list_empty(&dev_priv->mm.active_list);

	return !lists_empty;
}
4213 i915_gem_inactive_shrink(struct shrinker
*shrinker
, struct shrink_control
*sc
)
4215 struct drm_i915_private
*dev_priv
=
4216 container_of(shrinker
,
4217 struct drm_i915_private
,
4218 mm
.inactive_shrinker
);
4219 struct drm_device
*dev
= dev_priv
->dev
;
4220 struct drm_i915_gem_object
*obj
, *next
;
4221 int nr_to_scan
= sc
->nr_to_scan
;
4224 if (!mutex_trylock(&dev
->struct_mutex
))
4227 /* "fast-path" to count number of available objects */
4228 if (nr_to_scan
== 0) {
4230 list_for_each_entry(obj
,
4231 &dev_priv
->mm
.inactive_list
,
4234 mutex_unlock(&dev
->struct_mutex
);
4235 return cnt
/ 100 * sysctl_vfs_cache_pressure
;
4239 /* first scan for clean buffers */
4240 i915_gem_retire_requests(dev
);
4242 list_for_each_entry_safe(obj
, next
,
4243 &dev_priv
->mm
.inactive_list
,
4245 if (i915_gem_object_is_purgeable(obj
)) {
4246 if (i915_gem_object_unbind(obj
) == 0 &&
4252 /* second pass, evict/count anything still on the inactive list */
4254 list_for_each_entry_safe(obj
, next
,
4255 &dev_priv
->mm
.inactive_list
,
4258 i915_gem_object_unbind(obj
) == 0)
4264 if (nr_to_scan
&& i915_gpu_is_active(dev
)) {
4266 * We are desperate for pages, so as a last resort, wait
4267 * for the GPU to finish and discard whatever we can.
4268 * This has a dramatic impact to reduce the number of
4269 * OOM-killer events whilst running the GPU aggressively.
4271 if (i915_gpu_idle(dev
) == 0)
4274 mutex_unlock(&dev
->struct_mutex
);
4275 return cnt
/ 100 * sysctl_vfs_cache_pressure
;
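/* Worked example (invented numbers) of the accounting above: if the two scan
 * passes leave 300 unpinned, inactive objects and sysctl_vfs_cache_pressure
 * has its default value of 100, the callback reports 300 / 100 * 100 = 300
 * reclaimable units back to the VM; halving the pressure setting would halve
 * the report.
 */
#if 0
static unsigned long example_shrinker_report(unsigned long cnt)
{
	return cnt / 100 * sysctl_vfs_cache_pressure;
}
#endif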