4 * Copyright (c) 2009, Intel Corporation.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 * Eric Anholt <eric@anholt.net>
32 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
33 * Use is subject to license terms.
36 #include <sys/sysmacros.h>
42 /** @file i915_gem_tiling.c
44 * Support for managing tiling state of buffer objects.
46 * The idea behind tiling is to increase cache hit rates by rearranging
47 * pixel data so that a group of pixel accesses are in the same cacheline.
48 * Performance improvements from doing this on the back/depth buffer are on
51 * Intel architectures make this somewhat more complicated, though, by
52 * adjustments made to addressing of data when the memory is in interleaved
53 * mode (matched pairs of DIMMS) to improve memory bandwidth.
54 * For interleaved memory, the CPU sends every sequential 64 bytes
55 * to an alternate memory channel so it can get the bandwidth from both.
57 * The GPU also rearranges its accesses for increased bandwidth to interleaved
58 * memory, and it matches what the CPU does for non-tiled. However, when tiled
59 * it does it a little differently, since one walks addresses not just in the
60 * X direction but also Y. So, along with alternating channels when bit
61 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
62 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
63 * are common to both the 915 and 965-class hardware.
65 * The CPU also sometimes XORs in higher bits as well, to improve
66 * bandwidth doing strided access like we do so frequently in graphics. This
67 * is called "Channel XOR Randomization" in the MCH documentation. The result
68 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
71 * All of this bit 6 XORing has an effect on our memory management,
72 * as we need to make sure that the 3d driver can correctly address object
75 * If we don't have interleaved memory, all tiling is safe and no swizzling is
78 * When bit 17 is XORed in, we simply refuse to tile at all. Bit
79 * 17 is not just a page offset, so as we page an object out and back in,
80 * individual pages in it will have different bit 17 addresses, resulting in
81 * each 64 bytes being swapped with its neighbor!
83 * Otherwise, if interleaved, we have to tell the 3d driver what the address
84 * swizzling it needs to do is, since it's writing with the CPU to the pages
85 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
86 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
87 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
88 * to match what the GPU expects.
92 * Detects bit 6 swizzling of address lookup between IGD access and CPU
93 * access through main memory.
// i915_gem_detect_bit_6_swizzle(dev)
// Chooses the bit 6 swizzle mode for X-tiled and for Y-tiled objects and
// stores the pair in dev_priv->mm.bit_6_swizzle_x and
// dev_priv->mm.bit_6_swizzle_y. Both locals start out as
// I915_BIT_6_SWIZZLE_UNKNOWN and are overwritten by whichever chipset
// branch runs.
// NOTE(review): this copy of the function is missing lines (the return
// type, several branch conditions, closing braces, break statements and
// the declaration of dcc), so the notes below describe only the
// statements that are visible.
96 i915_gem_detect_bit_6_swizzle(struct drm_device
*dev
)
98 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
99 uint32_t swizzle_x
= I915_BIT_6_SWIZZLE_UNKNOWN
;
100 uint32_t swizzle_y
= I915_BIT_6_SWIZZLE_UNKNOWN
;
103 /* As far as we know, the 865 doesn't have these bit 6
106 swizzle_x
= I915_BIT_6_SWIZZLE_NONE
;
107 swizzle_y
= I915_BIT_6_SWIZZLE_NONE
;
// Mobile branch: the mode is derived from the DCC register read below.
108 } else if (IS_MOBILE(dev
)) {
111 /* On mobile 9xx chipsets, channel interleave by the CPU is
112 * determined by DCC. For single-channel, neither the CPU
113 * nor the GPU do swizzling. For dual channel interleaved,
114 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
115 * 9 for Y tiled. The CPU's interleave is independent, and
116 * can be based on either bit 11 (haven't seen this yet) or
120 dcc
= I915_READ(DCC
);
121 switch (dcc
& DCC_ADDRESSING_MODE_MASK
) {
// Single channel and dual channel asymmetric: no swizzling for X or Y.
122 case DCC_ADDRESSING_MODE_SINGLE_CHANNEL
:
123 case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC
:
124 swizzle_x
= I915_BIT_6_SWIZZLE_NONE
;
125 swizzle_y
= I915_BIT_6_SWIZZLE_NONE
;
// Dual channel interleaved: the result depends on the channel XOR bits
// held in dcc.
127 case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED
:
128 if (dcc
& DCC_CHANNEL_XOR_DISABLE
) {
129 /* This is the base swizzling by the GPU for
132 swizzle_x
= I915_BIT_6_SWIZZLE_9_10
;
133 swizzle_y
= I915_BIT_6_SWIZZLE_9
;
134 } else if ((dcc
& DCC_CHANNEL_XOR_BIT_17
) == 0) {
135 /* Bit 11 swizzling by the CPU in addition. */
136 swizzle_x
= I915_BIT_6_SWIZZLE_9_10_11
;
137 swizzle_y
= I915_BIT_6_SWIZZLE_9_11
;
139 /* Bit 17 swizzling by the CPU in addition. */
140 swizzle_x
= I915_BIT_6_SWIZZLE_UNKNOWN
;
141 swizzle_y
= I915_BIT_6_SWIZZLE_UNKNOWN
;
// A dcc value of all ones is treated as a failed MCHBAR read: the error
// is logged and both modes go back to UNKNOWN.
145 if (dcc
== 0xffffffff) {
146 DRM_ERROR("Couldn't read from MCHBAR. "
147 "Disabling tiling.\n");
148 swizzle_x
= I915_BIT_6_SWIZZLE_UNKNOWN
;
149 swizzle_y
= I915_BIT_6_SWIZZLE_UNKNOWN
;
152 /* The 965, G33, and newer, have a very flexible memory
153 * configuration. It will enable dual-channel mode
154 * (interleaving) on as much memory as it can, and the GPU
155 * will additionally sometimes enable different bit 6
156 * swizzling for tiled objects from the CPU.
158 * Here's what I found on the G965:
159 * slot fill memory size swizzling
160 * 0A 0B 1A 1B 1-ch 2-ch
162 * 512 0 512 0 16 1008 X
163 * 512 0 0 512 16 1008 X
164 * 0 512 0 512 16 1008 X
165 * 1024 1024 1024 0 2048 1024 O
167 * We could probably detect this based on either the DRB
168 * matching, which was the case for the swizzling required in
169 * the table above, or from the 1-ch value being less than
170 * the minimum size of a rank.
// Unequal C0DRB3 and C1DRB3 values give no swizzling. The 9_10 and 9
// assignment that follows is presumably the else arm; its else line is
// not visible in this copy - TODO confirm.
172 if (I915_READ16(C0DRB3
) != I915_READ16(C1DRB3
)) {
173 swizzle_x
= I915_BIT_6_SWIZZLE_NONE
;
174 swizzle_y
= I915_BIT_6_SWIZZLE_NONE
;
176 swizzle_x
= I915_BIT_6_SWIZZLE_9_10
;
177 swizzle_y
= I915_BIT_6_SWIZZLE_9
;
181 /* FIXME: check with memory config on IGDNG */
183 swizzle_x
= I915_BIT_6_SWIZZLE_9_10
;
184 swizzle_y
= I915_BIT_6_SWIZZLE_9
;
// Store the chosen modes in the device private state.
187 dev_priv
->mm
.bit_6_swizzle_x
= swizzle_x
;
188 dev_priv
->mm
.bit_6_swizzle_y
= swizzle_y
;
193 * Returns the size of the fence for a tiled object of the given size.
// i915_get_fence_size(dev, size)
// Computes the fence size for a tiled object of the given size.
// Two computations are visible: size rounded up to a multiple of
// PAGE_SIZE (the existing comment ties this to the 965), and a loop that
// doubles i, starting from start, while i is below size.
// NOTE(review): the return type, the declarations of i and start, the
// chipset test that selects between the two paths, and the final return
// are not visible in this copy.
196 i915_get_fence_size(struct drm_device
*dev
, int size
)
202 /* The 965 can have fences at any page boundary. */
// Round size up to a multiple of PAGE_SIZE (the mask form assumes
// PAGE_SIZE is a power of two - TODO confirm).
204 return (size
+ PAGE_SIZE
-1) & ~(PAGE_SIZE
-1);
206 /* Align the size to a power of two greater than the smallest
// Double i, starting from start, until it is no longer below size.
214 for (i
= start
; i
< size
; i
<<= 1)
221 /* Check pitch constraints for all chips & tiling formats */
// i915_tiling_ok(dev, stride, size, tiling_mode)
// Checks whether stride and size are acceptable for tiling_mode on this
// device. i915_gem_set_tiling rejects the request when this returns zero.
// NOTE(review): the return type, the declaration and assignment of
// tile_width, the chipset tests and every return statement are missing
// from this copy, so the outcome of each visible test is not shown.
223 i915_tiling_ok(struct drm_device
*dev
, int stride
, int size
, int tiling_mode
)
227 /* Linear is always fine */
228 if (tiling_mode
== I915_TILING_NONE
)
// Y tiling on hardware reporting HAS_128_BYTE_Y_TILING is handled
// separately (presumably a different tile_width; the assignment is not
// visible here - TODO confirm).
231 if (tiling_mode
== I915_TILING_Y
&& HAS_128_BYTE_Y_TILING(dev
))
239 /* 965+ just needs multiples of tile width */
// Tests stride against tile_width with a mask (assumes tile_width is a
// power of two - TODO confirm).
241 if (stride
& (tile_width
- 1))
246 /* Pre-965 needs power of two tile widths */
247 if (stride
< tile_width
)
253 /* We don't handle the aperture area covered by the fence being bigger
254 * than the object size.
// Compares the fence size computed for size against size itself.
256 if (i915_get_fence_size(dev
, size
) != size
)
263 * Sets the tiling mode of an object, returning the required swizzling of
264 * bit 6 of addresses in the object.
// i915_gem_set_tiling(DRM_IOCTL_ARGS)
// Ioctl handler that applies a requested tiling mode and stride to a GEM
// object and copies the resulting args, including swizzle_mode, back to
// user space.
// Visible sequence: check use_gem, copy args in, look up the object by
// args.handle, validate with i915_tiling_ok, take dev->struct_mutex,
// choose the swizzle mode, unbind the object if its tiling mode changes,
// record tiling mode and stride, copy args out, drop the reference and
// unlock.
// NOTE(review): return statements, braces, else lines, the NULL check on
// obj and the declaration of ret are not visible in this copy.
// NOTE(review): the object reference is dropped while struct_mutex is
// held on the final path, but before the lock is taken on the
// tiling-not-OK path and after it is released on the unbind-error path.
// Confirm which of these drm_gem_object_unreference requires.
268 i915_gem_set_tiling(DRM_IOCTL_ARGS
)
271 struct drm_i915_gem_set_tiling args
;
272 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
273 struct drm_gem_object
*obj
;
274 struct drm_i915_gem_object
*obj_priv
;
// Guard on dev->driver->use_gem being 1 (the action taken otherwise is
// not visible here).
277 if (dev
->driver
->use_gem
!= 1)
// Copy the request in from user space.
280 DRM_COPYFROM_WITH_RETURN(&args
,
281 (struct drm_i915_gem_set_tiling __user
*) data
, sizeof(args
));
// Resolve args.handle to a GEM object; each visible exit path below
// drops this reference.
283 obj
= drm_gem_object_lookup(fpriv
, args
.handle
);
286 obj_priv
= obj
->driver_private
;
// Reject stride, size and tiling combinations that fail validation.
288 if (!i915_tiling_ok(dev
, args
.stride
, obj
->size
, args
.tiling_mode
)) {
289 drm_gem_object_unreference(obj
);
290 DRM_DEBUG("i915 tiling is not OK");
294 spin_lock(&dev
->struct_mutex
);
// Untiled requests get no swizzling; X and Y requests take the modes
// stored in dev_priv->mm.
296 if (args
.tiling_mode
== I915_TILING_NONE
) {
297 args
.swizzle_mode
= I915_BIT_6_SWIZZLE_NONE
;
299 if (args
.tiling_mode
== I915_TILING_X
)
300 args
.swizzle_mode
= dev_priv
->mm
.bit_6_swizzle_x
;
302 args
.swizzle_mode
= dev_priv
->mm
.bit_6_swizzle_y
;
303 /* If we can't handle the swizzling, make it untiled. */
304 if (args
.swizzle_mode
== I915_BIT_6_SWIZZLE_UNKNOWN
) {
305 args
.tiling_mode
= I915_TILING_NONE
;
306 args
.swizzle_mode
= I915_BIT_6_SWIZZLE_NONE
;
// The tiling mode is changing: unbind the object first.
310 if (args
.tiling_mode
!= obj_priv
->tiling_mode
) {
313 /* Unbind the object, as switching tiling means we're
314 * switching the cache organization due to fencing, probably.
316 ret
= i915_gem_object_unbind(obj
, 1);
// Unbind failure path: put the unchanged tiling mode back into args,
// unlock, drop the reference and log the error code.
318 args
.tiling_mode
= obj_priv
->tiling_mode
;
319 spin_unlock(&dev
->struct_mutex
);
320 drm_gem_object_unreference(obj
);
321 DRM_ERROR("tiling switch!! unbind error %d", ret
);
// Record the new tiling mode and the stride on the object.
324 obj_priv
->tiling_mode
= args
.tiling_mode
;
326 obj_priv
->stride
= args
.stride
;
// Return the possibly adjusted args to user space; a failure is logged.
328 ret
= DRM_COPY_TO_USER((struct drm_i915_gem_set_tiling __user
*) data
, &args
, sizeof(args
));
330 DRM_ERROR(" gem set tiling error! %d", ret
);
// Drop the lookup reference, then release struct_mutex.
332 drm_gem_object_unreference(obj
);
333 spin_unlock(&dev
->struct_mutex
);
339 * Returns the current tiling mode and required bit 6 swizzling for the object.
// i915_gem_get_tiling(DRM_IOCTL_ARGS)
// Ioctl handler that reports the tiling mode of a GEM object, and the
// matching bit 6 swizzle mode, back to user space.
// Visible sequence: check use_gem, copy args in, look up the object by
// args.handle, take dev->struct_mutex, fill args.tiling_mode and
// args.swizzle_mode, copy args out, drop the reference and unlock.
// NOTE(review): return statements, braces, break statements, the NULL
// check on obj, the declaration of ret and the X and Y case labels are
// not visible in this copy.
343 i915_gem_get_tiling(DRM_IOCTL_ARGS
)
346 struct drm_i915_gem_get_tiling args
;
347 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
348 struct drm_gem_object
*obj
;
349 struct drm_i915_gem_object
*obj_priv
;
// Guard on dev->driver->use_gem being 1 (the action taken otherwise is
// not visible here).
352 if (dev
->driver
->use_gem
!= 1)
// Copy the request in from user space.
355 DRM_COPYFROM_WITH_RETURN(&args
,
356 (struct drm_i915_gem_get_tiling __user
*) data
, sizeof(args
));
// Resolve args.handle to a GEM object; the reference is dropped at the
// end of the function.
358 obj
= drm_gem_object_lookup(fpriv
, args
.handle
);
361 obj_priv
= obj
->driver_private
;
363 spin_lock(&dev
->struct_mutex
);
// Report the stored tiling mode and map it to a swizzle mode.
365 args
.tiling_mode
= obj_priv
->tiling_mode
;
366 switch (obj_priv
->tiling_mode
) {
// The next two assignments presumably sit under the X and Y case labels,
// which are not visible in this copy - TODO confirm.
368 args
.swizzle_mode
= dev_priv
->mm
.bit_6_swizzle_x
;
371 args
.swizzle_mode
= dev_priv
->mm
.bit_6_swizzle_y
;
373 case I915_TILING_NONE
:
374 args
.swizzle_mode
= I915_BIT_6_SWIZZLE_NONE
;
// Presumably the default arm (label not visible): logs an unknown mode.
377 DRM_ERROR("unknown tiling mode\n");
// Return args to user space; a failure is logged.
382 ret
= DRM_COPY_TO_USER((struct drm_i915_gem_get_tiling __user
*) data
, &args
, sizeof(args
));
384 DRM_ERROR(" gem get tiling error! %d", ret
);
// Drop the lookup reference, then release struct_mutex.
386 drm_gem_object_unreference(obj
);
387 spin_unlock(&dev
->struct_mutex
);