/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "rv770d.h"
#include "atom.h"
#include "avivod.h"

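/* CP microcode lengths for the r7xx PFP and ME (PM4), in 32-bit words */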
#define R700_PFP_UCODE_SIZE 848
#define R700_PM4_UCODE_SIZE 1360

static void rv770_gpu_init(struct radeon_device *rdev);
void rv770_fini(struct radeon_device *rdev);
static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);

static int rv770_uvd_calc_post_div(unsigned target_freq,
				   unsigned vco_freq,
				   unsigned *div)
{
	/* Fclk = Fvco / PDIV */
	*div = vco_freq / target_freq;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / *div) > target_freq)
		*div += 1;

	/* out of range ? */
	if (*div > 30)
		return -1; /* forget it */

	*div -= 1;
	return vco_freq / (*div + 1);
}

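/*
 * UPLL control handshake: assert UPLL_CTLREQ, poll until both acknowledge
 * bits are set, then deassert the request again.
 */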
static int rv770_uvd_send_upll_ctlreq(struct radeon_device *rdev)
{
	unsigned i;

	/* assert UPLL_CTLREQ */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	/* deassert UPLL_CTLREQ */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);

	return 0;
}

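/*
 * Pick VCLK/DCLK dividers by scanning the allowed VCO range from low to
 * high and scoring each candidate by the sum of the absolute clock errors;
 * the best-scoring feedback/post divider combination is then programmed.
 */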
int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	/* start off with something large */
	int optimal_diff_score = 0x7FFFFFF;
	unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
	unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
	unsigned vco_freq, vco_min = 50000, vco_max = 160000;
	unsigned ref_freq = rdev->clock.spll.reference_freq;
	int r;

	/* RV740 uses evergreen uvd clk programming */
	if (rdev->family == CHIP_RV740)
		return evergreen_set_uvd_clocks(rdev, vclk, dclk);

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 500) {
		uint64_t fb_div = (uint64_t)vco_freq * 43663;
		int calc_clk, diff_score, diff_vclk, diff_dclk;
		unsigned vclk_div, dclk_div;

		do_div(fb_div, ref_freq);

		/* fb div out of range ? */
		if (fb_div > 0x03FFFFFF)
			break; /* it can only get worse */

		/* calc vclk with current vco freq. */
		calc_clk = rv770_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
		if (calc_clk == -1)
			break; /* vco is too big, it has to stop. */
		diff_vclk = vclk - calc_clk;

		/* calc dclk with current vco freq. */
		calc_clk = rv770_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
		if (calc_clk == -1)
			break; /* vco is too big, it has to stop. */
		diff_dclk = dclk - calc_clk;

		/* determine if this vco setting is better than current optimal settings */
		diff_score = abs(diff_vclk) + abs(diff_dclk);
		if (diff_score < optimal_diff_score) {
			optimal_fb_div = fb_div;
			optimal_vclk_div = vclk_div;
			optimal_dclk_div = dclk_div;
			optimal_vco_freq = vco_freq;
			optimal_diff_score = diff_score;
			if (optimal_diff_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* set UPLL_FB_DIV to 0x50000 */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(0x50000), ~UPLL_FB_DIV_MASK);

	/* deassert UPLL_RESET and UPLL_SLEEP */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~(UPLL_RESET_MASK | UPLL_SLEEP_MASK));

	/* assert BYPASS EN and FB_DIV[0] <- ??? why? */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(1), ~UPLL_FB_DIV(1));

	r = rv770_uvd_send_upll_ctlreq(rdev);
	if (r)
		return r;

	/* assert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* set the required FB_DIV, REF_DIV, post divider values */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REF_DIV(1), ~UPLL_REF_DIV_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		 UPLL_SW_HILEN(optimal_vclk_div >> 1) |
		 UPLL_SW_LOLEN((optimal_vclk_div >> 1) + (optimal_vclk_div & 1)) |
		 UPLL_SW_HILEN2(optimal_dclk_div >> 1) |
		 UPLL_SW_LOLEN2((optimal_dclk_div >> 1) + (optimal_dclk_div & 1)),
		 ~UPLL_SW_MASK);

	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div),
		 ~UPLL_FB_DIV_MASK);

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* deassert BYPASS EN and FB_DIV[0] <- ??? why? */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL_3, 0, ~UPLL_FB_DIV(1));

	r = rv770_uvd_send_upll_ctlreq(rdev);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}

#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)

/**
 * rv770_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (r7xx-cayman).
 */
u32 rv770_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp = RREG32(CG_CLKPIN_CNTL);

	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

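/*
 * The UVD VCPU sees the firmware image, stack and heap as three consecutive
 * windows; each offset/size pair below is programmed in units of 8 bytes
 * (hence the >> 3 shifts).
 */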
int rv770_uvd_resume(struct radeon_device *rdev)
{
	uint64_t addr;
	uint32_t chip_id, size;
	int r;

	r = radeon_uvd_resume(rdev);
	if (r)
		return r;

	/* program the VCPU memory controller bits 0-27 */
	addr = rdev->uvd.gpu_addr >> 3;
	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
	WREG32(UVD_VCPU_CACHE_SIZE0, size);

	addr += size;
	size = RADEON_UVD_STACK_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
	WREG32(UVD_VCPU_CACHE_SIZE1, size);

	addr += size;
	size = RADEON_UVD_HEAP_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
	WREG32(UVD_VCPU_CACHE_SIZE2, size);

	/* bits 28-31 */
	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));

	/* bits 32-39 */
	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
	/* tell firmware which hardware it is running on */
	switch (rdev->family) {
	default:
		return -EINVAL;
	case CHIP_RV710:
		chip_id = 0x01000005;
		break;
	case CHIP_RV730:
		chip_id = 0x01000006;
		break;
	case CHIP_RV740:
		chip_id = 0x01000007;
		break;
	case CHIP_CEDAR:
		chip_id = 0x01000008;
		break;
	case CHIP_REDWOOD:
		chip_id = 0x01000009;
		break;
	case CHIP_JUNIPER:
		chip_id = 0x0100000a;
		break;
	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
		chip_id = 0x0100000b;
		break;
	case CHIP_PALM:
		chip_id = 0x0100000c;
		break;
	case CHIP_SUMO:
		chip_id = 0x0100000d;
		break;
	case CHIP_SUMO2:
		chip_id = 0x0100000e;
		break;
	case CHIP_BARTS:
		chip_id = 0x0100000f;
		break;
	case CHIP_TURKS:
		chip_id = 0x01000010;
		break;
	case CHIP_CAICOS:
		chip_id = 0x01000011;
		break;
	case CHIP_CAYMAN:
		chip_id = 0x01000012;
		break;
	case CHIP_ARUBA:
		chip_id = 0x01000014;
		break;
	case CHIP_TAHITI:
		chip_id = 0x01000015;
		break;
	case CHIP_VERDE:
		chip_id = 0x01000016;
		break;
	case CHIP_PITCAIRN:
		chip_id = 0x01000017;
		break;
	}
	WREG32(UVD_VCPU_CHIP_ID, chip_id);

	return 0;
}

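/*
 * Page flipping: the new scanout address only takes effect once the GRPH
 * update lock is released, so program it under the lock, wait for the
 * surface-update-pending bit, and then unlock.
 */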
u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
{
	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
	u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset);
	int i;

	/* Lock the graphics update lock */
	tmp |= AVIVO_D1GRPH_UPDATE_LOCK;
	WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);

	/* update the scanout addresses */
	if (radeon_crtc->crtc_id) {
		WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
		WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
	} else {
		WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
		WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
	}
	WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
	       (u32)crtc_base);
	WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
	       (u32)crtc_base);

	/* Wait for update_pending to go high. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING)
			break;
		udelay(1);
	}
	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");

	/* Unlock the lock, so double-buffering can take place inside vblank */
	tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK;
	WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);

	/* Return current update_pending status: */
	return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING;
}

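/*
 * The thermal sensor reports a value in half-degree steps; the branches
 * below clamp and sign-extend the raw field before converting to
 * millidegrees.
 */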
/* get temperature in millidegrees */
int rv770_get_temp(struct radeon_device *rdev)
{
	u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
		ASIC_T_SHIFT;
	int actual_temp;

	if (temp & 0x400)
		actual_temp = -256;
	else if (temp & 0x200)
		actual_temp = 255;
	else if (temp & 0x100) {
		actual_temp = temp & 0x1ff;
		actual_temp |= ~0x1ff;
	} else
		actual_temp = temp & 0xff;

	return (actual_temp * 1000) / 2;
}

void rv770_pm_misc(struct radeon_device *rdev)
{
	int req_ps_idx = rdev->pm.requested_power_state_index;
	int req_cm_idx = rdev->pm.requested_clock_mode_index;
	struct radeon_power_state *ps = &rdev->pm.power_state[req_ps_idx];
	struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage;

	if ((voltage->type == VOLTAGE_SW) && voltage->voltage) {
		/* 0xff01 is a flag rather than an actual voltage */
		if (voltage->voltage == 0xff01)
			return;
		if (voltage->voltage != rdev->pm.current_vddc) {
			radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC);
			rdev->pm.current_vddc = voltage->voltage;
			DRM_DEBUG("Setting: v: %d\n", voltage->voltage);
		}
	}
}

static int rv770_pcie_gart_enable(struct radeon_device *rdev)
{
	u32 tmp;
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
				EFFECTIVE_L2_QUEUE_SIZE(7));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
	/* Setup TLB control */
	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
	if (rdev->family == CHIP_RV740)
		WREG32(MC_VM_MD_L1_TLB3_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	for (i = 1; i < 7; i++)
		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);

	r600_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

static void rv770_pcie_gart_disable(struct radeon_device *rdev)
{
	u32 tmp;
	int i;

	/* Disable all tables */
	for (i = 0; i < 7; i++)
		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);

	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
				EFFECTIVE_L2_QUEUE_SIZE(7));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
	/* Setup TLB control */
	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
	radeon_gart_table_vram_unpin(rdev);
}

static void rv770_pcie_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	rv770_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
}

static void rv770_agp_enable(struct radeon_device *rdev)
{
	u32 tmp;
	int i;

	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
				EFFECTIVE_L2_QUEUE_SIZE(7));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
	/* Setup TLB control */
	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
	for (i = 0; i < 7; i++)
		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
}

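/*
 * Reprogramming the memory controller aperture is only safe while the MC
 * is idle, so the display/MC clients are stopped around the update and the
 * VGA renderer is disabled afterwards so it cannot scribble on VRAM.
 */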
static void rv770_mc_program(struct radeon_device *rdev)
{
	struct rv515_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	/* r7xx hw bug.  Read from HDP_DEBUG1 rather
	 * than writing to HDP_REG_COHERENCY_FLUSH_CNTL
	 */
	tmp = RREG32(HDP_DEBUG1);

	rv515_mc_stop(rdev, &save);
	if (r600_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	if (rdev->flags & RADEON_IS_AGP) {
		if (rdev->mc.vram_start < rdev->mc.gtt_start) {
			/* VRAM before AGP */
			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
				rdev->mc.vram_start >> 12);
			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
				rdev->mc.gtt_end >> 12);
		} else {
			/* VRAM after AGP */
			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
				rdev->mc.gtt_start >> 12);
			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
				rdev->mc.vram_end >> 12);
		}
	} else {
		WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
			rdev->mc.vram_start >> 12);
		WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
			rdev->mc.vram_end >> 12);
	}
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, rdev->vram_scratch.gpu_addr >> 12);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16);
		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
	} else {
		WREG32(MC_VM_AGP_BASE, 0);
		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	}
	if (r600_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	rv515_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}

/*
 * CP.
 */
void r700_cp_stop(struct radeon_device *rdev)
{
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
	WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
	WREG32(SCRATCH_UMSK, 0);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
}

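/*
 * The PFP and ME microcode images are stored big-endian in the firmware
 * blobs, so each word is byte-swapped with be32_to_cpup() as it is written
 * into the CP ucode RAMs.
 */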
static int rv770_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	r700_cp_stop(rdev);
	WREG32(CP_RB_CNTL,
#ifdef __BIG_ENDIAN
	       BUF_SWAP_32BIT |
#endif
	       RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));

	/* Reset cp */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

void r700_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r700_cp_stop(rdev);
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}

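/*
 * Core functions
 *
 * rv770_gpu_init() derives the per-chip shader/backend limits, applies the
 * harvesting fuses read back from the config registers, and programs the
 * tiling and 3D-engine defaults.
 */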
static void rv770_gpu_init(struct radeon_device *rdev)
{
	int i, j, num_qd_pipes;
	u32 ta_aux_cntl;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 db_debug3;
	u32 num_gs_verts_per_thread;
	u32 vgt_gs_per_es;
	u32 gs_prim_buffer_depth = 0;
	u32 sq_ms_fifo_sizes;
	u32 sq_config;
	u32 sq_thread_resource_mgmt;
	u32 hdp_host_path_cntl;
	u32 sq_dyn_gpr_size_simd_ab_0;
	u32 gb_tiling_config = 0;
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_pipe_config = 0;
	u32 mc_arb_ramcfg;
	u32 db_debug4, tmp;
	u32 inactive_pipes, shader_pipe_config;
	u32 disabled_rb_mask;
	unsigned active_number;

	/* setup chip specs */
	rdev->config.rv770.tiling_group_size = 256;
	switch (rdev->family) {
	case CHIP_RV770:
		rdev->config.rv770.max_pipes = 4;
		rdev->config.rv770.max_tile_pipes = 8;
		rdev->config.rv770.max_simds = 10;
		rdev->config.rv770.max_backends = 4;
		rdev->config.rv770.max_gprs = 256;
		rdev->config.rv770.max_threads = 248;
		rdev->config.rv770.max_stack_entries = 512;
		rdev->config.rv770.max_hw_contexts = 8;
		rdev->config.rv770.max_gs_threads = 16 * 2;
		rdev->config.rv770.sx_max_export_size = 128;
		rdev->config.rv770.sx_max_export_pos_size = 16;
		rdev->config.rv770.sx_max_export_smx_size = 112;
		rdev->config.rv770.sq_num_cf_insts = 2;

		rdev->config.rv770.sx_num_of_sets = 7;
		rdev->config.rv770.sc_prim_fifo_size = 0xF9;
		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV730:
		rdev->config.rv770.max_pipes = 2;
		rdev->config.rv770.max_tile_pipes = 4;
		rdev->config.rv770.max_simds = 8;
		rdev->config.rv770.max_backends = 2;
		rdev->config.rv770.max_gprs = 128;
		rdev->config.rv770.max_threads = 248;
		rdev->config.rv770.max_stack_entries = 256;
		rdev->config.rv770.max_hw_contexts = 8;
		rdev->config.rv770.max_gs_threads = 16 * 2;
		rdev->config.rv770.sx_max_export_size = 256;
		rdev->config.rv770.sx_max_export_pos_size = 32;
		rdev->config.rv770.sx_max_export_smx_size = 224;
		rdev->config.rv770.sq_num_cf_insts = 2;

		rdev->config.rv770.sx_num_of_sets = 7;
		rdev->config.rv770.sc_prim_fifo_size = 0xf9;
		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
		if (rdev->config.rv770.sx_max_export_pos_size > 16) {
			rdev->config.rv770.sx_max_export_pos_size -= 16;
			rdev->config.rv770.sx_max_export_smx_size += 16;
		}
		break;
	case CHIP_RV710:
		rdev->config.rv770.max_pipes = 2;
		rdev->config.rv770.max_tile_pipes = 2;
		rdev->config.rv770.max_simds = 2;
		rdev->config.rv770.max_backends = 1;
		rdev->config.rv770.max_gprs = 256;
		rdev->config.rv770.max_threads = 192;
		rdev->config.rv770.max_stack_entries = 256;
		rdev->config.rv770.max_hw_contexts = 4;
		rdev->config.rv770.max_gs_threads = 8 * 2;
		rdev->config.rv770.sx_max_export_size = 128;
		rdev->config.rv770.sx_max_export_pos_size = 16;
		rdev->config.rv770.sx_max_export_smx_size = 112;
		rdev->config.rv770.sq_num_cf_insts = 1;

		rdev->config.rv770.sx_num_of_sets = 7;
		rdev->config.rv770.sc_prim_fifo_size = 0x40;
		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV740:
		rdev->config.rv770.max_pipes = 4;
		rdev->config.rv770.max_tile_pipes = 4;
		rdev->config.rv770.max_simds = 8;
		rdev->config.rv770.max_backends = 4;
		rdev->config.rv770.max_gprs = 256;
		rdev->config.rv770.max_threads = 248;
		rdev->config.rv770.max_stack_entries = 512;
		rdev->config.rv770.max_hw_contexts = 8;
		rdev->config.rv770.max_gs_threads = 16 * 2;
		rdev->config.rv770.sx_max_export_size = 256;
		rdev->config.rv770.sx_max_export_pos_size = 32;
		rdev->config.rv770.sx_max_export_smx_size = 224;
		rdev->config.rv770.sq_num_cf_insts = 2;

		rdev->config.rv770.sx_num_of_sets = 7;
		rdev->config.rv770.sc_prim_fifo_size = 0x100;
		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;

		if (rdev->config.rv770.sx_max_export_pos_size > 16) {
			rdev->config.rv770.sx_max_export_pos_size -= 16;
			rdev->config.rv770.sx_max_export_smx_size += 16;
		}
		break;
	default:
		break;
	}

	/* Initialize HDP */
	j = 0;
	for (i = 0; i < 32; i++) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
		j += 0x18;
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* setup tiling, simd, pipe config */
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
	inactive_pipes = (shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT;
	for (i = 0, tmp = 1, active_number = 0; i < R7XX_MAX_PIPES; i++) {
		if (!(inactive_pipes & tmp)) {
			active_number++;
		}
		tmp <<= 1;
	}
	if (active_number == 1) {
		WREG32(SPI_CONFIG_CNTL, DISABLE_INTERP_1);
	} else {
		WREG32(SPI_CONFIG_CNTL, 0);
	}

	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
	tmp = R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_rb_backend_disable >> 16);
	if (tmp < rdev->config.rv770.max_backends) {
		rdev->config.rv770.max_backends = tmp;
	}

	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
	tmp = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R7XX_MAX_PIPES_MASK);
	if (tmp < rdev->config.rv770.max_pipes) {
		rdev->config.rv770.max_pipes = tmp;
	}
	tmp = R7XX_MAX_SIMDS - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
	if (tmp < rdev->config.rv770.max_simds) {
		rdev->config.rv770.max_simds = tmp;
	}

	switch (rdev->config.rv770.max_tile_pipes) {
	case 1:
	default:
		gb_tiling_config = PIPE_TILING(0);
		break;
	case 2:
		gb_tiling_config = PIPE_TILING(1);
		break;
	case 4:
		gb_tiling_config = PIPE_TILING(2);
		break;
	case 8:
		gb_tiling_config = PIPE_TILING(3);
		break;
	}
	rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes;

	disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R7XX_MAX_BACKENDS_MASK;
	tmp = (gb_tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
	tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.rv770.max_backends,
					R7XX_MAX_BACKENDS, disabled_rb_mask);
	gb_tiling_config |= tmp << 16;
	rdev->config.rv770.backend_map = tmp;

	if (rdev->family == CHIP_RV770)
		gb_tiling_config |= BANK_TILING(1);
	else {
		if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
			gb_tiling_config |= BANK_TILING(1);
		else
			gb_tiling_config |= BANK_TILING(0);
	}
	rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3);
	gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT);
	if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) {
		gb_tiling_config |= ROW_TILING(3);
		gb_tiling_config |= SAMPLE_SPLIT(3);
	} else {
		gb_tiling_config |=
			ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
		gb_tiling_config |=
			SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
	}

	gb_tiling_config |= BANK_SWAPS(1);
	rdev->config.rv770.tile_config = gb_tiling_config;

	WREG32(GB_TILING_CONFIG, gb_tiling_config);
	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
	WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
	WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
	if (rdev->family == CHIP_RV730) {
		WREG32(UVD_UDEC_DB_TILING_CONFIG, (gb_tiling_config & 0xffff));
		WREG32(UVD_UDEC_DBW_TILING_CONFIG, (gb_tiling_config & 0xffff));
		WREG32(UVD_UDEC_TILING_CONFIG, (gb_tiling_config & 0xffff));
	}

	WREG32(CGTS_SYS_TCC_DISABLE, 0);
	WREG32(CGTS_TCC_DISABLE, 0);
	WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
	WREG32(CGTS_USER_TCC_DISABLE, 0);

	num_qd_pipes = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
	WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));

	WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));

	ta_aux_cntl = RREG32(TA_CNTL_AUX);
	WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO);

	sx_debug_1 = RREG32(SX_DEBUG_1);
	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
	WREG32(SX_DEBUG_1, sx_debug_1);

	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
	smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff);
	smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
	WREG32(SMX_DC_CTL0, smx_dc_ctl0);

	if (rdev->family != CHIP_RV740)
		WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
				       GS_FLUSH_CTL(4) |
				       ACK_FLUSH_CTL(3) |
				       SYNC_FLUSH_CTL));

	if (rdev->family != CHIP_RV770)
		WREG32(SMX_SAR_CTL0, 0x00003f3f);

	db_debug3 = RREG32(DB_DEBUG3);
	db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f);
	switch (rdev->family) {
	case CHIP_RV770:
	case CHIP_RV740:
		db_debug3 |= DB_CLK_OFF_DELAY(0x1f);
		break;
	case CHIP_RV710:
	case CHIP_RV730:
	default:
		db_debug3 |= DB_CLK_OFF_DELAY(2);
		break;
	}
	WREG32(DB_DEBUG3, db_debug3);

	if (rdev->family != CHIP_RV770) {
		db_debug4 = RREG32(DB_DEBUG4);
		db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
		WREG32(DB_DEBUG4, db_debug4);
	}

	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) |
					POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) |
					SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1)));

	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize)));

	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(CP_PERFMON_CNTL, 0);

	sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) |
			    DONE_FIFO_HIWATER(0xe0) |
			    ALU_UPDATE_FIFO_HIWATER(0x8));
	switch (rdev->family) {
	case CHIP_RV770:
	case CHIP_RV730:
	case CHIP_RV710:
		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
		break;
	case CHIP_RV740:
	default:
		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
		break;
	}
	WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);

	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
	 */
	sq_config = RREG32(SQ_CONFIG);
	sq_config &= ~(PS_PRIO(3) |
		       VS_PRIO(3) |
		       GS_PRIO(3) |
		       ES_PRIO(3));
	sq_config |= (DX9_CONSTS |
		      VC_ENABLE |
		      EXPORT_SRC_C |
		      PS_PRIO(0) |
		      VS_PRIO(1) |
		      GS_PRIO(2) |
		      ES_PRIO(3));
	if (rdev->family == CHIP_RV710)
		/* no vertex cache */
		sq_config &= ~VC_ENABLE;

	WREG32(SQ_CONFIG, sq_config);

	WREG32(SQ_GPR_RESOURCE_MGMT_1, (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
					NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
					NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2)));

	WREG32(SQ_GPR_RESOURCE_MGMT_2, (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) |
					NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64)));

	sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) |
				   NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) |
				   NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8));
	if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads)
		sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads);
	else
		sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8);
	WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);

	WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
					  NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));

	WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
					  NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));

	sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) |
				     SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) |
				     SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) |
				     SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64));

	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	if (rdev->family == CHIP_RV710)
		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) |
						AUTO_INVLD_EN(ES_AND_GS_AUTO)));
	else
		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) |
						AUTO_INVLD_EN(ES_AND_GS_AUTO)));

	switch (rdev->family) {
	case CHIP_RV770:
	case CHIP_RV730:
	case CHIP_RV740:
		gs_prim_buffer_depth = 384;
		break;
	case CHIP_RV710:
		gs_prim_buffer_depth = 128;
		break;
	default:
		break;
	}

	num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16;
	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
	/* Max value for this is 256 */
	if (vgt_gs_per_es > 256)
		vgt_gs_per_es = 256;

	WREG32(VGT_ES_PER_GS, 128);
	WREG32(VGT_GS_PER_ES, vgt_gs_per_es);
	WREG32(VGT_GS_PER_VS, 2);

	/* more default values. 2D/3D driver should adjust as needed */
	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
	WREG32(VGT_STRMOUT_EN, 0);

	WREG32(PA_SC_MODE_CNTL, 0);
	WREG32(PA_SC_EDGERULE, 0xaaaaaaaa);
	WREG32(PA_SC_AA_CONFIG, 0);
	WREG32(PA_SC_CLIPRECT_RULE, 0xffff);
	WREG32(PA_SC_LINE_STIPPLE, 0);
	WREG32(SPI_INPUT_Z, 0);
	WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
	WREG32(CB_COLOR7_FRAG, 0);

	/* clear render buffer base addresses */
	WREG32(CB_COLOR0_BASE, 0);
	WREG32(CB_COLOR1_BASE, 0);
	WREG32(CB_COLOR2_BASE, 0);
	WREG32(CB_COLOR3_BASE, 0);
	WREG32(CB_COLOR4_BASE, 0);
	WREG32(CB_COLOR5_BASE, 0);
	WREG32(CB_COLOR6_BASE, 0);
	WREG32(CB_COLOR7_BASE, 0);

	WREG32(TCP_CNTL, 0);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);

	WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
			       NUM_CLIP_SEQ(3)));
	WREG32(VC_ENHANCE, 0);
}

void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
{
	u64 size_bf, size_af;

	if (mc->mc_vram_size > 0xE0000000) {
		/* leave room for at least 512M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xE0000000;
		mc->mc_vram_size = 0xE0000000;
	}
	if (rdev->flags & RADEON_IS_AGP) {
		size_bf = mc->gtt_start;
		size_af = mc->mc_mask - mc->gtt_end;
		if (size_bf > size_af) {
			if (mc->mc_vram_size > size_bf) {
				dev_warn(rdev->dev, "limiting VRAM\n");
				mc->real_vram_size = size_bf;
				mc->mc_vram_size = size_bf;
			}
			mc->vram_start = mc->gtt_start - mc->mc_vram_size;
		} else {
			if (mc->mc_vram_size > size_af) {
				dev_warn(rdev->dev, "limiting VRAM\n");
				mc->real_vram_size = size_af;
				mc->mc_vram_size = size_af;
			}
			mc->vram_start = mc->gtt_end + 1;
		}
		mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
		dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM used)\n",
				mc->mc_vram_size >> 20, mc->vram_start,
				mc->vram_end, mc->real_vram_size >> 20);
	} else {
		radeon_vram_location(rdev, &rdev->mc, 0);
		rdev->mc.gtt_base_align = 0;
		radeon_gtt_location(rdev, mc);
	}
}

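/*
 * VRAM width is derived from the RAMCFG channel size and the number of
 * memory channels reported by MC_SHARED_CHMAP; the visible size is capped
 * by the PCI aperture.
 */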
static int rv770_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM information */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* Setup GPU memory space */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	r700_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}

/**
 * rv770_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the DMA engine (r7xx).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int rv770_copy_dma(struct radeon_device *rdev,
		   uint64_t src_offset, uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_dw, cur_size_in_dw;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	/* each DMA copy packet can move at most 0xFFFF dwords */
	for (i = 0; i < num_loops; i++) {
		cur_size_in_dw = size_in_dw;
		if (cur_size_in_dw > 0xFFFF)
			cur_size_in_dw = 0xFFFF;
		size_in_dw -= cur_size_in_dw;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, src_offset & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
		src_offset += cur_size_in_dw * 4;
		dst_offset += cur_size_in_dw * 4;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}

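/*
 * Bring-up order matters here: firmware must be loaded and the memory
 * controller programmed before the GART, rings and interrupt handler are
 * initialized; UVD failures are non-fatal and simply leave that ring
 * disabled.
 */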
static int rv770_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2 link */
	rv770_pcie_gen2_enable(rdev);

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
		r = r600_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	rv770_mc_program(rdev);
	if (rdev->flags & RADEON_IS_AGP) {
		rv770_agp_enable(rdev);
	} else {
		r = rv770_pcie_gart_enable(rdev);
		if (r)
			return r;
	}

	rv770_gpu_init(rdev);
	r = r600_blit_init(rdev);
	if (r) {
		r600_blit_fini(rdev);
		rdev->asic->copy.copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	r600_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR, DMA_RB_WPTR,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = rv770_cp_load_microcode(rdev);
	if (r)
		return r;
	r = r600_cp_resume(rdev);
	if (r)
		return r;

	r = r600_dma_resume(rdev);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);

		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = r600_audio_init(rdev);
	if (r) {
		DRM_ERROR("radeon: audio init failed\n");
		return r;
	}

	return 0;
}

int rv770_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	rdev->accel_working = true;
	r = rv770_startup(rdev);
	if (r) {
		DRM_ERROR("r600 startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}

int rv770_suspend(struct radeon_device *rdev)
{
	r600_audio_fini(rdev);
	radeon_uvd_suspend(rdev);
	r700_cp_stop(rdev);
	r600_dma_stop(rdev);
	r600_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	rv770_pcie_gart_disable(rdev);

	return 0;
}

/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This should
 * also allow us to remove a bunch of callback functions
 * like vram_info.
 */
int rv770_init(struct radeon_device *rdev)
{
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for R600 GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;
	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize AGP */
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r)
			radeon_agp_disable(rdev);
	}
	r = rv770_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;

	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
		r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
			       4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = rv770_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		r700_cp_fini(rdev);
		r600_dma_fini(rdev);
		r600_irq_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_irq_kms_fini(rdev);
		rv770_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	return 0;
}

void rv770_fini(struct radeon_device *rdev)
{
	r600_blit_fini(rdev);
	r700_cp_fini(rdev);
	r600_dma_fini(rdev);
	r600_irq_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	rv770_pcie_gart_fini(rdev);
	radeon_uvd_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_agp_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

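/*
 * Optional PCIe gen2 bring-up: bail out early on IGP, non-PCIE and dual-GPU
 * boards, or when the link does not advertise 5.0 GT/s support, then
 * retrain the link through the LC speed/link-width control registers.
 */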
static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
{
	u32 link_width_cntl, lanes, speed_cntl, tmp;
	u16 link_cntl2;
	u32 mask;
	int ret;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* x2 cards have a special sequence */
	if (ASIC_IS_X2(rdev))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & DRM_PCIE_SPEED_50))
		return;

	DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");

	/* advertise upconfig capability */
	link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
	link_width_cntl &= ~LC_UPCONFIGURE_DIS;
	WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
	if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) {
		lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT;
		link_width_cntl &= ~(LC_LINK_WIDTH_MASK |
				     LC_RECONFIG_ARC_MISSING_ESCAPE);
		link_width_cntl |= lanes | LC_RECONFIG_NOW |
			LC_RENEGOTIATE_EN | LC_UPCONFIGURE_SUPPORT;
		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	} else {
		link_width_cntl |= LC_UPCONFIGURE_DIS;
		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	}

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
	    (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) {

		tmp = RREG32(0x541c);
		WREG32(0x541c, tmp | 0x8);
		WREG32(MM_CFGREGS_CNTL, MM_WR_TO_CFG_EN);
		link_cntl2 = RREG16(0x4088);
		link_cntl2 &= ~TARGET_LINK_SPEED_MASK;
		link_cntl2 |= 0x2;
		WREG16(0x4088, link_cntl2);
		WREG32(MM_CFGREGS_CNTL, 0);

		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN;
		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT;
		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT;
		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		speed_cntl |= LC_GEN2_EN_STRAP;
		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	} else {
		link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
		/* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */
		if (1)
			link_width_cntl |= LC_UPCONFIGURE_DIS;
		else
			link_width_cntl &= ~LC_UPCONFIGURE_DIS;
		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	}
}