2 * Copyright 1993-2003 NVIDIA, Corporation
3 * Copyright 2007-2009 Stuart Bennett
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 #include "nouveau_drv.h"
26 #include "nouveau_hw.h"
28 /****************************************************************************\
30 * The video arbitration routines calculate some "magic" numbers. Fixes *
31 * the snow seen when accessing the framebuffer without it. *
32 * It just works (I hope). *
34 \****************************************************************************/
/*
 * nv04_calc_arb - derive a CRTC FIFO watermark candidate from the clock
 * and memory parameters in @arb.
 *
 * @fifo: result structure; no write to it is visible in this listing.
 * @arb:  input state; pclk_khz, mclk_khz, nvclk_khz, mem_page_miss,
 *        mem_latency and memory_width are read from it here.
 *
 * The visible statements scale cycle counts by 1000 * 1000 / clock to get
 * latencies, sum them into us_crt, and multiply by the CRTC drain rate
 * (pclk_freq * bpp / 8) to obtain clwm.
 *
 * NOTE(review): the embedded original line numbers jump (for example
 * 68 -> 81 and 91 -> 94), so statements are missing from this listing.
 * The assignments to mclks, nvclks, mclk_extra, crtpagemiss, bpp and cbs
 * are among the parts that cannot be seen here.
 * NOTE(review): with clocks in kHz the 1000 * 1000 scaling presumably
 * yields nanoseconds despite the us_ prefixes - confirm.
 */
54 nv04_calc_arb(struct nv_fifo_info
*fifo
, struct nv_sim_state
*arb
)
56 int pagemiss
, cas
, width
, bpp
;
57 int nvclks
, mclks
, pclks
, crtpagemiss
;
58 int found
, mclk_extra
, mclk_loop
, cbs
, m1
, p1
;
59 int mclk_freq
, pclk_freq
, nvclk_freq
;
60 int us_m
, us_n
, us_p
, crtc_drain_rate
;
61 int cpm_us
, us_crt
, clwm
;
/* Cache the inputs from @arb in locals. */
63 pclk_freq
= arb
->pclk_khz
;
64 mclk_freq
= arb
->mclk_khz
;
65 nvclk_freq
= arb
->nvclk_khz
;
66 pagemiss
= arb
->mem_page_miss
;
67 cas
= arb
->mem_latency
;
/* memory_width divided by 64. */
68 width
= arb
->memory_width
>> 6;
/* Turn cycle counts into time: cycles * 1000 * 1000 / clock. */
81 mclk_loop
= mclks
+ mclk_extra
;
82 us_m
= mclk_loop
* 1000 * 1000 / mclk_freq
;
83 us_n
= nvclks
* 1000 * 1000 / nvclk_freq
;
/*
 * NOTE(review): us_p is divided by the pixel clock but uses nvclks, not
 * the declared pclks, as its cycle count - confirm this is intended.
 */
84 us_p
= nvclks
* 1000 * 1000 / pclk_freq
;
/* Bytes consumed by the CRTC per unit time: pixel clock times bytes per pixel. */
86 crtc_drain_rate
= pclk_freq
* bpp
/ 8;
/* Page-miss penalty expressed as time on the memory clock. */
89 cpm_us
= crtpagemiss
* pagemiss
* 1000 * 1000 / mclk_freq
;
90 us_crt
= cpm_us
+ us_m
+ us_n
+ us_p
;
/* Data drained during the total latency: the watermark candidate. */
91 clwm
= us_crt
* crtc_drain_rate
/ (1000 * 1000);
94 m1
= clwm
+ cbs
- 512;
95 p1
= m1
* pclk_freq
/ mclk_freq
;
/*
 * Test on the candidate values; the body of this branch is not visible
 * in this listing.
 */
97 if ((p1
< m1
&& m1
> 0) || clwm
> 519) {
/*
 * nv10_calc_arb - compute the CRTC FIFO burst size and low watermark.
 *
 * @fifo: output; fifo->burst and fifo->lwm are written.
 * @arb:  input; pclk_khz, nvclk_khz, mclk_khz, memory_width, memory_type,
 *        mem_page_miss, bpp and two_heads are read.
 *
 * Steps visible in this listing:
 *  - fill_rate and drain_rate are derived from the memory and pixel clocks.
 *  - fifo_len is 1536 when two_heads is set and 1024 otherwise.
 *  - fill_lat is the fixed refill latency built from pclks, nvclks and
 *    mclks; extra_lat is the conditional part built from xclks.
 *  - fifo->burst is the smaller of max_burst_o (capped at 1024) and
 *    max_burst_l, rounded down to a power of two.
 *  - fifo->lwm sits 10 percent of the way from min_lwm to max_lwm.
 *
 * NOTE(review): the embedded original line numbers jump (for example
 * 136 -> 138, 142 -> 145, 165 -> 168), so lines are missing from this
 * listing and a few comments have lost their closing line.
 */
110 nv10_calc_arb(struct nv_fifo_info
*fifo
, struct nv_sim_state
*arb
)
112 int fill_rate
, drain_rate
;
113 int pclks
, nvclks
, mclks
, xclks
;
114 int pclk_freq
, nvclk_freq
, mclk_freq
;
115 int fill_lat
, extra_lat
;
116 int max_burst_o
, max_burst_l
;
117 int fifo_len
, min_lwm
, max_lwm
;
118 const int burst_lat
= 80; /* Maximum allowable latency due
119 * to the CRTC FIFO burst. (ns) */
121 pclk_freq
= arb
->pclk_khz
;
122 nvclk_freq
= arb
->nvclk_khz
;
123 mclk_freq
= arb
->mclk_khz
;
/* Memory side fills the FIFO, the pixel side drains it. */
125 fill_rate
= mclk_freq
* arb
->memory_width
/ 8; /* kB/s */
126 drain_rate
= pclk_freq
* arb
->bpp
/ 8; /* kB/s */
128 fifo_len
= arb
->two_heads
? 1536 : 1024; /* B */
130 /* Fixed FIFO refill latency. */
132 pclks
= 4; /* lwm detect. */
134 nvclks
= 3 /* lwm -> sync. */
135 + 2 /* fbi bus cycles (1 req + 1 busy) */
136 + 1 /* 2 edge sync. may be very close to edge so
138 + 1 /* fbi_d_rdv_n */
139 + 1 /* Fbi_d_rdata */
140 + 1; /* crtfifo load */
142 mclks
= 1 /* 2 edge sync. may be very close to edge so
145 + 5 /* tiling pipeline */
146 + 2 /* latency fifo */
147 + 2 /* memory request to fbio block */
148 + 7; /* data returned from fbio block */
150 /* Need to accumulate 256 bits for read */
151 mclks
+= (arb
->memory_type
== 0 ? 2 : 1)
152 * arb
->memory_width
/ 32;
154 fill_lat
= mclks
* 1000 * 1000 / mclk_freq
/* minimum mclk latency */
155 + nvclks
* 1000 * 1000 / nvclk_freq
/* nvclk latency */
156 + pclks
* 1000 * 1000 / pclk_freq
; /* pclk latency */
158 /* Conditional FIFO refill latency. */
160 xclks
= 2 * arb
->mem_page_miss
+ mclks
/* Extra latency due to
162 + 2 * arb
->mem_page_miss
/* Extra pagemiss latency. */
163 + (arb
->bpp
== 32 ? 8 : 4); /* Margin of error. */
165 extra_lat
= xclks
* 1000 * 1000 / mclk_freq
;
168 /* Account for another CRTC. */
/*
 * NOTE(review): extra_lat appears on both sides, so this adds fill_lat and
 * burst_lat and also doubles extra_lat. Any condition guarding this
 * statement is not visible in this listing.
 */
169 extra_lat
+= fill_lat
+ extra_lat
+ burst_lat
;
173 /* Max burst not leading to overflows. */
/*
 * NOTE(review): the divisor (fill_rate - drain_rate) / 1000 is zero when
 * the two rates differ by less than 1000 and negative when the drain rate
 * is higher; no guard is visible in this listing.
 */
174 max_burst_o
= (1 + fifo_len
- extra_lat
* drain_rate
/ (1000 * 1000))
175 * (fill_rate
/ 1000) / ((fill_rate
- drain_rate
) / 1000);
176 fifo
->burst
= min(max_burst_o
, 1024);
178 /* Max burst value with an acceptable latency. */
179 max_burst_l
= burst_lat
* fill_rate
/ (1000 * 1000);
180 fifo
->burst
= min(max_burst_l
, fifo
->burst
);
/* Round the burst size down to a power of two. */
182 fifo
->burst
= rounddown_pow_of_two(fifo
->burst
);
184 /* FIFO low watermark */
186 min_lwm
= (fill_lat
+ extra_lat
) * drain_rate
/ (1000 * 1000) + 1;
187 max_lwm
= fifo_len
- fifo
->burst
188 + fill_lat
* drain_rate
/ (1000 * 1000)
189 + fifo
->burst
* drain_rate
/ fill_rate
;
/* Place the watermark 10 percent of the way from min_lwm to max_lwm. */
191 fifo
->lwm
= min_lwm
+ 10 * (max_lwm
- min_lwm
) / 100; /* Empirical. */
/*
 * nv04_update_arb - gather clock and memory parameters from the hardware,
 * run the arbitration calculation and return encoded burst and watermark.
 *
 * @dev:   DRM device; its clocks, framebuffer registers and PCI id are read.
 * @VClk:  stored as sim_data.pclk_khz.
 * @bpp:   no use of this parameter is visible in this listing.
 * @burst: receives ilog2(fifo_data.burst >> 4).
 * @lwm:   receives fifo_data.lwm >> 3.
 *
 * For PCI device ids 0x01a0 and 0x01f0 the memory type comes from PCI
 * config dword 0x7c and width, latency and page-miss cost are fixed at
 * 64, 3 and 10. The other register-based assignments read NV_PFB_CFG0,
 * NV_PEXTDEV_BOOT_0 and NV_PFB_CFG1. nv04_calc_arb() is called when
 * card_type is NV_04; the nv10_calc_arb() call is presumably the
 * alternative branch.
 *
 * NOTE(review): the embedded original line numbers jump (for example
 * 207 -> 209, 211 -> 214, 219 -> 221, 228 -> 230), so the else lines, the
 * declaration of type and possibly other statements are missing here.
 */
195 nv04_update_arb(struct drm_device
*dev
, int VClk
, int bpp
,
196 int *burst
, int *lwm
)
198 struct drm_nouveau_private
*dev_priv
= dev
->dev_private
;
199 struct nv_fifo_info fifo_data
;
200 struct nv_sim_state sim_data
;
201 int MClk
= nouveau_hw_get_clock(dev
, MPLL
);
202 int NVClk
= nouveau_hw_get_clock(dev
, NVPLL
);
203 uint32_t cfg1
= nvReadFB(dev
, NV_PFB_CFG1
);
/* Clock inputs for the calculation. */
205 sim_data
.pclk_khz
= VClk
;
206 sim_data
.mclk_khz
= MClk
;
207 sim_data
.nvclk_khz
= NVClk
;
209 sim_data
.two_heads
= nv_two_heads(dev
);
210 if ((dev
->pci_device
& 0xffff) == 0x01a0 /*CHIPSET_NFORCE*/ ||
211 (dev
->pci_device
& 0xffff) == 0x01f0 /*CHIPSET_NFORCE2*/) {
/*
 * NOTE(review): the pci_dev returned by pci_get_bus_and_slot() is passed
 * straight to pci_read_config_dword(). It is not checked for NULL and is
 * never handed to pci_dev_put(), which presumably leaks a device
 * reference on every call - confirm against the kernel PCI API.
 */
214 pci_read_config_dword(pci_get_bus_and_slot(0, 1), 0x7c, &type
);
/* Bit 12 of the config dword selects the memory type. */
216 sim_data
.memory_type
= (type
>> 12) & 1;
217 sim_data
.memory_width
= 64;
218 sim_data
.mem_latency
= 3;
219 sim_data
.mem_page_miss
= 10;
/* Register-derived values; presumably the else branch of the id test. */
221 sim_data
.memory_type
= nvReadFB(dev
, NV_PFB_CFG0
) & 0x1;
222 sim_data
.memory_width
= (nvReadEXTDEV(dev
, NV_PEXTDEV_BOOT_0
) & 0x10) ? 128 : 64;
223 sim_data
.mem_latency
= cfg1
& 0xf;
224 sim_data
.mem_page_miss
= ((cfg1
>> 4) & 0xf) + ((cfg1
>> 31) & 0x1);
227 if (dev_priv
->card_type
== NV_04
)
228 nv04_calc_arb(&fifo_data
, &sim_data
);
230 nv10_calc_arb(&fifo_data
, &sim_data
);
/* Encode the results: log2 of burst / 16, and watermark / 8. */
232 *burst
= ilog2(fifo_data
.burst
>> 4);
233 *lwm
= fifo_data
.lwm
>> 3;
/*
 * nv30_update_arb - return burst and low watermark from a FIFO size and
 * burst size, without reading any device state in the visible code.
 *
 * @burst: receives ilog2(burst_size >> 5).
 * @lwm:   receives (fifo_size - burst_size) >> 3.
 *
 * NOTE(review): the embedded original line numbers jump from 239 to 243,
 * so the assignments to fifo_size and burst_size are not visible in this
 * listing.
 */
237 nv30_update_arb(int *burst
, int *lwm
)
239 unsigned int fifo_size
, burst_size
, graphics_lwm
;
/* Watermark is whatever remains of the FIFO after one burst. */
243 graphics_lwm
= fifo_size
- burst_size
;
245 *burst
= ilog2(burst_size
>> 5);
246 *lwm
= graphics_lwm
>> 3;
/*
 * nouveau_calc_arb - choose the arbitration routine for this card and
 * return its burst and low watermark values.
 *
 * @dev:   DRM device; card_type and pci_device select the path.
 * @vclk:  forwarded to nv04_update_arb().
 * @bpp:   forwarded to nv04_update_arb().
 * @burst: output, filled in by the selected path.
 * @lwm:   output, filled in by the selected path.
 *
 * Cards with card_type below NV_30 use nv04_update_arb(). Otherwise the
 * PCI device id masked with 0xfff0 is compared against 0x0240 and 0x03d0.
 * The nv30_update_arb() call is presumably the final else branch.
 *
 * NOTE(review): the embedded original line numbers jump from 257 to 261,
 * so the body of the id-match branch is not visible in this listing.
 */
250 nouveau_calc_arb(struct drm_device
*dev
, int vclk
, int bpp
, int *burst
, int *lwm
)
252 struct drm_nouveau_private
*dev_priv
= dev
->dev_private
;
254 if (dev_priv
->card_type
< NV_30
)
255 nv04_update_arb(dev
, vclk
, bpp
, burst
, lwm
);
256 else if ((dev
->pci_device
& 0xfff0) == 0x0240 /*CHIPSET_C51*/ ||
257 (dev
->pci_device
& 0xfff0) == 0x03d0 /*CHIPSET_C512*/) {
261 nv30_update_arb(burst
, lwm
);
/*
 * getMNP_single - search for M, N and P coefficients of a single stage PLL
 * that best approximate the requested clock.
 *
 * @dev:     DRM device; card_type and vbios.chip_version are read.
 * @pll_lim: limits; the vco1 ranges, min_p, max_p, max_usable_log2p and
 *           refclk are read.
 * @clk:     target clock (kHz according to the comment in the body).
 * @bestpv:  receives the best coefficients; the store to log2P is visible.
 *
 * The divider P is thisP itself when pll_lim->max_p is non-zero and
 * 1 << thisP otherwise. For each P and M in range, N is computed with
 * rounding, the resulting clock is compared with @clk, and the candidate
 * with the smallest absolute difference is kept. A zero difference is
 * singled out as a special case; the action taken is not visible.
 *
 * NOTE(review): the embedded original line numbers jump in many places
 * (for example 285 -> 288, 299 -> 309, 320 -> 328, 335 -> 342), so the
 * declarations of M, N, P, thisP, clkP and calcclk, the loop exits and
 * the return statement are missing. Two block comments have also lost
 * their closing line, which leaves the declarations that follow the first
 * one inside an unterminated comment as the text stands.
 */
265 getMNP_single(struct drm_device
*dev
, struct pll_lims
*pll_lim
, int clk
,
266 struct nouveau_pll_vals
*bestpv
)
268 /* Find M, N and P for a single stage PLL
270 * Note that some bioses (NV3x) have lookup tables of precomputed MNP
271 * values, but we're too lazy to use those atm
273 * "clk" parameter in kHz
274 * returns calculated clock
276 struct drm_nouveau_private
*dev_priv
= dev
->dev_private
;
277 int cv
= dev_priv
->vbios
.chip_version
;
278 int minvco
= pll_lim
->vco1
.minfreq
, maxvco
= pll_lim
->vco1
.maxfreq
;
279 int minM
= pll_lim
->vco1
.min_m
, maxM
= pll_lim
->vco1
.max_m
;
280 int minN
= pll_lim
->vco1
.min_n
, maxN
= pll_lim
->vco1
.max_n
;
281 int minU
= pll_lim
->vco1
.min_inputfreq
;
282 int maxU
= pll_lim
->vco1
.max_inputfreq
;
283 int minP
= pll_lim
->max_p
? pll_lim
->min_p
: 0;
284 int maxP
= pll_lim
->max_p
? pll_lim
->max_p
: pll_lim
->max_usable_log2p
;
285 int crystal
= pll_lim
->refclk
;
288 int delta
, bestdelta
= INT_MAX
;
291 /* this division verified for nv20, nv18, nv28 (Haiku), and nv34 */
292 /* possibly correlated with introduction of 27MHz crystal */
293 if (dev_priv
->card_type
< NV_50
) {
294 if (cv
< 0x17 || cv
== 0x1a || cv
== 0x20) {
299 } else if (cv
< 0x40) {
/* Largest divider: raw max_p when set, otherwise 2 to the power maxP. */
309 P
= pll_lim
->max_p
? maxP
: (1 << maxP
);
310 if ((clk
* P
) < minvco
) {
/* Widen the VCO ceiling so that a target just above it stays reachable. */
315 if (clk
+ clk
/200 > maxvco
) /* +0.5% */
316 maxvco
= clk
+ clk
/200;
318 /* NV34 goes maxlog2P->0, NV20 goes 0->maxlog2P */
319 for (thisP
= minP
; thisP
<= maxP
; thisP
++) {
320 P
= pll_lim
->max_p
? thisP
: (1 << thisP
);
328 for (M
= minM
; M
<= maxM
; M
++) {
/* Range checks on the PLL input frequency crystal / M. */
329 if (crystal
/M
< minU
)
331 if (crystal
/M
> maxU
)
334 /* add crystal/2 to round better */
335 N
= (clkP
* M
+ crystal
/2) / crystal
;
342 /* more rounding additions */
343 calcclk
= ((N
* crystal
+ P
/2) / P
+ M
/2) / M
;
344 delta
= abs(calcclk
- clk
);
345 /* we do an exhaustive search rather than terminating
346 * on an optimality condition...
348 if (delta
< bestdelta
) {
353 bestpv
->log2P
= thisP
;
354 if (delta
== 0) /* except this one */
/*
 * getMNP_double - search for M1, N1, M2, N2 and log2P coefficients of a
 * two stage PLL that best approximate the requested clock.
 *
 * @dev:     DRM device; vbios.chip_version is read.
 * @pll_lim: limits; the vco1 and vco2 ranges, max_usable_log2p and refclk
 *           are read.
 * @clk:     target clock (kHz according to the comment in the body).
 * @bestpv:  receives the best coefficients; the store to log2P is visible.
 *
 * log2P is chosen first: it grows while clk stays at or below
 * vco2 >> log2P, where vco2 is half of maxvco2 less 0.5 percent. The
 * search then walks M1, N1 and M2, derives N2 with rounding, and keeps
 * the candidate whose output (calcclk2 >> log2P) is closest to @clk.
 * For chip_version below 0x60 the ratio N2 / M2 is tested against the
 * range 4 to 10.
 *
 * NOTE(review): the embedded original line numbers jump in many places
 * (for example 390 -> 393, 394 -> 398, 421 -> 428, 447 -> 452), so the
 * declaration of bestclk, the assignment of clkP, the statements guarded
 * by the range checks and the return statement are missing. Two block
 * comments have also lost their closing line, which leaves the
 * declarations that follow the first one inside an unterminated comment
 * as the text stands.
 */
364 getMNP_double(struct drm_device
*dev
, struct pll_lims
*pll_lim
, int clk
,
365 struct nouveau_pll_vals
*bestpv
)
367 /* Find M, N and P for a two stage PLL
369 * Note that some bioses (NV30+) have lookup tables of precomputed MNP
370 * values, but we're too lazy to use those atm
372 * "clk" parameter in kHz
373 * returns calculated clock
375 struct drm_nouveau_private
*dev_priv
= dev
->dev_private
;
376 int chip_version
= dev_priv
->vbios
.chip_version
;
377 int minvco1
= pll_lim
->vco1
.minfreq
, maxvco1
= pll_lim
->vco1
.maxfreq
;
378 int minvco2
= pll_lim
->vco2
.minfreq
, maxvco2
= pll_lim
->vco2
.maxfreq
;
379 int minU1
= pll_lim
->vco1
.min_inputfreq
, minU2
= pll_lim
->vco2
.min_inputfreq
;
380 int maxU1
= pll_lim
->vco1
.max_inputfreq
, maxU2
= pll_lim
->vco2
.max_inputfreq
;
381 int minM1
= pll_lim
->vco1
.min_m
, maxM1
= pll_lim
->vco1
.max_m
;
382 int minN1
= pll_lim
->vco1
.min_n
, maxN1
= pll_lim
->vco1
.max_n
;
383 int minM2
= pll_lim
->vco2
.min_m
, maxM2
= pll_lim
->vco2
.max_m
;
384 int minN2
= pll_lim
->vco2
.min_n
, maxN2
= pll_lim
->vco2
.max_n
;
385 int maxlog2P
= pll_lim
->max_usable_log2p
;
386 int crystal
= pll_lim
->refclk
;
387 bool fixedgain2
= (minM2
== maxM2
&& minN2
== maxN2
);
388 int M1
, N1
, M2
, N2
, log2P
;
389 int clkP
, calcclk1
, calcclk2
, calcclkout
;
390 int delta
, bestdelta
= INT_MAX
;
393 int vco2
= (maxvco2
- maxvco2
/200) / 2;
394 for (log2P
= 0; clk
&& log2P
< maxlog2P
&& clk
<= (vco2
>> log2P
); log2P
++)
398 if (maxvco2
< clk
+ clk
/200) /* +0.5% */
399 maxvco2
= clk
+ clk
/200;
/* First stage: walk M1 and N1 within the vco1 limits. */
401 for (M1
= minM1
; M1
<= maxM1
; M1
++) {
402 if (crystal
/M1
< minU1
)
404 if (crystal
/M1
> maxU1
)
407 for (N1
= minN1
; N1
<= maxN1
; N1
++) {
408 calcclk1
= crystal
* N1
/ M1
;
409 if (calcclk1
< minvco1
)
411 if (calcclk1
> maxvco1
)
/* Second stage: the first stage output calcclk1 is its input clock. */
414 for (M2
= minM2
; M2
<= maxM2
; M2
++) {
415 if (calcclk1
/M2
< minU2
)
417 if (calcclk1
/M2
> maxU2
)
420 /* add calcclk1/2 to round better */
421 N2
= (clkP
* M2
+ calcclk1
/2) / calcclk1
;
/*
 * Ratio test applied only to chip versions below 0x60; the statement it
 * guards is not visible in this listing.
 */
428 if (chip_version
< 0x60)
429 if (N2
/M2
< 4 || N2
/M2
> 10)
432 calcclk2
= calcclk1
* N2
/ M2
;
433 if (calcclk2
< minvco2
)
435 if (calcclk2
> maxvco2
)
/* Apply the post divider chosen before the search. */
440 calcclkout
= calcclk2
>> log2P
;
441 delta
= abs(calcclkout
- clk
);
442 /* we do an exhaustive search rather than terminating
443 * on an optimality condition...
445 if (delta
< bestdelta
) {
447 bestclk
= calcclkout
;
452 bestpv
->log2P
= log2P
;
453 if (delta
== 0) /* except this one */
/*
 * nouveau_calc_pll_mnp - compute PLL coefficients for a target clock using
 * the single or double stage search.
 *
 * @dev:     DRM device, forwarded to the search routine and to NV_ERROR.
 * @pll_lim: PLL limits; a zero vco2.maxfreq selects getMNP_single().
 * @clk:     target clock, forwarded unchanged.
 * @pv:      receives the coefficients chosen by the search routine.
 *
 * The getMNP_double() call is presumably the else branch of the vco2
 * test. The result of either search is stored in outclk.
 *
 * NOTE(review): the embedded original line numbers jump (465 -> 469,
 * 470 -> 472, 472 -> 475), so the declaration of outclk, the condition
 * guarding the error message and the return statement are not visible in
 * this listing.
 */
464 nouveau_calc_pll_mnp(struct drm_device
*dev
, struct pll_lims
*pll_lim
, int clk
,
465 struct nouveau_pll_vals
*pv
)
/* No second VCO configured: single stage PLL. */
469 if (!pll_lim
->vco2
.maxfreq
)
470 outclk
= getMNP_single(dev
, pll_lim
, clk
, pv
);
472 outclk
= getMNP_double(dev
, pll_lim
, clk
, pv
);
475 NV_ERROR(dev
, "Could not find a compatible set of PLL values\n");