1 /* SPDX-License-Identifier: GPL-2.0-only */
4 #include <commonlib/helpers.h>
5 #include <console/console.h>
6 #include <cpu/intel/model_206ax/model_206ax.h>
7 #include <device/mmio.h>
8 #include <device/pci_ops.h>
9 #include <northbridge/intel/sandybridge/chip.h>
10 #include <device/pci_def.h>
14 #include "raminit_common.h"
15 #include "raminit_tables.h"
16 #include "sandybridge.h"
18 /* FIXME: no support for 3-channel chipsets */
/* Serialize all prior store operations before continuing */
static void sfence(void)
{
	__asm__ volatile ("sfence");
}
25 /* Toggle IO reset bit */
26 static void toggle_io_reset(void)
28 u32 r32
= mchbar_read32(MC_INIT_STATE_G
);
29 mchbar_write32(MC_INIT_STATE_G
, r32
| (1 << 5));
31 mchbar_write32(MC_INIT_STATE_G
, r32
& ~(1 << 5));
35 static u32
get_XOVER_CLK(u8 rankmap
)
40 static u32
get_XOVER_CMD(u8 rankmap
)
44 /* Enable xover cmd */
47 /* Enable xover ctl */
57 void dram_find_common_params(ramctr_timing
*ctrl
)
61 dimm_info
*dimms
= &ctrl
->info
;
63 ctrl
->cas_supported
= (1 << (MAX_CAS
- MIN_CAS
+ 1)) - 1;
66 FOR_ALL_CHANNELS
for (slot
= 0; slot
< 2; slot
++) {
67 const struct dimm_attr_ddr3_st
*dimm
= &dimms
->dimm
[channel
][slot
];
68 if (dimm
->dram_type
!= SPD_MEMORY_TYPE_SDRAM_DDR3
)
73 /* Find all possible CAS combinations */
74 ctrl
->cas_supported
&= dimm
->cas_supported
;
76 /* Find the smallest common latencies supported by all DIMMs */
77 ctrl
->tCK
= MAX(ctrl
->tCK
, dimm
->tCK
);
78 ctrl
->tAA
= MAX(ctrl
->tAA
, dimm
->tAA
);
79 ctrl
->tWR
= MAX(ctrl
->tWR
, dimm
->tWR
);
80 ctrl
->tRCD
= MAX(ctrl
->tRCD
, dimm
->tRCD
);
81 ctrl
->tRRD
= MAX(ctrl
->tRRD
, dimm
->tRRD
);
82 ctrl
->tRP
= MAX(ctrl
->tRP
, dimm
->tRP
);
83 ctrl
->tRAS
= MAX(ctrl
->tRAS
, dimm
->tRAS
);
84 ctrl
->tRFC
= MAX(ctrl
->tRFC
, dimm
->tRFC
);
85 ctrl
->tWTR
= MAX(ctrl
->tWTR
, dimm
->tWTR
);
86 ctrl
->tRTP
= MAX(ctrl
->tRTP
, dimm
->tRTP
);
87 ctrl
->tFAW
= MAX(ctrl
->tFAW
, dimm
->tFAW
);
88 ctrl
->tCWL
= MAX(ctrl
->tCWL
, dimm
->tCWL
);
89 ctrl
->tCMD
= MAX(ctrl
->tCMD
, dimm
->tCMD
);
92 if (!ctrl
->cas_supported
)
93 die("Unsupported DIMM combination. DIMMS do not support common CAS latency");
96 die("No valid DIMMs found");
99 void dram_xover(ramctr_timing
*ctrl
)
105 /* Enable xover clk */
106 reg
= get_XOVER_CLK(ctrl
->rankmap
[channel
]);
107 printram("XOVER CLK [%x] = %x\n", GDCRCKPICODE_ch(channel
), reg
);
108 mchbar_write32(GDCRCKPICODE_ch(channel
), reg
);
110 /* Enable xover ctl & xover cmd */
111 reg
= get_XOVER_CMD(ctrl
->rankmap
[channel
]);
112 printram("XOVER CMD [%x] = %x\n", GDCRCMDPICODING_ch(channel
), reg
);
113 mchbar_write32(GDCRCMDPICODING_ch(channel
), reg
);
117 static void dram_odt_stretch(ramctr_timing
*ctrl
, int channel
)
121 stretch
= ctrl
->ref_card_offset
[channel
];
124 * Delay ODT signal by stretch value. Useful for multi DIMM setups on the same channel.
126 if (IS_SANDY_CPU(ctrl
->cpu
) && IS_SANDY_CPU_C(ctrl
->cpu
)) {
130 addr
= SCHED_SECOND_CBIT_ch(channel
);
131 mchbar_clrsetbits32(addr
, 0xf << 10, stretch
<< 12 | stretch
<< 10);
132 printk(RAM_DEBUG
, "OTHP Workaround [%x] = %x\n", addr
, mchbar_read32(addr
));
134 addr
= TC_OTHP_ch(channel
);
135 union tc_othp_reg tc_othp
= {
136 .raw
= mchbar_read32(addr
),
138 tc_othp
.odt_delay_d0
= stretch
;
139 tc_othp
.odt_delay_d1
= stretch
;
140 mchbar_write32(addr
, tc_othp
.raw
);
141 printk(RAM_DEBUG
, "OTHP [%x] = %x\n", addr
, mchbar_read32(addr
));
145 void dram_timing_regs(ramctr_timing
*ctrl
)
150 const union tc_dbp_reg tc_dbp
= {
158 /* Regular access parameters */
159 const union tc_rap_reg tc_rap
= {
169 /* Other parameters */
170 const union tc_othp_reg tc_othp
= {
171 .tXPDLL
= MIN(ctrl
->tXPDLL
, 31),
172 .tXP
= MIN(ctrl
->tXP
, 7),
173 .tAONPD
= ctrl
->tAONPD
,
179 * If tXP and tXPDLL are very high, they no longer fit in the bitfields
180 * of the TC_OTHP register. If so, we set bits in TC_DTP to compensate.
181 * This can only happen on Ivy Bridge, and when overclocking the RAM.
183 const union tc_dtp_reg tc_dtp
= {
184 .overclock_tXP
= ctrl
->tXP
>= 8,
185 .overclock_tXPDLL
= ctrl
->tXPDLL
>= 32,
189 * TC-Refresh timing parameters:
190 * The tREFIx9 field should be programmed to minimum of 8.9 * tREFI (to allow
191 * for possible delays from ZQ or isoc) and tRASmax (70us) divided by 1024.
193 const u32 val32
= MIN((ctrl
->tREFI
* 89) / 10, (70000 << 8) / ctrl
->tCK
);
195 const union tc_rftp_reg tc_rftp
= {
196 .tREFI
= ctrl
->tREFI
,
198 .tREFIx9
= val32
/ 1024,
201 /* Self-refresh timing parameters */
202 const union tc_srftp_reg tc_srftp
= {
204 .tXS_offset
= ctrl
->tXSOffset
,
205 .tZQOPER
= tDLLK
- ctrl
->tXSOffset
,
206 .tMOD
= ctrl
->tMOD
- 8,
210 printram("DBP [%x] = %x\n", TC_DBP_ch(channel
), tc_dbp
.raw
);
211 mchbar_write32(TC_DBP_ch(channel
), tc_dbp
.raw
);
213 printram("RAP [%x] = %x\n", TC_RAP_ch(channel
), tc_rap
.raw
);
214 mchbar_write32(TC_RAP_ch(channel
), tc_rap
.raw
);
216 printram("OTHP [%x] = %x\n", TC_OTHP_ch(channel
), tc_othp
.raw
);
217 mchbar_write32(TC_OTHP_ch(channel
), tc_othp
.raw
);
219 if (IS_IVY_CPU(ctrl
->cpu
)) {
220 /* Debug parameters - only applies to Ivy Bridge */
221 mchbar_write32(TC_DTP_ch(channel
), tc_dtp
.raw
);
224 dram_odt_stretch(ctrl
, channel
);
226 printram("REFI [%x] = %x\n", TC_RFTP_ch(channel
), tc_rftp
.raw
);
227 mchbar_write32(TC_RFTP_ch(channel
), tc_rftp
.raw
);
229 union tc_rfp_reg tc_rfp
= {
230 .raw
= mchbar_read32(TC_RFP_ch(channel
)),
232 tc_rfp
.oref_ri
= 0xff;
233 mchbar_write32(TC_RFP_ch(channel
), tc_rfp
.raw
);
235 printram("SRFTP [%x] = %x\n", TC_SRFTP_ch(channel
), tc_srftp
.raw
);
236 mchbar_write32(TC_SRFTP_ch(channel
), tc_srftp
.raw
);
240 void dram_dimm_mapping(ramctr_timing
*ctrl
)
243 dimm_info
*info
= &ctrl
->info
;
246 struct dimm_attr_ddr3_st
*dimmA
, *dimmB
;
249 if (info
->dimm
[channel
][0].size_mb
>= info
->dimm
[channel
][1].size_mb
) {
250 dimmA
= &info
->dimm
[channel
][0];
251 dimmB
= &info
->dimm
[channel
][1];
254 dimmA
= &info
->dimm
[channel
][1];
255 dimmB
= &info
->dimm
[channel
][0];
259 if (dimmA
&& (dimmA
->ranks
> 0)) {
260 reg
|= (dimmA
->size_mb
/ 256) << 0;
261 reg
|= (dimmA
->ranks
- 1) << 17;
262 reg
|= (dimmA
->width
/ 8 - 1) << 19;
265 if (dimmB
&& (dimmB
->ranks
> 0)) {
266 reg
|= (dimmB
->size_mb
/ 256) << 8;
267 reg
|= (dimmB
->ranks
- 1) << 18;
268 reg
|= (dimmB
->width
/ 8 - 1) << 20;
272 * Rank interleave: Bit 16 of the physical address space sets
273 * the rank to use in a dual single rank DIMM configuration.
274 * That results in every 64KiB being interleaved between two ranks.
277 /* Enhanced interleave */
280 if ((dimmA
&& (dimmA
->ranks
> 0)) || (dimmB
&& (dimmB
->ranks
> 0))) {
281 ctrl
->mad_dimm
[channel
] = reg
;
283 ctrl
->mad_dimm
[channel
] = 0;
288 void dram_dimm_set_mapping(ramctr_timing
*ctrl
, int training
)
293 if (ctrl
->ecc_enabled
)
294 ecc
= training
? (1 << 24) : (3 << 24);
299 mchbar_write32(MAD_DIMM(channel
), ctrl
->mad_dimm
[channel
] | ecc
);
302 if (ctrl
->ecc_enabled
)
306 void dram_zones(ramctr_timing
*ctrl
, int training
)
308 u32 reg
, ch0size
, ch1size
;
314 ch0size
= ctrl
->channel_size_mb
[0] ? 256 : 0;
315 ch1size
= ctrl
->channel_size_mb
[1] ? 256 : 0;
317 ch0size
= ctrl
->channel_size_mb
[0];
318 ch1size
= ctrl
->channel_size_mb
[1];
321 if (ch0size
>= ch1size
) {
322 reg
= mchbar_read32(MAD_ZR
);
324 reg
= (reg
& ~0xff000000) | val
<< 24;
325 reg
= (reg
& ~0x00ff0000) | (2 * val
) << 16;
326 mchbar_write32(MAD_ZR
, reg
);
327 mchbar_write32(MAD_CHNL
, 0x24);
330 reg
= mchbar_read32(MAD_ZR
);
332 reg
= (reg
& ~0xff000000) | val
<< 24;
333 reg
= (reg
& ~0x00ff0000) | (2 * val
) << 16;
334 mchbar_write32(MAD_ZR
, reg
);
335 mchbar_write32(MAD_CHNL
, 0x21);
340 * Returns the ECC mode the NB is running at. It takes precedence over ECC capability.
341 * The ME/PCU/.. has the ability to change this.
342 * Return 0: ECC is optional
343 * Return 1: ECC is forced
345 bool get_host_ecc_forced(void)
347 /* read Capabilities A Register */
348 const u32 reg32
= pci_read_config32(HOST_BRIDGE
, CAPID0_A
);
349 return !!(reg32
& (1 << 24));
353 * Returns the ECC capability.
354 * The ME/PCU/.. has the ability to change this.
355 * Return 0: ECC is disabled
356 * Return 1: ECC is possible
358 bool get_host_ecc_cap(void)
360 /* read Capabilities A Register */
361 const u32 reg32
= pci_read_config32(HOST_BRIDGE
, CAPID0_A
);
362 return !(reg32
& (1 << 25));
365 #define DEFAULT_PCI_MMIO_SIZE 2048
367 void dram_memorymap(ramctr_timing
*ctrl
, int me_uma_size
)
369 u32 reg
, val
, reclaim
, tom
, gfxstolen
, gttsize
;
370 size_t tsegbase
, toludbase
, remapbase
, gfxstolenbase
, mmiosize
, gttbase
;
371 size_t tsegsize
, touudbase
, remaplimit
, mestolenbase
, tsegbasedelta
;
374 mmiosize
= DEFAULT_PCI_MMIO_SIZE
;
376 ggc
= pci_read_config16(HOST_BRIDGE
, GGC
);
378 gfxstolen
= ((ggc
>> 3) & 0x1f) * 32;
379 gttsize
= ((ggc
>> 8) & 0x3);
385 tsegsize
= CONFIG_SMM_TSEG_SIZE
>> 20;
387 tom
= ctrl
->channel_size_mb
[0] + ctrl
->channel_size_mb
[1];
389 mestolenbase
= tom
- me_uma_size
;
391 toludbase
= MIN(4096 - mmiosize
+ gfxstolen
+ gttsize
+ tsegsize
, tom
- me_uma_size
);
393 gfxstolenbase
= toludbase
- gfxstolen
;
394 gttbase
= gfxstolenbase
- gttsize
;
396 tsegbase
= gttbase
- tsegsize
;
398 /* Round tsegbase down to nearest address aligned to tsegsize */
399 tsegbasedelta
= tsegbase
& (tsegsize
- 1);
400 tsegbase
&= ~(tsegsize
- 1);
402 gttbase
-= tsegbasedelta
;
403 gfxstolenbase
-= tsegbasedelta
;
404 toludbase
-= tsegbasedelta
;
406 /* Test if it is possible to reclaim a hole in the RAM addressing */
407 if (tom
- me_uma_size
> toludbase
) {
408 /* Reclaim is possible */
410 remapbase
= MAX(4096, tom
- me_uma_size
);
411 remaplimit
= remapbase
+ MIN(4096, tom
- me_uma_size
) - toludbase
- 1;
412 touudbase
= remaplimit
+ 1;
414 /* Reclaim not possible */
416 touudbase
= tom
- me_uma_size
;
419 /* Update memory map in PCIe configuration space */
420 printk(BIOS_DEBUG
, "Update PCI-E configuration space:\n");
422 /* TOM (top of memory) */
423 reg
= pci_read_config32(HOST_BRIDGE
, TOM
);
425 reg
= (reg
& ~0xfff00000) | (val
<< 20);
426 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", TOM
, reg
);
427 pci_write_config32(HOST_BRIDGE
, TOM
, reg
);
429 reg
= pci_read_config32(HOST_BRIDGE
, TOM
+ 4);
430 val
= tom
& 0xfffff000;
431 reg
= (reg
& ~0x000fffff) | (val
>> 12);
432 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", TOM
+ 4, reg
);
433 pci_write_config32(HOST_BRIDGE
, TOM
+ 4, reg
);
435 /* TOLUD (Top Of Low Usable DRAM) */
436 reg
= pci_read_config32(HOST_BRIDGE
, TOLUD
);
437 val
= toludbase
& 0xfff;
438 reg
= (reg
& ~0xfff00000) | (val
<< 20);
439 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", TOLUD
, reg
);
440 pci_write_config32(HOST_BRIDGE
, TOLUD
, reg
);
442 /* TOUUD LSB (Top Of Upper Usable DRAM) */
443 reg
= pci_read_config32(HOST_BRIDGE
, TOUUD
);
444 val
= touudbase
& 0xfff;
445 reg
= (reg
& ~0xfff00000) | (val
<< 20);
446 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", TOUUD
, reg
);
447 pci_write_config32(HOST_BRIDGE
, TOUUD
, reg
);
450 reg
= pci_read_config32(HOST_BRIDGE
, TOUUD
+ 4);
451 val
= touudbase
& 0xfffff000;
452 reg
= (reg
& ~0x000fffff) | (val
>> 12);
453 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", TOUUD
+ 4, reg
);
454 pci_write_config32(HOST_BRIDGE
, TOUUD
+ 4, reg
);
458 pci_write_config32(HOST_BRIDGE
, REMAPBASE
, remapbase
<< 20);
459 pci_write_config32(HOST_BRIDGE
, REMAPBASE
+ 4, remapbase
>> 12);
462 pci_write_config32(HOST_BRIDGE
, REMAPLIMIT
, remaplimit
<< 20);
463 pci_write_config32(HOST_BRIDGE
, REMAPLIMIT
+ 4, remaplimit
>> 12);
466 reg
= pci_read_config32(HOST_BRIDGE
, TSEGMB
);
467 val
= tsegbase
& 0xfff;
468 reg
= (reg
& ~0xfff00000) | (val
<< 20);
469 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", TSEGMB
, reg
);
470 pci_write_config32(HOST_BRIDGE
, TSEGMB
, reg
);
472 /* GFX stolen memory */
473 reg
= pci_read_config32(HOST_BRIDGE
, BDSM
);
474 val
= gfxstolenbase
& 0xfff;
475 reg
= (reg
& ~0xfff00000) | (val
<< 20);
476 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", BDSM
, reg
);
477 pci_write_config32(HOST_BRIDGE
, BDSM
, reg
);
479 /* GTT stolen memory */
480 reg
= pci_read_config32(HOST_BRIDGE
, BGSM
);
481 val
= gttbase
& 0xfff;
482 reg
= (reg
& ~0xfff00000) | (val
<< 20);
483 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", BGSM
, reg
);
484 pci_write_config32(HOST_BRIDGE
, BGSM
, reg
);
487 reg
= pci_read_config32(HOST_BRIDGE
, MESEG_MASK
+ 4);
488 val
= (0x80000 - me_uma_size
) & 0xfffff000;
489 reg
= (reg
& ~0x000fffff) | (val
>> 12);
490 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", MESEG_MASK
+ 4, reg
);
491 pci_write_config32(HOST_BRIDGE
, MESEG_MASK
+ 4, reg
);
494 reg
= pci_read_config32(HOST_BRIDGE
, MESEG_BASE
);
495 val
= mestolenbase
& 0xfff;
496 reg
= (reg
& ~0xfff00000) | (val
<< 20);
497 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", MESEG_BASE
, reg
);
498 pci_write_config32(HOST_BRIDGE
, MESEG_BASE
, reg
);
500 reg
= pci_read_config32(HOST_BRIDGE
, MESEG_BASE
+ 4);
501 val
= mestolenbase
& 0xfffff000;
502 reg
= (reg
& ~0x000fffff) | (val
>> 12);
503 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", MESEG_BASE
+ 4, reg
);
504 pci_write_config32(HOST_BRIDGE
, MESEG_BASE
+ 4, reg
);
507 reg
= pci_read_config32(HOST_BRIDGE
, MESEG_MASK
);
508 val
= (0x80000 - me_uma_size
) & 0xfff;
509 reg
= (reg
& ~0xfff00000) | (val
<< 20);
510 reg
= reg
| ME_STLEN_EN
; /* Set ME memory enable */
511 reg
= reg
| MELCK
; /* Set lock bit on ME mem */
512 printk(BIOS_DEBUG
, "PCI(0, 0, 0)[%x] = %x\n", MESEG_MASK
, reg
);
513 pci_write_config32(HOST_BRIDGE
, MESEG_MASK
, reg
);
517 static void write_reset(ramctr_timing
*ctrl
)
519 int channel
, slotrank
;
521 /* Choose a populated channel */
522 channel
= (ctrl
->rankmap
[0]) ? 0 : 1;
524 wait_for_iosav(channel
);
526 /* Choose a populated rank */
527 slotrank
= (ctrl
->rankmap
[channel
] & 1) ? 0 : 2;
529 iosav_write_zqcs_sequence(channel
, slotrank
, 3, 8, 0);
531 /* This is actually using the IOSAV state machine as a timer */
532 iosav_run_queue(channel
, 1, 1);
534 wait_for_iosav(channel
);
537 void dram_jedecreset(ramctr_timing
*ctrl
)
542 while (!(mchbar_read32(RCOMP_TIMER
) & (1 << 16)))
545 reg
= mchbar_read32(IOSAV_STATUS_ch(0));
546 } while ((reg
& 0x14) == 0);
548 /* Set state of memory controller */
550 mchbar_write32(MC_INIT_STATE_G
, reg
);
551 mchbar_write32(MC_INIT_STATE
, 0);
552 reg
|= 2; /* DDR reset */
553 mchbar_write32(MC_INIT_STATE_G
, reg
);
555 /* Assert DIMM reset signal */
556 mchbar_clrbits32(MC_INIT_STATE_G
, 1 << 1);
561 /* Deassert DIMM reset signal */
562 mchbar_setbits32(MC_INIT_STATE_G
, 1 << 1);
568 mchbar_setbits32(MC_INIT_STATE_G
, 1 << 2);
574 /* Set valid rank CKE */
575 reg
= ctrl
->rankmap
[channel
];
576 mchbar_write32(MC_INIT_STATE_ch(channel
), reg
);
578 /* Wait 10ns for ranks to settle */
581 reg
= (reg
& ~0xf0) | (ctrl
->rankmap
[channel
] << 4);
582 mchbar_write32(MC_INIT_STATE_ch(channel
), reg
);
584 /* Write reset using a NOP */
/*
 * DDR3 Rank1 Address mirror swap the following pins:
 * A3<->A4, A5<->A6, A7<->A8, BA0<->BA1
 */
static void ddr3_mirror_mrreg(int *bank, u32 *addr)
{
	const int b = *bank;
	const u32 a = *addr;

	/* Swap BA0 and BA1 (rotate within the two low bank bits) */
	*bank = ((b >> 1) & 1) | ((b << 1) & 2);

	/* Swap the bit pairs A3/A4, A5/A6, A7/A8; bits outside 0x1f8 pass through */
	*addr = (a & ~0x1f8) | ((a >> 1) & 0xa8) | ((a & 0xa8) << 1);
}
599 static void write_mrreg(ramctr_timing
*ctrl
, int channel
, int slotrank
, int reg
, u32 val
)
601 wait_for_iosav(channel
);
603 if (ctrl
->rank_mirror
[channel
][slotrank
])
604 ddr3_mirror_mrreg(®
, &val
);
606 const struct iosav_ssq sequence
[] = {
607 /* DRAM command MRS */
610 .command
= IOSAV_MRS
,
616 .data_direction
= SSQ_NA
,
625 /* DRAM command MRS */
628 .command
= IOSAV_MRS
,
635 .data_direction
= SSQ_NA
,
644 /* DRAM command MRS */
647 .command
= IOSAV_MRS
,
652 .post_ssq_wait
= ctrl
->tMOD
,
653 .data_direction
= SSQ_NA
,
663 iosav_write_sequence(channel
, sequence
, ARRAY_SIZE(sequence
));
665 iosav_run_once_and_wait(channel
);
668 /* Obtain optimal power down mode for current configuration */
669 static enum power_down_mode
get_power_down_mode(ramctr_timing
*ctrl
, int channel
)
676 if (ctrl
->tXPDLL
> 32)
679 FOR_ALL_POPULATED_RANKS
680 if (!ctrl
->info
.dimm
[channel
][slotrank
>> 1].flags
.dll_off_mode
)
683 if (CONFIG(RAMINIT_ALWAYS_ALLOW_DLL_OFF
) || get_platform_type() == PLATFORM_MOBILE
)
689 static u32
make_mr0(ramctr_timing
*ctrl
, int channel
, u8 rank
)
691 u16 mr0reg
, mch_cas
, mch_wr
;
692 static const u8 mch_wr_t
[12] = { 1, 2, 3, 4, 0, 5, 0, 6, 0, 7, 0, 0 };
694 const enum power_down_mode power_down
= get_power_down_mode(ctrl
, channel
);
696 const bool slow_exit
= power_down
== PDM_DLL_OFF
|| power_down
== PDM_APD_DLL_OFF
;
698 /* Convert CAS to MCH register friendly */
699 if (ctrl
->CAS
< 12) {
700 mch_cas
= (u16
)((ctrl
->CAS
- 4) << 1);
702 mch_cas
= (u16
)(ctrl
->CAS
- 12);
703 mch_cas
= ((mch_cas
<< 1) | 0x1);
706 /* Convert tWR to MCH register friendly */
707 mch_wr
= mch_wr_t
[ctrl
->tWR
- 5];
709 /* DLL Reset - self clearing - set after CLK frequency has been changed */
712 mr0reg
|= (mch_cas
& 0x1) << 2;
713 mr0reg
|= (mch_cas
& 0xe) << 3;
714 mr0reg
|= mch_wr
<< 9;
716 /* Precharge PD - Use slow exit when DLL-off is used - mostly power-saving feature */
717 mr0reg
|= !slow_exit
<< 12;
721 static void dram_mr0(ramctr_timing
*ctrl
, u8 rank
, int channel
)
723 write_mrreg(ctrl
, channel
, rank
, 0, make_mr0(ctrl
, channel
, rank
));
726 static odtmap
get_ODT(ramctr_timing
*ctrl
, int channel
)
728 /* Get ODT based on rankmap */
729 int dimms_per_ch
= (ctrl
->rankmap
[channel
] & 1) + ((ctrl
->rankmap
[channel
] >> 2) & 1);
731 if (dimms_per_ch
== 1) {
732 return (const odtmap
){60, 60};
734 return (const odtmap
){120, 30};
738 static u32
encode_odt(u32 odt
)
742 return (1 << 9) | (1 << 2); /* RZQ/8, RZQ/4 */
744 return (1 << 2); /* RZQ/4 */
746 return (1 << 6); /* RZQ/2 */
753 static u32
make_mr1(ramctr_timing
*ctrl
, u8 rank
, int channel
)
758 odt
= get_ODT(ctrl
, channel
);
761 mr1reg
|= encode_odt(odt
.rttnom
);
766 static void dram_mr1(ramctr_timing
*ctrl
, u8 rank
, int channel
)
770 mr1reg
= make_mr1(ctrl
, rank
, channel
);
772 write_mrreg(ctrl
, channel
, rank
, 1, mr1reg
);
775 static void dram_mr2(ramctr_timing
*ctrl
, u8 rank
, int channel
)
778 const u16 cwl
= ctrl
->CWL
- 5;
779 const odtmap odt
= get_ODT(ctrl
, channel
);
782 if (IS_IVY_CPU(ctrl
->cpu
) && ctrl
->tCK
>= TCK_1066MHZ
)
783 srt
= ctrl
->extended_temperature_range
&& !ctrl
->auto_self_refresh
;
788 mr2reg
|= ctrl
->auto_self_refresh
<< 6;
790 mr2reg
|= (odt
.rttwr
/ 60) << 9;
792 write_mrreg(ctrl
, channel
, rank
, 2, mr2reg
);
794 /* Program MR2 shadow */
795 u32 reg32
= mchbar_read32(TC_MR2_SHADOW_ch(channel
));
797 reg32
&= 3 << 14 | 3 << 6;
799 reg32
|= mr2reg
& ~(3 << 6);
802 reg32
|= 1 << (rank
/ 2 + 6);
804 if (ctrl
->rank_mirror
[channel
][rank
])
805 reg32
|= 1 << (rank
/ 2 + 14);
807 mchbar_write32(TC_MR2_SHADOW_ch(channel
), reg32
);
810 static void dram_mr3(ramctr_timing
*ctrl
, u8 rank
, int channel
)
812 write_mrreg(ctrl
, channel
, rank
, 3, 0);
815 void dram_mrscommands(ramctr_timing
*ctrl
)
820 FOR_ALL_POPULATED_CHANNELS
{
821 FOR_ALL_POPULATED_RANKS
{
823 dram_mr2(ctrl
, slotrank
, channel
);
826 dram_mr3(ctrl
, slotrank
, channel
);
829 dram_mr1(ctrl
, slotrank
, channel
);
832 dram_mr0(ctrl
, slotrank
, channel
);
836 const struct iosav_ssq zqcl_sequence
[] = {
837 /* DRAM command NOP (without ODT nor chip selects) */
840 .command
= IOSAV_NOP
& ~(0xff << 8),
846 .data_direction
= SSQ_NA
,
855 /* DRAM command ZQCL */
858 .command
= IOSAV_ZQCS
,
864 .post_ssq_wait
= 400,
865 .data_direction
= SSQ_NA
,
879 iosav_write_sequence(BROADCAST_CH
, zqcl_sequence
, ARRAY_SIZE(zqcl_sequence
));
881 iosav_run_queue(BROADCAST_CH
, 4, 0);
884 wait_for_iosav(channel
);
888 mchbar_setbits32(MC_INIT_STATE_G
, 1 << 3);
890 FOR_ALL_POPULATED_CHANNELS
{
891 mchbar_clrbits32(SCHED_CBIT_ch(channel
), 1 << 21);
893 wait_for_iosav(channel
);
895 slotrank
= (ctrl
->rankmap
[channel
] & 1) ? 0 : 2;
897 wait_for_iosav(channel
);
899 iosav_write_zqcs_sequence(channel
, slotrank
, 4, 101, 31);
901 iosav_run_once_and_wait(channel
);
905 static const u32 lane_base
[] = {
906 LANEBASE_B0
, LANEBASE_B1
, LANEBASE_B2
, LANEBASE_B3
,
907 LANEBASE_B4
, LANEBASE_B5
, LANEBASE_B6
, LANEBASE_B7
,
911 /* Maximum delay for command, control, clock */
912 #define CCC_MAX_PI (2 * QCLK_PI - 1)
914 void program_timings(ramctr_timing
*ctrl
, int channel
)
916 u32 reg_roundtrip_latency
, reg_io_latency
;
920 u32 ctl_delay
[NUM_SLOTS
] = { 0 };
923 /* Enable CLK XOVER */
924 u32 clk_pi_coding
= get_XOVER_CLK(ctrl
->rankmap
[channel
]);
925 u32 clk_logic_dly
= 0;
928 * Compute command timing as abs() of the most negative PI code
929 * across all ranks. Use zero if none of the values is negative.
931 FOR_ALL_POPULATED_RANKS
{
932 cmd_delay
= MAX(cmd_delay
, -ctrl
->timings
[channel
][slotrank
].pi_coding
);
934 if (cmd_delay
> CCC_MAX_PI
) {
935 printk(BIOS_ERR
, "C%d command delay overflow: %d\n", channel
, cmd_delay
);
936 cmd_delay
= CCC_MAX_PI
;
939 for (slot
= 0; slot
< NUM_SLOTS
; slot
++) {
940 const int pi_coding_0
= ctrl
->timings
[channel
][2 * slot
+ 0].pi_coding
;
941 const int pi_coding_1
= ctrl
->timings
[channel
][2 * slot
+ 1].pi_coding
;
943 const u8 slot_map
= (ctrl
->rankmap
[channel
] >> (2 * slot
)) & 3;
946 ctl_delay
[slot
] += pi_coding_0
+ cmd_delay
;
949 ctl_delay
[slot
] += pi_coding_1
+ cmd_delay
;
951 /* If both ranks in a slot are populated, use the average */
953 ctl_delay
[slot
] /= 2;
955 if (ctl_delay
[slot
] > CCC_MAX_PI
) {
956 printk(BIOS_ERR
, "C%dS%d control delay overflow: %d\n",
957 channel
, slot
, ctl_delay
[slot
]);
958 ctl_delay
[slot
] = CCC_MAX_PI
;
961 FOR_ALL_POPULATED_RANKS
{
962 int clk_delay
= ctrl
->timings
[channel
][slotrank
].pi_coding
+ cmd_delay
;
965 * Clock is a differential signal, whereas command and control are not.
966 * This affects its timing, and it is also why it needs a magic offset.
968 clk_delay
+= ctrl
->pi_code_offset
;
970 /* Can never happen with valid values */
972 printk(BIOS_ERR
, "C%dR%d clock delay underflow: %d\n",
973 channel
, slotrank
, clk_delay
);
977 /* Clock can safely wrap around because it is a periodic signal */
978 clk_delay
%= CCC_MAX_PI
+ 1;
980 clk_pi_coding
|= (clk_delay
% QCLK_PI
) << (6 * slotrank
);
981 clk_logic_dly
|= (clk_delay
/ QCLK_PI
) << slotrank
;
984 /* Enable CMD XOVER */
985 union gdcr_cmd_pi_coding_reg cmd_pi_coding
= {
986 .raw
= get_XOVER_CMD(ctrl
->rankmap
[channel
]),
988 cmd_pi_coding
.cmd_pi_code
= cmd_delay
% QCLK_PI
;
989 cmd_pi_coding
.cmd_logic_delay
= cmd_delay
/ QCLK_PI
;
991 cmd_pi_coding
.ctl_pi_code_d0
= ctl_delay
[0] % QCLK_PI
;
992 cmd_pi_coding
.ctl_pi_code_d1
= ctl_delay
[1] % QCLK_PI
;
993 cmd_pi_coding
.ctl_logic_delay_d0
= ctl_delay
[0] / QCLK_PI
;
994 cmd_pi_coding
.ctl_logic_delay_d1
= ctl_delay
[1] / QCLK_PI
;
996 mchbar_write32(GDCRCMDPICODING_ch(channel
), cmd_pi_coding
.raw
);
998 mchbar_write32(GDCRCKPICODE_ch(channel
), clk_pi_coding
);
999 mchbar_write32(GDCRCKLOGICDELAY_ch(channel
), clk_logic_dly
);
1001 reg_io_latency
= mchbar_read32(SC_IO_LATENCY_ch(channel
));
1002 reg_io_latency
&= ~0xffff;
1004 reg_roundtrip_latency
= 0;
1006 FOR_ALL_POPULATED_RANKS
{
1007 reg_io_latency
|= ctrl
->timings
[channel
][slotrank
].io_latency
<< (4 * slotrank
);
1009 reg_roundtrip_latency
|=
1010 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
<< (8 * slotrank
);
1013 const u16 rcven
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
;
1014 const u8 dqs_p
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_p
;
1015 const u8 dqs_n
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_n
;
1016 const union gdcr_rx_reg gdcr_rx
= {
1017 .rcven_pi_code
= rcven
% QCLK_PI
,
1018 .rx_dqs_p_pi_code
= dqs_p
,
1019 .rcven_logic_delay
= rcven
/ QCLK_PI
,
1020 .rx_dqs_n_pi_code
= dqs_n
,
1022 mchbar_write32(lane_base
[lane
] + GDCRRX(channel
, slotrank
),
1025 const u16 tx_dqs
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dqs
;
1026 const int tx_dq
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dq
;
1027 const union gdcr_tx_reg gdcr_tx
= {
1028 .tx_dq_pi_code
= tx_dq
% QCLK_PI
,
1029 .tx_dqs_pi_code
= tx_dqs
% QCLK_PI
,
1030 .tx_dqs_logic_delay
= tx_dqs
/ QCLK_PI
,
1031 .tx_dq_logic_delay
= tx_dq
/ QCLK_PI
,
1033 mchbar_write32(lane_base
[lane
] + GDCRTX(channel
, slotrank
),
1037 mchbar_write32(SC_ROUNDT_LAT_ch(channel
), reg_roundtrip_latency
);
1038 mchbar_write32(SC_IO_LATENCY_ch(channel
), reg_io_latency
);
1041 static void test_rcven(ramctr_timing
*ctrl
, int channel
, int slotrank
)
1043 wait_for_iosav(channel
);
1045 /* Send a burst of 16 back-to-back read commands (4 DCLK apart) */
1046 iosav_write_read_mpr_sequence(channel
, slotrank
, ctrl
->tMOD
, 1, 3, 15, ctrl
->CAS
+ 36);
1048 iosav_run_once_and_wait(channel
);
1051 static int does_lane_work(ramctr_timing
*ctrl
, int channel
, int slotrank
, int lane
)
1053 u32 rcven
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
;
1055 return (mchbar_read32(lane_base
[lane
] +
1056 GDCRTRAININGRESULT(channel
, (rcven
/ 32) & 1)) >> (rcven
% 32)) & 1;
1067 static struct run
get_longest_zero_run(int *seq
, int sz
)
1074 for (i
= 0; i
< 2 * sz
; i
++)
1083 ret
.middle
= sz
/ 2;
1091 ret
.start
= bs
% sz
;
1092 ret
.end
= (bs
+ bl
- 1) % sz
;
1093 ret
.middle
= (bs
+ (bl
- 1) / 2) % sz
;
1100 #define RCVEN_COARSE_PI_LENGTH (2 * QCLK_PI)
1102 static void find_rcven_pi_coarse(ramctr_timing
*ctrl
, int channel
, int slotrank
, int *upperA
)
1105 int statistics
[NUM_LANES
][RCVEN_COARSE_PI_LENGTH
];
1108 for (rcven
= 0; rcven
< RCVEN_COARSE_PI_LENGTH
; rcven
++) {
1110 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
= rcven
;
1112 program_timings(ctrl
, channel
);
1114 test_rcven(ctrl
, channel
, slotrank
);
1117 statistics
[lane
][rcven
] =
1118 !does_lane_work(ctrl
, channel
, slotrank
, lane
);
1122 struct run rn
= get_longest_zero_run(statistics
[lane
], RCVEN_COARSE_PI_LENGTH
);
1123 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
= rn
.middle
;
1124 upperA
[lane
] = rn
.end
;
1125 if (upperA
[lane
] < rn
.middle
)
1126 upperA
[lane
] += 2 * QCLK_PI
;
1128 printram("rcven: %d, %d, %d: % 4d-% 4d-% 4d\n",
1129 channel
, slotrank
, lane
, rn
.start
, rn
.middle
, rn
.end
);
1133 static void fine_tune_rcven_pi(ramctr_timing
*ctrl
, int channel
, int slotrank
, int *upperA
)
1136 int statistics
[NUM_LANES
][51] = {0};
1139 for (rcven_delta
= -25; rcven_delta
<= 25; rcven_delta
++) {
1141 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
1142 = upperA
[lane
] + rcven_delta
+ QCLK_PI
;
1144 program_timings(ctrl
, channel
);
1146 for (i
= 0; i
< 100; i
++) {
1147 test_rcven(ctrl
, channel
, slotrank
);
1149 statistics
[lane
][rcven_delta
+ 25] +=
1150 does_lane_work(ctrl
, channel
, slotrank
, lane
);
1155 int last_zero
, first_all
;
1157 for (last_zero
= -25; last_zero
<= 25; last_zero
++)
1158 if (statistics
[lane
][last_zero
+ 25])
1162 for (first_all
= -25; first_all
<= 25; first_all
++)
1163 if (statistics
[lane
][first_all
+ 25] == 100)
1166 printram("lane %d: %d, %d\n", lane
, last_zero
, first_all
);
1168 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
=
1169 (last_zero
+ first_all
) / 2 + upperA
[lane
];
1171 printram("Aval: %d, %d, %d: % 4d\n", channel
, slotrank
,
1172 lane
, ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
);
1177 * Once the DQS high phase has been found (for each DRAM) the next stage
1178 * is to find out the round trip latency, by locating the preamble cycle.
1179 * This is achieved by trying smaller and smaller roundtrip values until
1180 * the strobe sampling is done on the preamble cycle.
1182 static int find_roundtrip_latency(ramctr_timing
*ctrl
, int channel
, int slotrank
, int *upperA
)
1184 int works
[NUM_LANES
];
1188 int all_works
= 1, some_works
= 0;
1190 program_timings(ctrl
, channel
);
1191 test_rcven(ctrl
, channel
, slotrank
);
1194 works
[lane
] = !does_lane_work(ctrl
, channel
, slotrank
, lane
);
1202 /* If every lane is working, exit */
1207 * If all bits are one (everyone is failing), decrement
1208 * the roundtrip value by two, and do another iteration.
1211 /* Guard against roundtrip latency underflow */
1212 if (ctrl
->timings
[channel
][slotrank
].roundtrip_latency
< 2) {
1213 printk(BIOS_EMERG
, "Roundtrip latency underflow: %d, %d\n",
1217 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
-= 2;
1218 printram("4024 -= 2;\n");
1223 * Else (if some lanes are failing), increase the rank's
1224 * I/O latency by 2, and increase rcven logic delay by 2
1225 * on the working lanes, then perform another iteration.
1227 ctrl
->timings
[channel
][slotrank
].io_latency
+= 2;
1228 printram("4028 += 2;\n");
1230 /* Guard against I/O latency overflow */
1231 if (ctrl
->timings
[channel
][slotrank
].io_latency
>= 16) {
1232 printk(BIOS_EMERG
, "I/O latency overflow: %d, %d\n",
1236 FOR_ALL_LANES
if (works
[lane
]) {
1237 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
+= 2 * QCLK_PI
;
1238 upperA
[lane
] += 2 * QCLK_PI
;
1239 printram("increment %d, %d, %d\n", channel
, slotrank
, lane
);
1245 static int get_logic_delay_delta(ramctr_timing
*ctrl
, int channel
, int slotrank
)
1248 u16 logic_delay_min
= 7;
1249 u16 logic_delay_max
= 0;
1252 const u16 logic_delay
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
>> 6;
1254 logic_delay_min
= MIN(logic_delay_min
, logic_delay
);
1255 logic_delay_max
= MAX(logic_delay_max
, logic_delay
);
1258 if (logic_delay_max
< logic_delay_min
) {
1259 printk(BIOS_EMERG
, "Logic delay max < min (%u < %u): %d, %d\n",
1260 logic_delay_max
, logic_delay_min
, channel
, slotrank
);
1263 assert(logic_delay_max
>= logic_delay_min
);
1265 return logic_delay_max
- logic_delay_min
;
1268 static int align_rt_io_latency(ramctr_timing
*ctrl
, int channel
, int slotrank
, int prev
)
1270 int latency_offset
= 0;
1272 /* Get changed maxima */
1273 const int post
= get_logic_delay_delta(ctrl
, channel
, slotrank
);
1276 latency_offset
= +1;
1278 else if (prev
> post
)
1279 latency_offset
= -1;
1284 ctrl
->timings
[channel
][slotrank
].io_latency
+= latency_offset
;
1285 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
+= latency_offset
;
1286 printram("4024 += %d;\n", latency_offset
);
1287 printram("4028 += %d;\n", latency_offset
);
1292 static void compute_final_logic_delay(ramctr_timing
*ctrl
, int channel
, int slotrank
)
1294 u16 logic_delay_min
= 7;
1298 const u16 logic_delay
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
>> 6;
1300 logic_delay_min
= MIN(logic_delay_min
, logic_delay
);
1303 if (logic_delay_min
>= 2) {
1304 printk(BIOS_WARNING
, "Logic delay %u greater than 1: %d %d\n",
1305 logic_delay_min
, channel
, slotrank
);
1309 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
-= logic_delay_min
<< 6;
1311 ctrl
->timings
[channel
][slotrank
].io_latency
-= logic_delay_min
;
1312 printram("4028 -= %d;\n", logic_delay_min
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical, only
 * comments added. Visible intent: per channel/rank receive-enable (rcven)
 * calibration: precharge, enter receive-enable training mode via
 * GDCRTRAININGMOD, seed io_latency=4 / roundtrip_latency=55, coarse PI search,
 * wrap handling around QCLK_PI, then roundtrip/fine-tune/logic-delay passes.
 * Several lines (loop headers, declarations of prev/err, returns) are missing
 * from this extraction — TODO confirm against upstream source.
 */
1315 int receive_enable_calibration(ramctr_timing
*ctrl
)
1317 int channel
, slotrank
, lane
;
1320 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
{
1321 int all_high
, some_high
;
1322 int upperA
[NUM_LANES
];
1325 wait_for_iosav(channel
);
1327 iosav_write_prea_sequence(channel
, slotrank
, ctrl
->tRP
, 0);
1329 iosav_run_once_and_wait(channel
);
1331 const union gdcr_training_mod_reg training_mod
= {
1332 .receive_enable_mode
= 1,
1333 .training_rank_sel
= slotrank
,
1336 mchbar_write32(GDCRTRAININGMOD
, training_mod
.raw
);
1338 ctrl
->timings
[channel
][slotrank
].io_latency
= 4;
1339 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
= 55;
1340 program_timings(ctrl
, channel
);
1342 find_rcven_pi_coarse(ctrl
, channel
, slotrank
, upperA
);
1347 if (ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
>= QCLK_PI
)
1354 ctrl
->timings
[channel
][slotrank
].io_latency
--;
1355 printram("4028--;\n");
1357 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
-= QCLK_PI
;
1358 upperA
[lane
] -= QCLK_PI
;
1360 } else if (some_high
) {
1361 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
++;
1362 ctrl
->timings
[channel
][slotrank
].io_latency
++;
1363 printram("4024++;\n");
1364 printram("4028++;\n");
1367 program_timings(ctrl
, channel
);
1369 prev
= get_logic_delay_delta(ctrl
, channel
, slotrank
);
1371 err
= find_roundtrip_latency(ctrl
, channel
, slotrank
, upperA
);
1375 prev
= align_rt_io_latency(ctrl
, channel
, slotrank
, prev
);
1377 fine_tune_rcven_pi(ctrl
, channel
, slotrank
, upperA
);
1379 prev
= align_rt_io_latency(ctrl
, channel
, slotrank
, prev
);
1381 compute_final_logic_delay(ctrl
, channel
, slotrank
);
1383 align_rt_io_latency(ctrl
, channel
, slotrank
, prev
);
1385 printram("4/8: %d, %d, % 4d, % 4d\n", channel
, slotrank
,
1386 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
,
1387 ctrl
->timings
[channel
][slotrank
].io_latency
);
1389 printram("final results:\n");
1391 printram("Aval: %d, %d, %d: % 4d\n", channel
, slotrank
, lane
,
1392 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
);
1394 mchbar_write32(GDCRTRAININGMOD
, 0);
1399 FOR_ALL_POPULATED_CHANNELS
{
1400 program_timings(ctrl
, channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: clear per-lane IOSAV error counters, run a misc write
 * sequence then a precharge/activate/read sequence, so errors accumulate for
 * the current tx_dq setting. Some lines missing — TODO confirm upstream.
 */
1406 static void test_tx_dq(ramctr_timing
*ctrl
, int channel
, int slotrank
)
1411 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel
, lane
), 0);
1412 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel
, lane
));
1415 wait_for_iosav(channel
);
1417 iosav_write_misc_write_sequence(ctrl
, channel
, slotrank
,
1418 MAX(ctrl
->tRRD
, (ctrl
->tFAW
>> 2) + 1), 4, 4, 500, 18);
1420 iosav_run_once_and_wait(channel
);
1422 iosav_write_prea_act_read_sequence(ctrl
, channel
, slotrank
);
1424 iosav_run_once_and_wait(channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: binarize an error-count array around the midpoint of its
 * observed min/max (threshold = min/2 + max/2); the min/max scan lines are
 * missing from this extraction — TODO confirm upstream.
 */
1427 static void tx_dq_threshold_process(int *data
, const int count
)
1432 for (i
= 1; i
< count
; i
++) {
1439 int threshold
= min
/ 2 + max
/ 2;
1440 for (i
= 0; i
< count
; i
++)
1441 data
[i
] = data
[i
] > threshold
;
1443 printram("threshold=%d min=%d max=%d\n", threshold
, min
, max
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: sweep tx_dq over [0, MAX_TX_DQ], collect per-lane IOSAV
 * error counts, pick the middle of the longest zero-error run per lane; on
 * failure, threshold-process the stats and retry before giving up.
 */
1446 static int tx_dq_write_leveling(ramctr_timing
*ctrl
, int channel
, int slotrank
)
1449 int stats
[NUM_LANES
][MAX_TX_DQ
+ 1];
1452 wait_for_iosav(channel
);
1454 iosav_write_prea_sequence(channel
, slotrank
, ctrl
->tRP
, 18);
1456 iosav_run_once_and_wait(channel
);
1458 for (tx_dq
= 0; tx_dq
<= MAX_TX_DQ
; tx_dq
++) {
1459 FOR_ALL_LANES ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dq
= tx_dq
;
1460 program_timings(ctrl
, channel
);
1462 test_tx_dq(ctrl
, channel
, slotrank
);
1465 stats
[lane
][tx_dq
] = mchbar_read32(
1466 IOSAV_By_ERROR_COUNT_ch(channel
, lane
));
1470 struct run rn
= get_longest_zero_run(stats
[lane
], ARRAY_SIZE(stats
[lane
]));
1472 if (rn
.all
|| rn
.length
< 8) {
1473 printk(BIOS_EMERG
, "tx_dq write leveling failed: %d, %d, %d\n",
1474 channel
, slotrank
, lane
);
1476 * With command training not being done yet, the lane can be erroneous.
1477 * Take the average as reference and try again to find a run.
1479 tx_dq_threshold_process(stats
[lane
], ARRAY_SIZE(stats
[lane
]));
1480 rn
= get_longest_zero_run(stats
[lane
], ARRAY_SIZE(stats
[lane
]));
1482 if (rn
.all
|| rn
.length
< 8) {
1483 printk(BIOS_EMERG
, "tx_dq recovery failed\n");
1487 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dq
= rn
.middle
;
1488 printram("tx_dq: %d, %d, %d: % 4d-% 4d-% 4d\n",
1489 channel
, slotrank
, lane
, rn
.start
, rn
.middle
, rn
.end
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: count populated channels whose index is below
 * target_channel (the ret++ / return lines are missing here).
 */
1494 static int get_precedening_channels(ramctr_timing
*ctrl
, int target_channel
)
1496 int channel
, ret
= 0;
1498 FOR_ALL_POPULATED_CHANNELS
if (channel
< target_channel
)
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: program the WDB pattern length register for a channel as
 * (num_cachelines / 8 - 1).
 */
1504 /* Each cacheline is 64 bits long */
1505 static void program_wdb_pattern_length(int channel
, const unsigned int num_cachelines
)
1507 mchbar_write8(IOSAV_DATA_CTL_ch(channel
), num_cachelines
/ 8 - 1);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: fill this channel's slice of the write data buffer at
 * 0x04000000 with alternating words a/b (b on j & 2), then set the WDB
 * pattern length to 8 cachelines.
 */
1510 static void fill_pattern0(ramctr_timing
*ctrl
, int channel
, u32 a
, u32 b
)
1513 unsigned int channel_offset
= get_precedening_channels(ctrl
, channel
) * 64;
1516 for (j
= 0; j
< 16; j
++) {
1517 addr
= 0x04000000 + channel_offset
+ 4 * j
;
1518 write32p(addr
, j
& 2 ? b
: a
);
1523 program_wdb_pattern_length(channel
, 8);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: count populated channels (declaration of ret and the
 * return line are missing from this extraction).
 */
1526 static int num_of_channels(const ramctr_timing
*ctrl
)
1530 FOR_ALL_POPULATED_CHANNELS ret
++;
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: fill the first cacheline of this channel's WDB slice with
 * all-ones, a second cacheline one channel_step later (write of the second
 * value is missing here), and set pattern length to 16 cachelines.
 */
1534 static void fill_pattern1(ramctr_timing
*ctrl
, int channel
)
1537 unsigned int channel_offset
= get_precedening_channels(ctrl
, channel
) * 64;
1538 unsigned int channel_step
= 64 * num_of_channels(ctrl
);
1541 for (j
= 0; j
< 16; j
++) {
1542 addr
= 0x04000000 + channel_offset
+ j
* 4;
1543 write32p(addr
, 0xffffffff);
1545 for (j
= 0; j
< 16; j
++) {
1546 addr
= 0x04000000 + channel_offset
+ channel_step
+ j
* 4;
1551 program_wdb_pattern_length(channel
, 16);
1554 #define TX_DQS_PI_LENGTH (2 * QCLK_PI)
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: JEDEC write leveling for one rank: enter write-leveling
 * mode with forced drive enable, issue MR1 with write-leveling bit (1 << 7),
 * honoring rank mirroring, sweep tx_dqs over TX_DQS_PI_LENGTH sampling
 * GDCRTRAININGRESULT per lane, then pick the start of the longest zero run
 * (with the documented 6-LSB overflow avoidance).
 */
1556 static int write_level_rank(ramctr_timing
*ctrl
, int channel
, int slotrank
)
1559 int statistics
[NUM_LANES
][TX_DQS_PI_LENGTH
];
1562 const union gdcr_training_mod_reg training_mod
= {
1563 .write_leveling_mode
= 1,
1564 .training_rank_sel
= slotrank
,
1567 .force_drive_enable
= 1,
1569 mchbar_write32(GDCRTRAININGMOD
, training_mod
.raw
);
1571 u32 mr1reg
= make_mr1(ctrl
, slotrank
, channel
) | 1 << 7;
1574 if (ctrl
->rank_mirror
[channel
][slotrank
])
1575 ddr3_mirror_mrreg(&bank
, &mr1reg
);
1577 wait_for_iosav(channel
);
1579 iosav_write_jedec_write_leveling_sequence(ctrl
, channel
, slotrank
, bank
, mr1reg
);
1581 for (tx_dqs
= 0; tx_dqs
< TX_DQS_PI_LENGTH
; tx_dqs
++) {
1583 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dqs
= tx_dqs
;
1585 program_timings(ctrl
, channel
);
1587 iosav_run_once_and_wait(channel
);
1590 statistics
[lane
][tx_dqs
] = !((mchbar_read32(lane_base
[lane
] +
1591 GDCRTRAININGRESULT(channel
, (tx_dqs
/ 32) & 1)) >>
1592 (tx_dqs
% 32)) & 1);
1596 struct run rn
= get_longest_zero_run(statistics
[lane
], TX_DQS_PI_LENGTH
);
1598 * tx_dq is a direct function of tx_dqs's 6 LSBs. Some tests increment the value
1599 * of tx_dqs by a small value, which might cause the 6-bit value to overflow if
1600 * it's close to 0x3f. Increment the value by a small offset if it's likely
1601 * to overflow, to make sure it won't overflow while running tests and bricks
1602 * the system due to a non matching tx_dq.
1604 * TODO: find out why some tests (edge write discovery) increment tx_dqs.
1606 if ((rn
.start
& 0x3f) == 0x3e)
1608 else if ((rn
.start
& 0x3f) == 0x3f)
1611 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dqs
= rn
.start
;
1613 printk(BIOS_EMERG
, "JEDEC write leveling failed: %d, %d, %d\n",
1614 channel
, slotrank
, lane
);
1618 printram("tx_dqs: %d, %d, %d: % 4d-% 4d-% 4d\n",
1619 channel
, slotrank
, lane
, rn
.start
, rn
.middle
, rn
.end
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: derive a signed DQS flyby adjustment from a 64-bit training
 * result: all-ones means no adjustment; values >= 0xf000000000000000 scan for
 * a negative (late) adjustment, else scan for a positive (early) one. The
 * return statements are missing from this extraction.
 */
1624 static int get_dqs_flyby_adjust(u64 val
)
1627 /* DQS is good enough */
1628 if (val
== 0xffffffffffffffffLL
)
1630 if (val
>= 0xf000000000000000LL
) {
1631 /* DQS is late, needs negative adjustment */
1632 for (i
= 0; i
< 8; i
++)
1633 if (val
<< (8 * (7 - i
) + 4))
1636 /* DQS is early, needs positive adjustment */
1637 for (i
= 0; i
< 8; i
++)
1638 if (val
>> (8 * (7 - i
) + 4))
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: write flyby training: enable dq/dqs training result mode,
 * fill pattern1, per populated channel/rank run a write then a
 * PREA/ACT/RDA IOSAV sequence, read the 64-bit per-lane training result from
 * GDCRTRAININGRESULT1/2 and adjust tx_dqs by get_dqs_flyby_adjust * QCLK_PI.
 */
1644 static void train_write_flyby(ramctr_timing
*ctrl
)
1646 int channel
, slotrank
, lane
, old
;
1648 const union gdcr_training_mod_reg training_mod
= {
1649 .dq_dqs_training_res
= 1,
1651 mchbar_write32(GDCRTRAININGMOD
, training_mod
.raw
);
1653 FOR_ALL_POPULATED_CHANNELS
{
1654 fill_pattern1(ctrl
, channel
);
1656 FOR_ALL_POPULATED_CHANNELS FOR_ALL_POPULATED_RANKS
{
1657 /* Reset read and write WDB pointers */
1658 mchbar_write32(IOSAV_DATA_CTL_ch(channel
), 0x10001);
1660 wait_for_iosav(channel
);
1662 iosav_write_misc_write_sequence(ctrl
, channel
, slotrank
, 3, 1, 3, 3, 31);
1664 iosav_run_once_and_wait(channel
);
1666 const struct iosav_ssq rd_sequence
[] = {
1667 /* DRAM command PREA */
1670 .command
= IOSAV_PRE
,
1674 .cmd_executions
= 1,
1676 .post_ssq_wait
= ctrl
->tRP
,
1677 .data_direction
= SSQ_NA
,
1689 /* DRAM command ACT */
1692 .command
= IOSAV_ACT
,
1696 .cmd_executions
= 1,
1698 .post_ssq_wait
= ctrl
->tRCD
,
1699 .data_direction
= SSQ_NA
,
1708 /* DRAM command RDA */
1711 .command
= IOSAV_RD
,
1715 .cmd_executions
= 1,
1717 .post_ssq_wait
= ctrl
->tRP
+
1718 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
+
1719 ctrl
->timings
[channel
][slotrank
].io_latency
,
1720 .data_direction
= SSQ_RD
,
1730 iosav_write_sequence(channel
, rd_sequence
, ARRAY_SIZE(rd_sequence
));
1732 iosav_run_once_and_wait(channel
);
1735 u64 res
= mchbar_read32(lane_base
[lane
] + GDCRTRAININGRESULT1(channel
));
1736 res
|= ((u64
)mchbar_read32(lane_base
[lane
] +
1737 GDCRTRAININGRESULT2(channel
))) << 32;
1739 old
= ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dqs
;
1740 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dqs
+=
1741 get_dqs_flyby_adjust(res
) * QCLK_PI
;
1743 printram("High adjust %d:%016llx\n", lane
, res
);
1744 printram("Bval+: %d, %d, %d, % 4d -> % 4d\n", channel
, slotrank
, lane
,
1745 old
, ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dqs
);
1748 mchbar_write32(GDCRTRAININGMOD
, 0);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: per populated channel run a ZQCS sequence on an existing
 * rank and set SCHED_CBIT bit 21, clear the refresh-enable bit (bit 3) in
 * MC_INIT_STATE_G, then re-run the queued command sequence per channel.
 */
1751 static void disable_refresh_machine(ramctr_timing
*ctrl
)
1755 FOR_ALL_POPULATED_CHANNELS
{
1756 /* choose an existing rank */
1757 const int slotrank
= !(ctrl
->rankmap
[channel
] & 1) ? 2 : 0;
1759 iosav_write_zqcs_sequence(channel
, slotrank
, 4, 4, 31);
1761 iosav_run_once_and_wait(channel
);
1763 mchbar_setbits32(SCHED_CBIT_ch(channel
), 1 << 21);
1766 /* Refresh disable */
1767 mchbar_clrbits32(MC_INIT_STATE_G
, 1 << 3);
1769 FOR_ALL_POPULATED_CHANNELS
{
1770 /* Execute the same command queue */
1771 iosav_run_once_and_wait(channel
);
1776 * Compensate the skew between CMD/ADDR/CLK and DQ/DQS lanes.
1778 * Since DDR3 uses a fly-by topology, the data and strobes signals reach the chips at different
1779 * times with respect to command, address and clock signals. By delaying either all DQ/DQS or
1780 * all CMD/ADDR/CLK signals, a full phase shift can be introduced. It is assumed that the
1781 * CLK/ADDR/CMD signals have the same routing delay.
1783 * To find the required phase shift the DRAM is placed in "write leveling" mode. In this mode,
1784 * the DRAM-chip samples the CLK on every DQS edge and feeds back the sampled value on the data
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: full JEDEC write-leveling pass: disable refresh, enable
 * write leveling + DQ output disable via MR1 on all ranks, program the
 * training mode register, level each rank via write_level_rank(), disable
 * write leveling again, re-enable refresh, and run ZQCS per channel.
 */
1787 static int jedec_write_leveling(ramctr_timing
*ctrl
)
1789 int channel
, slotrank
;
1791 disable_refresh_machine(ctrl
);
1793 /* Enable write leveling on all ranks
1794 Disable all DQ outputs
1795 Only NOP is allowed in this mode */
1796 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
1797 write_mrreg(ctrl
, channel
, slotrank
, 1,
1798 make_mr1(ctrl
, slotrank
, channel
) | 1 << 12 | 1 << 7);
1800 /* Needs to be programmed before I/O reset below */
1801 const union gdcr_training_mod_reg training_mod
= {
1802 .write_leveling_mode
= 1,
1805 .force_drive_enable
= 1,
1807 mchbar_write32(GDCRTRAININGMOD
, training_mod
.raw
);
1811 /* Set any valid value for tx_dqs, it gets corrected later */
1812 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
{
1813 const int err
= write_level_rank(ctrl
, channel
, slotrank
);
1818 /* Disable write leveling on all ranks */
1819 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
1820 write_mrreg(ctrl
, channel
, slotrank
, 1, make_mr1(ctrl
, slotrank
, channel
));
1822 mchbar_write32(GDCRTRAININGMOD
, 0);
1824 FOR_ALL_POPULATED_CHANNELS
1825 wait_for_iosav(channel
);
1827 /* Refresh enable */
1828 mchbar_setbits32(MC_INIT_STATE_G
, 1 << 3);
1830 FOR_ALL_POPULATED_CHANNELS
{
1831 mchbar_clrbits32(SCHED_CBIT_ch(channel
), 1 << 21);
1832 mchbar_read32(IOSAV_STATUS_ch(channel
));
1833 wait_for_iosav(channel
);
1835 iosav_write_zqcs_sequence(channel
, 0, 4, 101, 31);
1837 iosav_run_once_and_wait(channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: top-level write training: set DEC_WRD (TC_RWP bit 27),
 * run JEDEC write leveling, tx_dq leveling per rank, then flyby tx_dqs
 * adjustment, reprogramming timings between stages.
 */
1845 int write_training(ramctr_timing
*ctrl
)
1847 int channel
, slotrank
;
1851 * Set the DEC_WRD bit, required for the write flyby algorithm.
1852 * Needs to be done before starting the write training procedure.
1854 FOR_ALL_POPULATED_CHANNELS
1855 mchbar_setbits32(TC_RWP_ch(channel
), 1 << 27);
1859 err
= jedec_write_leveling(ctrl
);
1865 FOR_ALL_POPULATED_CHANNELS
{
1866 fill_pattern0(ctrl
, channel
, 0xaaaaaaaa, 0x55555555);
1869 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
{
1870 err
= tx_dq_write_leveling(ctrl
, channel
, slotrank
);
1875 FOR_ALL_POPULATED_CHANNELS
1876 program_timings(ctrl
, channel
);
1878 /* measure and adjust tx_dqs timings */
1879 train_write_flyby(ctrl
);
1881 FOR_ALL_POPULATED_CHANNELS
1882 program_timings(ctrl
, channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: probe whether the current command timing works: try tx_dq
 * deltas -5..5 around the saved per-lane values, run the command training
 * IOSAV sequence with LFSR-driven addresses, collect lanes with zero errors
 * into lanes_ok, restore saved timings, and return nonzero when any lane
 * failed for every delta.
 */
1887 static int test_command_training(ramctr_timing
*ctrl
, int channel
, int slotrank
)
1889 struct ram_rank_timings saved_rt
= ctrl
->timings
[channel
][slotrank
];
1895 for (tx_dq_delta
= -5; tx_dq_delta
<= 5; tx_dq_delta
++) {
1897 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dq
=
1898 saved_rt
.lanes
[lane
].tx_dq
+ tx_dq_delta
;
1900 program_timings(ctrl
, channel
);
1902 mchbar_write32(IOSAV_By_ERROR_COUNT(lane
), 0);
1905 /* Reset read WDB pointer */
1906 mchbar_write32(IOSAV_DATA_CTL_ch(channel
), 0x1f);
1908 wait_for_iosav(channel
);
1910 iosav_write_command_training_sequence(ctrl
, channel
, slotrank
, ctr
);
1912 /* Program LFSR for the RD/WR subsequences */
1913 mchbar_write32(IOSAV_n_ADDRESS_LFSR_ch(channel
, 1), 0x389abcd);
1914 mchbar_write32(IOSAV_n_ADDRESS_LFSR_ch(channel
, 2), 0x389abcd);
1916 iosav_run_once_and_wait(channel
);
1919 u32 r32
= mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel
, lane
));
1922 lanes_ok
|= 1 << lane
;
1925 if (lanes_ok
== ((1 << ctrl
->lanes
) - 1))
1929 ctrl
->timings
[channel
][slotrank
] = saved_rt
;
1931 return lanes_ok
!= ((1 << ctrl
->lanes
) - 1);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: fill the WDB with training pattern `patno`: patterns >= 1
 * are built from use_base[]/invert[] tables around a walking-bit base byte;
 * the other branch copies the static pattern[][] table. Ends by setting the
 * WDB pattern length to 256 cachelines.
 */
1934 static void fill_pattern5(ramctr_timing
*ctrl
, int channel
, int patno
)
1937 unsigned int offset
= get_precedening_channels(ctrl
, channel
) * 64;
1938 unsigned int step
= 64 * num_of_channels(ctrl
);
1942 u8 base8
= 0x80 >> ((patno
- 1) % 8);
1943 u32 base
= base8
| (base8
<< 8) | (base8
<< 16) | (base8
<< 24);
1944 for (i
= 0; i
< 32; i
++) {
1945 for (j
= 0; j
< 16; j
++) {
1946 u32 val
= use_base
[patno
- 1][i
] & (1 << (j
/ 2)) ? base
: 0;
1948 if (invert
[patno
- 1][i
] & (1 << (j
/ 2)))
1951 addr
= (1 << 26) + offset
+ i
* step
+ j
* 4;
1952 write32p(addr
, val
);
1956 for (i
= 0; i
< ARRAY_SIZE(pattern
); i
++) {
1957 for (j
= 0; j
< 16; j
++) {
1958 const u32 val
= pattern
[i
][j
];
1959 addr
= (1 << 26) + offset
+ i
* step
+ j
* 4;
1960 write32p(addr
, val
);
1966 program_wdb_pattern_length(channel
, 256);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: disable the refresh machine, then re-run the JEDEC reset
 * and MRS command sequences (used after changing command timing).
 */
1969 static void reprogram_320c(ramctr_timing
*ctrl
)
1971 disable_refresh_machine(ctrl
);
1974 dram_jedecreset(ctrl
);
1977 dram_mrscommands(ctrl
);
1982 #define CT_MIN_PI (-CCC_MAX_PI)
1983 #define CT_MAX_PI (+CCC_MAX_PI + 1)
1984 #define CT_PI_LENGTH (CT_MAX_PI - CT_MIN_PI + 1)
1986 #define MIN_C320C_LEN 13
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: try one command-stretch (rate) setting on a channel: save
 * timings, program tCMD into TC_RAP, adjust roundtrip latency by a delta
 * depending on the stretch value, sweep command PI over CT_MIN_PI..CT_MAX_PI
 * running test_command_training per rank, then pick the middle of the
 * longest passing run; restore saved timings when the run is too short.
 */
1988 static int try_cmd_stretch(ramctr_timing
*ctrl
, int channel
, int cmd_stretch
)
1990 struct ram_rank_timings saved_timings
[NUM_CHANNELS
][NUM_SLOTRANKS
];
1993 int stat
[NUM_SLOTRANKS
][CT_PI_LENGTH
];
1996 printram("Trying cmd_stretch %d on channel %d\n", cmd_stretch
, channel
);
1998 FOR_ALL_POPULATED_RANKS
{
1999 saved_timings
[channel
][slotrank
] = ctrl
->timings
[channel
][slotrank
];
2002 ctrl
->cmd_stretch
[channel
] = cmd_stretch
;
2004 const union tc_rap_reg tc_rap
= {
2011 .tCMD
= ctrl
->cmd_stretch
[channel
],
2013 mchbar_write32(TC_RAP_ch(channel
), tc_rap
.raw
);
2015 if (ctrl
->cmd_stretch
[channel
] == 2)
2017 else if (ctrl
->cmd_stretch
[channel
] == 0)
2020 FOR_ALL_POPULATED_RANKS
{
2021 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
-= delta
;
2024 for (command_pi
= CT_MIN_PI
; command_pi
< CT_MAX_PI
; command_pi
++) {
2025 FOR_ALL_POPULATED_RANKS
{
2026 ctrl
->timings
[channel
][slotrank
].pi_coding
= command_pi
;
2028 program_timings(ctrl
, channel
);
2029 reprogram_320c(ctrl
);
2030 FOR_ALL_POPULATED_RANKS
{
2031 stat
[slotrank
][command_pi
- CT_MIN_PI
] =
2032 test_command_training(ctrl
, channel
, slotrank
);
2035 FOR_ALL_POPULATED_RANKS
{
2036 struct run rn
= get_longest_zero_run(stat
[slotrank
], CT_PI_LENGTH
- 1);
2038 ctrl
->timings
[channel
][slotrank
].pi_coding
= rn
.middle
+ CT_MIN_PI
;
2039 printram("cmd_stretch: %d, %d: % 4d-% 4d-% 4d\n",
2040 channel
, slotrank
, rn
.start
, rn
.middle
, rn
.end
);
2042 if (rn
.all
|| rn
.length
< MIN_C320C_LEN
) {
2043 FOR_ALL_POPULATED_RANKS
{
2044 ctrl
->timings
[channel
][slotrank
] =
2045 saved_timings
[channel
][slotrank
];
2055 * Adjust CMD phase shift and try multiple command rates.
2056 * A command rate of 2T doubles the time needed for address and command decode.
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: per channel choose a starting command rate (2T forced in
 * dual-DIMM configurations, XMP-requested rate otherwise), try stretches via
 * try_cmd_stretch(), fail loudly if none works, then reprogram timings and
 * re-run the 320c reprogramming.
 */
2058 int command_training(ramctr_timing
*ctrl
)
2062 FOR_ALL_POPULATED_CHANNELS
{
2063 fill_pattern5(ctrl
, channel
, 0);
2066 FOR_ALL_POPULATED_CHANNELS
{
2070 * Dual DIMM per channel:
2072 * While command training seems to succeed, raminit will fail in write training.
2075 * Skip 1T in dual DIMM mode, that's only supported by a few DIMMs.
2076 * Only try 1T mode for XMP DIMMs that request it in dual DIMM mode.
2078 * Single DIMM per channel:
2079 * Try command rate 1T and 2T
2081 cmdrate
= ((ctrl
->rankmap
[channel
] & 0x5) == 0x5);
2083 /* XMP gives the CMD rate in clock ticks, not ns */
2084 cmdrate
= MIN(DIV_ROUND_UP(ctrl
->tCMD
, 256) - 1, 1);
2086 for (; cmdrate
< 2; cmdrate
++) {
2087 err
= try_cmd_stretch(ctrl
, channel
, cmdrate
<< 1);
2094 printk(BIOS_EMERG
, "Command training failed: %d\n", channel
);
2098 printram("Using CMD rate %uT on channel %u\n", cmdrate
+ 1, channel
);
2101 FOR_ALL_POPULATED_CHANNELS
2102 program_timings(ctrl
, channel
);
2104 reprogram_320c(ctrl
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: sweep rx_dqs_p/rx_dqs_n together over the edge-timing
 * range while running the read-MPR IOSAV sequence, record per-lane error
 * counts, and report the middle of the longest zero-error run per lane in
 * edges[].
 */
2108 static int find_read_mpr_margin(ramctr_timing
*ctrl
, int channel
, int slotrank
, int *edges
)
2111 int stats
[NUM_LANES
][MAX_EDGE_TIMING
+ 1];
2114 for (dqs_pi
= 0; dqs_pi
<= MAX_EDGE_TIMING
; dqs_pi
++) {
2116 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_p
= dqs_pi
;
2117 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_n
= dqs_pi
;
2119 program_timings(ctrl
, channel
);
2122 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel
, lane
), 0);
2123 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel
, lane
));
2126 wait_for_iosav(channel
);
2128 iosav_write_read_mpr_sequence(
2129 channel
, slotrank
, ctrl
->tMOD
, 500, 4, 1, ctrl
->CAS
+ 8);
2131 iosav_run_once_and_wait(channel
);
2134 stats
[lane
][dqs_pi
] = mchbar_read32(
2135 IOSAV_By_ERROR_COUNT_ch(channel
, lane
));
2140 struct run rn
= get_longest_zero_run(stats
[lane
], MAX_EDGE_TIMING
+ 1);
2141 edges
[lane
] = rn
.middle
;
2144 printk(BIOS_EMERG
, "Read MPR training failed: %d, %d, %d\n", channel
,
2148 printram("eval %d, %d, %d: % 4d\n", channel
, slotrank
, lane
, edges
[lane
]);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: run the read-MPR sequence twice (rx_dqs at 16, then 48)
 * with byte-wise error masks cleared, then program the BW mask from the
 * inverted sampled SERROR value. Measured values are not checked (see the
 * XXX comments retained below).
 */
2153 static void find_predefined_pattern(ramctr_timing
*ctrl
, const int channel
)
2157 fill_pattern0(ctrl
, channel
, 0, 0);
2159 mchbar_write32(IOSAV_By_BW_MASK_ch(channel
, lane
), 0);
2160 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel
, lane
));
2163 FOR_ALL_POPULATED_RANKS FOR_ALL_LANES
{
2164 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_n
= 16;
2165 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_p
= 16;
2168 program_timings(ctrl
, channel
);
2170 FOR_ALL_POPULATED_RANKS
{
2171 wait_for_iosav(channel
);
2173 iosav_write_read_mpr_sequence(
2174 channel
, slotrank
, ctrl
->tMOD
, 3, 4, 1, ctrl
->CAS
+ 8);
2176 iosav_run_once_and_wait(channel
);
2179 /* XXX: check any measured value ? */
2181 FOR_ALL_POPULATED_RANKS FOR_ALL_LANES
{
2182 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_n
= 48;
2183 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_p
= 48;
2186 program_timings(ctrl
, channel
);
2188 FOR_ALL_POPULATED_RANKS
{
2189 wait_for_iosav(channel
);
2191 iosav_write_read_mpr_sequence(
2192 channel
, slotrank
, ctrl
->tMOD
, 3, 4, 1, ctrl
->CAS
+ 8);
2194 iosav_run_once_and_wait(channel
);
2197 /* XXX: check any measured value ? */
2200 mchbar_write32(IOSAV_By_BW_MASK_ch(channel
, lane
),
2201 ~mchbar_read32(IOSAV_By_BW_SERROR_ch(channel
, lane
)) & 0xff);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: read MPR training: discover falling edges (IOSAV_DC_MASK =
 * 3 << 8) and rising edges (2 << 8) per channel/rank via
 * find_read_mpr_margin(), store them into rx_dqs_n / rx_dqs_p, then clear
 * masks and reprogram timings.
 */
2205 int read_mpr_training(ramctr_timing
*ctrl
)
2207 int falling_edges
[NUM_CHANNELS
][NUM_SLOTRANKS
][NUM_LANES
];
2208 int rising_edges
[NUM_CHANNELS
][NUM_SLOTRANKS
][NUM_LANES
];
2209 int channel
, slotrank
, lane
;
2212 mchbar_write32(GDCRTRAININGMOD
, 0);
2216 FOR_ALL_POPULATED_CHANNELS
{
2217 find_predefined_pattern(ctrl
, channel
);
2219 fill_pattern0(ctrl
, channel
, 0, 0xffffffff);
2223 * FIXME: Under some conditions, vendor BIOS sets both edges to the same value. It will
2224 * also use a single loop. It would seem that it is a debugging configuration.
2226 mchbar_write32(IOSAV_DC_MASK
, 3 << 8);
2227 printram("discover falling edges:\n[%x] = %x\n", IOSAV_DC_MASK
, 3 << 8);
2229 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
{
2230 err
= find_read_mpr_margin(ctrl
, channel
, slotrank
,
2231 falling_edges
[channel
][slotrank
]);
2236 mchbar_write32(IOSAV_DC_MASK
, 2 << 8);
2237 printram("discover rising edges:\n[%x] = %x\n", IOSAV_DC_MASK
, 2 << 8);
2239 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
{
2240 err
= find_read_mpr_margin(ctrl
, channel
, slotrank
,
2241 rising_edges
[channel
][slotrank
]);
2246 mchbar_write32(IOSAV_DC_MASK
, 0);
2248 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES
{
2249 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_n
=
2250 falling_edges
[channel
][slotrank
][lane
];
2251 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_p
=
2252 rising_edges
[channel
][slotrank
][lane
];
2255 FOR_ALL_POPULATED_CHANNELS
{
2256 program_timings(ctrl
, channel
);
2259 FOR_ALL_POPULATED_CHANNELS FOR_ALL_LANES
{
2260 mchbar_write32(IOSAV_By_BW_MASK_ch(channel
, lane
), 0);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: aggressive read margining: for each of three read Vref
 * offsets and each test pattern, sweep rx_dqs over the edge range, run a
 * data-write sequence, sample the per-byte error register (noted as Ivy
 * Bridge only), intersect the passing windows (lower/upper per lane) across
 * all Vref/pattern combinations, and return the window centers in edges[].
 */
2265 static int find_agrsv_read_margin(ramctr_timing
*ctrl
, int channel
, int slotrank
, int *edges
)
2267 const int rd_vref_offsets
[] = { 0, 0xc, 0x2c };
2269 u32 raw_stats
[MAX_EDGE_TIMING
+ 1];
2270 int lower
[NUM_LANES
];
2271 int upper
[NUM_LANES
];
2272 int lane
, i
, read_pi
, pat
;
2276 upper
[lane
] = MAX_EDGE_TIMING
;
2279 for (i
= 0; i
< ARRAY_SIZE(rd_vref_offsets
); i
++) {
2280 const union gdcr_training_mod_reg training_mod
= {
2281 .vref_gen_ctl
= rd_vref_offsets
[i
],
2283 mchbar_write32(GDCRTRAININGMOD_ch(channel
), training_mod
.raw
);
2284 printram("[%x] = 0x%08x\n", GDCRTRAININGMOD_ch(channel
), training_mod
.raw
);
2286 for (pat
= 0; pat
< NUM_PATTERNS
; pat
++) {
2287 fill_pattern5(ctrl
, channel
, pat
);
2288 printram("using pattern %d\n", pat
);
2290 for (read_pi
= 0; read_pi
<= MAX_EDGE_TIMING
; read_pi
++) {
2292 ctrl
->timings
[channel
][slotrank
].lanes
[lane
]
2293 .rx_dqs_p
= read_pi
;
2294 ctrl
->timings
[channel
][slotrank
].lanes
[lane
]
2295 .rx_dqs_n
= read_pi
;
2297 program_timings(ctrl
, channel
);
2300 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel
, lane
),
2302 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel
, lane
));
2304 wait_for_iosav(channel
);
2306 iosav_write_data_write_sequence(ctrl
, channel
, slotrank
);
2308 iosav_run_once_and_wait(channel
);
2311 mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel
, lane
));
2314 /* FIXME: This register only exists on Ivy Bridge */
2315 raw_stats
[read_pi
] = mchbar_read32(
2316 IOSAV_BYTE_SERROR_C_ch(channel
));
2320 int stats
[MAX_EDGE_TIMING
+ 1];
2323 for (read_pi
= 0; read_pi
<= MAX_EDGE_TIMING
; read_pi
++)
2324 stats
[read_pi
] = !!(raw_stats
[read_pi
] & (1 << lane
));
2326 rn
= get_longest_zero_run(stats
, MAX_EDGE_TIMING
+ 1);
2328 printram("edges: %d, %d, %d: % 4d-% 4d-% 4d, "
2329 "% 4d-% 4d\n", channel
, slotrank
, i
, rn
.start
,
2330 rn
.middle
, rn
.end
, rn
.start
+ ctrl
->edge_offset
[i
],
2331 rn
.end
- ctrl
->edge_offset
[i
]);
2333 lower
[lane
] = MAX(rn
.start
+ ctrl
->edge_offset
[i
], lower
[lane
]);
2334 upper
[lane
] = MIN(rn
.end
- ctrl
->edge_offset
[i
], upper
[lane
]);
2336 edges
[lane
] = (lower
[lane
] + upper
[lane
]) / 2;
2337 if (rn
.all
|| (lower
[lane
] > upper
[lane
])) {
2338 printk(BIOS_EMERG
, "Aggressive read training failed: "
2339 "%d, %d, %d\n", channel
, slotrank
, lane
);
2347 /* Restore nominal Vref after training */
2348 mchbar_write32(GDCRTRAININGMOD_ch(channel
), 0);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: aggressive read training driver: find falling and rising
 * edges per channel/rank via find_agrsv_read_margin() (IOSAV_DC_MASK of
 * 3 << 8 and 2 << 8 respectively), store them into rx_dqs_n / rx_dqs_p, and
 * reprogram timings.
 */
2353 int aggressive_read_training(ramctr_timing
*ctrl
)
2355 int falling_edges
[NUM_CHANNELS
][NUM_SLOTRANKS
][NUM_LANES
];
2356 int rising_edges
[NUM_CHANNELS
][NUM_SLOTRANKS
][NUM_LANES
];
2357 int channel
, slotrank
, lane
, err
;
2360 * FIXME: Under some conditions, vendor BIOS sets both edges to the same value. It will
2361 * also use a single loop. It would seem that it is a debugging configuration.
2363 mchbar_write32(IOSAV_DC_MASK
, 3 << 8);
2364 printram("discover falling edges aggressive:\n[%x] = %x\n", IOSAV_DC_MASK
, 3 << 8);
2366 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
{
2367 err
= find_agrsv_read_margin(ctrl
, channel
, slotrank
,
2368 falling_edges
[channel
][slotrank
]);
2373 mchbar_write32(IOSAV_DC_MASK
, 2 << 8);
2374 printram("discover rising edges aggressive:\n[%x] = %x\n", IOSAV_DC_MASK
, 2 << 8);
2376 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
{
2377 err
= find_agrsv_read_margin(ctrl
, channel
, slotrank
,
2378 rising_edges
[channel
][slotrank
]);
2383 mchbar_write32(IOSAV_DC_MASK
, 0);
2385 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES
{
2386 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_n
=
2387 falling_edges
[channel
][slotrank
][lane
];
2389 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rx_dqs_p
=
2390 rising_edges
[channel
][slotrank
][lane
];
2393 FOR_ALL_POPULATED_CHANNELS
2394 program_timings(ctrl
, channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: run one aggressive write/read IOSAV sequence on the given
 * channel/rank and wait for completion.
 */
2399 static void test_aggressive_write(ramctr_timing
*ctrl
, int channel
, int slotrank
)
2401 wait_for_iosav(channel
);
2403 iosav_write_aggressive_write_read_sequence(ctrl
, channel
, slotrank
);
2405 iosav_run_once_and_wait(channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: program the 6-bit write Vref field (bits 29:24) of
 * GDCRCMDDEBUGMUXCFG for the channel.
 */
2408 static void set_write_vref(const int channel
, const u8 wr_vref
)
2410 mchbar_clrsetbits32(GDCRCMDDEBUGMUXCFG_Cz_S(channel
), 0x3f << 24, wr_vref
<< 24);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: aggressive write training (Ivy Bridge SKUs with the
 * CAPID_WRTVREF capability only): for three write Vref offsets and each
 * pattern, sweep tx_dq, sample the per-byte error register, intersect the
 * passing windows per channel/rank/lane (lower/upper), restore nominal
 * Vref, and program tx_dq to each window's center.
 */
2414 int aggressive_write_training(ramctr_timing
*ctrl
)
2416 const u8 wr_vref_offsets
[3] = { 0, 0x0f, 0x2f };
2419 int lower
[NUM_CHANNELS
][NUM_SLOTRANKS
][NUM_LANES
];
2420 int upper
[NUM_CHANNELS
][NUM_SLOTRANKS
][NUM_LANES
];
2421 int channel
, slotrank
, lane
;
2423 /* Changing the write Vref is only supported on some Ivy Bridge SKUs */
2424 if (!IS_IVY_CPU(ctrl
->cpu
))
2427 if (!(pci_read_config32(HOST_BRIDGE
, CAPID0_A
) & CAPID_WRTVREF
))
2430 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES
{
2431 lower
[channel
][slotrank
][lane
] = 0;
2432 upper
[channel
][slotrank
][lane
] = MAX_TX_DQ
;
2435 /* Only enable IOSAV_n_SPECIAL_COMMAND_ADDR optimization on later steppings */
2436 const bool enable_iosav_opt
= IS_IVY_CPU_D(ctrl
->cpu
) || IS_IVY_CPU_E(ctrl
->cpu
);
2438 if (enable_iosav_opt
)
2439 mchbar_write32(MCMNTS_SPARE
, 1);
2441 printram("Aggressive write training:\n");
2443 for (i
= 0; i
< ARRAY_SIZE(wr_vref_offsets
); i
++) {
2444 FOR_ALL_POPULATED_CHANNELS
{
2445 set_write_vref(channel
, wr_vref_offsets
[i
]);
2447 for (pat
= 0; pat
< NUM_PATTERNS
; pat
++) {
2448 FOR_ALL_POPULATED_RANKS
{
2450 u32 raw_stats
[MAX_TX_DQ
+ 1];
2451 int stats
[MAX_TX_DQ
+ 1];
2453 /* Make sure rn.start < rn.end */
2454 stats
[MAX_TX_DQ
] = 1;
2456 fill_pattern5(ctrl
, channel
, pat
);
2458 for (tx_dq
= 0; tx_dq
< MAX_TX_DQ
; tx_dq
++) {
2460 ctrl
->timings
[channel
][slotrank
]
2461 .lanes
[lane
].tx_dq
= tx_dq
;
2463 program_timings(ctrl
, channel
);
2465 test_aggressive_write(ctrl
, channel
, slotrank
);
2467 raw_stats
[tx_dq
] = mchbar_read32(
2468 IOSAV_BYTE_SERROR_C_ch(channel
));
2472 for (tx_dq
= 0; tx_dq
< MAX_TX_DQ
; tx_dq
++) {
2473 stats
[tx_dq
] = !!(raw_stats
[tx_dq
]
2477 rn
= get_longest_zero_run(stats
, MAX_TX_DQ
+ 1);
2479 printk(BIOS_EMERG
, "Aggressive "
2480 "write training failed: "
2481 "%d, %d, %d\n", channel
,
2486 printram("tx_dq: %d, %d, %d: "
2488 "% 4d-% 4d\n", channel
, slotrank
,
2489 i
, rn
.start
, rn
.middle
, rn
.end
,
2490 rn
.start
+ ctrl
->tx_dq_offset
[i
],
2491 rn
.end
- ctrl
->tx_dq_offset
[i
]);
2493 lower
[channel
][slotrank
][lane
] =
2494 MAX(rn
.start
+ ctrl
->tx_dq_offset
[i
],
2495 lower
[channel
][slotrank
][lane
]);
2497 upper
[channel
][slotrank
][lane
] =
2498 MIN(rn
.end
- ctrl
->tx_dq_offset
[i
],
2499 upper
[channel
][slotrank
][lane
]);
2507 /* Restore nominal write Vref after training */
2508 set_write_vref(channel
, 0);
2511 /* Disable IOSAV_n_SPECIAL_COMMAND_ADDR optimization */
2512 if (enable_iosav_opt
)
2513 mchbar_write32(MCMNTS_SPARE
, 0);
2517 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES
{
2518 printram("tx_dq %d, %d, %d: % 4d\n", channel
, slotrank
, lane
,
2519 (lower
[channel
][slotrank
][lane
] +
2520 upper
[channel
][slotrank
][lane
]) / 2);
2522 ctrl
->timings
[channel
][slotrank
].lanes
[lane
].tx_dq
=
2523 (lower
[channel
][slotrank
][lane
] +
2524 upper
[channel
][slotrank
][lane
]) / 2;
2526 FOR_ALL_POPULATED_CHANNELS
{
2527 program_timings(ctrl
, channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: per channel/rank compute the maximum lane rcven (mat),
 * derive delta = (mat >> 6) - io_latency, and add delta to both roundtrip
 * and io latency — presumably normalizing latencies across lanes; TODO
 * confirm against upstream source (declaration lines are missing here).
 */
2532 void normalize_training(ramctr_timing
*ctrl
)
2534 int channel
, slotrank
, lane
;
2537 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
{
2541 MAX(ctrl
->timings
[channel
][slotrank
].lanes
[lane
].rcven
, mat
);
2542 printram("normalize %d, %d, %d: mat %d\n",
2543 channel
, slotrank
, lane
, mat
);
2545 delta
= (mat
>> 6) - ctrl
->timings
[channel
][slotrank
].io_latency
;
2546 printram("normalize %d, %d, %d: delta %d\n",
2547 channel
, slotrank
, lane
, delta
);
2549 ctrl
->timings
[channel
][slotrank
].roundtrip_latency
+= delta
;
2550 ctrl
->timings
[channel
][slotrank
].io_latency
+= delta
;
2553 FOR_ALL_POPULATED_CHANNELS
{
2554 program_timings(ctrl
, channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: mini channel test: fail if MC_INIT_STATE shows error bits
 * (0xa000), else write a fixed pattern, run the memory test IOSAV sequence
 * per populated slotrank, and fail if any per-lane error count is nonzero.
 */
2558 int channel_test(ramctr_timing
*ctrl
)
2560 int channel
, slotrank
, lane
;
2563 FOR_ALL_POPULATED_CHANNELS
2564 if (mchbar_read32(MC_INIT_STATE_ch(channel
)) & 0xa000) {
2565 printk(BIOS_EMERG
, "Mini channel test failed (1): %d\n", channel
);
2568 FOR_ALL_POPULATED_CHANNELS
{
2569 fill_pattern0(ctrl
, channel
, 0x12345678, 0x98765432);
2572 for (slotrank
= 0; slotrank
< 4; slotrank
++)
2574 if (ctrl
->rankmap
[channel
] & (1 << slotrank
)) {
2576 mchbar_write32(IOSAV_By_ERROR_COUNT(lane
), 0);
2577 mchbar_write32(IOSAV_By_BW_SERROR_C(lane
), 0);
2579 wait_for_iosav(channel
);
2581 iosav_write_memory_test_sequence(ctrl
, channel
, slotrank
);
2583 iosav_run_once_and_wait(channel
);
2586 if (mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel
, lane
))) {
2587 printk(BIOS_EMERG
, "Mini channel test failed (2): %d, %d, %d\n",
2588 channel
, slotrank
, lane
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: write every DRAM address (per channel/rank/bank/row, 16
 * rows per queued batch) via an ACT / 129x WR / PRE IOSAV sequence so the
 * ECC bits become valid, as explained in the retained original comment.
 */
2595 void channel_scrub(ramctr_timing
*ctrl
)
2597 int channel
, slotrank
, row
, rowsize
;
2600 FOR_ALL_POPULATED_CHANNELS
{
2601 wait_for_iosav(channel
);
2602 fill_pattern0(ctrl
, channel
, 0, 0);
2606 * During runtime the "scrubber" will periodically scan through the memory in the
2607 * physical address space, to identify and fix CRC errors.
2608 * The following loops writes to every DRAM address, setting the ECC bits to the
2609 * correct value. A read from this location will no longer return a CRC error,
2610 * except when a bit has toggled due to external events.
2611 * The same could be achieved by writing to the physical memory map, but it's
2612 * much more difficult due to SMM remapping, ME stolen memory, GFX stolen memory,
2613 * and firmware running in x86_32.
2615 FOR_ALL_POPULATED_CHANNELS FOR_ALL_POPULATED_RANKS
{
2616 rowsize
= 1 << ctrl
->info
.dimm
[channel
][slotrank
>> 1].row_bits
;
2617 for (bank
= 0; bank
< 8; bank
++) {
2618 for (row
= 0; row
< rowsize
; row
+= 16) {
2619 u8 gap
= MAX((ctrl
->tFAW
>> 2) + 1, ctrl
->tRRD
);
2620 const struct iosav_ssq sequence
[] = {
2623 * Opens the row for writing.
2627 .command
= IOSAV_ACT
,
2631 .cmd_executions
= 1,
2632 .cmd_delay_gap
= gap
,
2633 .post_ssq_wait
= ctrl
->tRCD
,
2634 .data_direction
= SSQ_NA
,
2649 * Writes (128 + 1) * 8 (burst length) * 8 (bus width)
2654 .command
= IOSAV_WR
,
2658 .cmd_executions
= 129,
2660 .post_ssq_wait
= ctrl
->tWTR
+
2662 .data_direction
= SSQ_WR
,
2681 .command
= IOSAV_PRE
,
2685 .cmd_executions
= 1,
2687 .post_ssq_wait
= ctrl
->tRP
,
2688 .data_direction
= SSQ_NA
,
2701 iosav_write_sequence(channel
, sequence
, ARRAY_SIZE(sequence
));
2703 iosav_run_queue(channel
, 16, 0);
2705 wait_for_iosav(channel
);
/*
 * NOTE(review): extraction-garbled listing — code kept byte-identical.
 * Visible intent: program fixed (hardcoded) scrambling seeds per populated
 * channel and clear SCHED_CBIT bit 28.
 */
2711 void set_scrambling_seed(ramctr_timing
*ctrl
)
2715 /* FIXME: we hardcode seeds. Do we need to use some PRNG for them? I don't think so. */
2716 static u32 seeds
[NUM_CHANNELS
][3] = {
2717 {0x00009a36, 0xbafcfdcf, 0x46d1ab68},
2718 {0x00028bfa, 0x53fe4b49, 0x19ed5483}
2720 FOR_ALL_POPULATED_CHANNELS
{
2721 mchbar_clrbits32(SCHED_CBIT_ch(channel
), 1 << 28);
2722 mchbar_write32(SCRAMBLING_SEED_1_ch(channel
), seeds
[channel
][0]);
2723 mchbar_write32(SCRAMBLING_SEED_2_HI_ch(channel
), seeds
[channel
][1]);
2724 mchbar_write32(SCRAMBLING_SEED_2_LO_ch(channel
), seeds
[channel
][2]);
/*
 * Select the SC_WDBWM (scheduler write data buffer watermark) value based on
 * CPU stepping: Sandy Bridge D0/D1 get a different constant than other parts.
 *
 * NOTE(review): the `else` line (orig. 2732) and braces are missing from this
 * extract; 0x551d1519 is presumably the non-D0/D1 path — confirm upstream.
 */
2728 void set_wmm_behavior(const u32 cpu
)
2730 if (IS_SANDY_CPU(cpu
) && (IS_SANDY_CPU_D0(cpu
) || IS_SANDY_CPU_D1(cpu
))) {
2731 mchbar_write32(SC_WDBWM
, 0x141d1519);
2733 mchbar_write32(SC_WDBWM
, 0x551d1519);
/*
 * Put populated channels into a state suitable for DRAM training:
 * force the command bus to be driven (TC_RAP bit 29) and wait for the
 * IOSAV engine to be idle on each channel.
 *
 * NOTE(review): the `int channel;` declaration and closing braces are
 * missing from this extract (orig. line gaps 2737->2741, 2743->2748).
 */
2737 void prepare_training(ramctr_timing
*ctrl
)
2741 FOR_ALL_POPULATED_CHANNELS
{
2742 /* Always drive command bus */
2743 mchbar_setbits32(TC_RAP_ch(channel
), 1 << 29);
2748 FOR_ALL_POPULATED_CHANNELS
{
2749 wait_for_iosav(channel
);
/*
 * Program the per-channel read/write turnaround timings (TC_RWP) based on
 * the spread of per-rank PI codings found during training: the wider the
 * spread, the more conservative the turnaround values.
 *
 * NOTE(review): the `min_pi` declaration/initialization and most tc_rwp
 * bitfield assignments are missing from this extract (orig. line gaps
 * 2760->2762, 2775->2780) — verify against upstream.
 */
2753 void set_read_write_timings(ramctr_timing
*ctrl
)
2755 /* Use a larger delay when running fast to improve stability */
2756 const u32 tRWDRDD_inc
= ctrl
->tCK
<= TCK_1066MHZ
? 4 : 2;
2758 int channel
, slotrank
;
2760 FOR_ALL_POPULATED_CHANNELS
{
2762 int max_pi
= -10000;
/* Find the min/max PI coding across all populated ranks on this channel */
2764 FOR_ALL_POPULATED_RANKS
{
2765 max_pi
= MAX(ctrl
->timings
[channel
][slotrank
].pi_coding
, max_pi
);
2766 min_pi
= MIN(ctrl
->timings
[channel
][slotrank
].pi_coding
, min_pi
);
/* A spread above 51 PI ticks disables the reference card offset for tWRDRDD */
2769 const u32 tWRDRDD
= (max_pi
- min_pi
> 51) ? 0 : ctrl
->ref_card_offset
[channel
];
2771 const u32 val
= (ctrl
->pi_coding_threshold
< max_pi
- min_pi
) ? 3 : 2;
2773 dram_odt_stretch(ctrl
, channel
);
2775 const union tc_rwp_reg tc_rwp
= {
2780 .tRWDRDD
= ctrl
->ref_card_offset
[channel
] + tRWDRDD_inc
,
2785 mchbar_write32(TC_RWP_ch(channel
), tc_rwp
.raw
);
/*
 * Switch populated channels to normal operation: set MC_INIT_STATE bit 12
 * together with the channel's rank map, and stop forcing the command bus
 * to be driven (clear TC_RAP bit 29, the inverse of prepare_training()).
 *
 * NOTE(review): the `int channel;` declaration and closing braces are
 * missing from this extract.
 */
2789 void set_normal_operation(ramctr_timing
*ctrl
)
2792 FOR_ALL_POPULATED_CHANNELS
{
2793 mchbar_write32(MC_INIT_STATE_ch(channel
), 1 << 12 | ctrl
->rankmap
[channel
]);
2794 mchbar_clrbits32(TC_RAP_ch(channel
), 1 << 29);
/*
 * Encode the watermark latencies in a suitable format for graphics drivers
 * consumption: convert a nanosecond latency into watermark units, rounding
 * up so the reported latency is never understated.
 */
static int encode_wm(int ns)
{
	/* One watermark unit corresponds to 500 ns */
	const int wm_unit_ns = 500;

	return (ns + wm_unit_ns - 1) / wm_unit_ns;
}
/*
 * Final memory controller register programming after training: power
 * management configuration, thermal throttling setup, 2x refresh enable,
 * self-refresh exit latency measurement, and publishing the resulting
 * watermark latencies in SSKPD for the graphics driver.
 *
 * NOTE(review): this extract has large gaps in the embedded original line
 * numbers (loop headers, `r32`/`t3_ns` declarations, `case` labels of the
 * rankmap switch, `else` branches, closing braces are missing) — verify
 * the full control flow against upstream before drawing conclusions.
 */
2804 /* FIXME: values in this function should be hardware revision-dependent */
2805 void final_registers(ramctr_timing
*ctrl
)
2808 int t1_cycles
= 0, t1_ns
= 0, t2_ns
;
/* Ivy Bridge only: configure WMM read behavior */
2812 if (IS_IVY_CPU(ctrl
->cpu
))
2813 mchbar_write32(WMM_READ_CONFIG
, 0x46);
2816 union tc_othp_reg tc_othp
= {
2817 .raw
= mchbar_read32(TC_OTHP_ch(channel
)),
/* Pre-D0 Sandy Bridge steppings need a different TC_OTHP adjustment */
2819 if (IS_SANDY_CPU(ctrl
->cpu
) && (ctrl
->cpu
& 0xf) < SNB_STEP_D0
)
2823 mchbar_write32(TC_OTHP_ch(channel
), tc_othp
.raw
);
2825 /* 64 DCLKs until idle, decision per rank */
2826 r32
= get_power_down_mode(ctrl
, channel
) << 8 | 64;
2827 mchbar_write32(PM_PDWN_CONFIG_ch(channel
), r32
);
/* Thermal throttling and bandwidth-limit configuration (fixed constants) */
2829 mchbar_write32(PM_TRML_M_CONFIG_ch(channel
), 0x00000aaa);
2832 mchbar_write32(PM_BW_LIMIT_CONFIG
, 0x5f7003ff);
/* DLL wake settings differ between Sandy Bridge and (presumably) Ivy Bridge */
2833 if (IS_SANDY_CPU(ctrl
->cpu
))
2834 mchbar_write32(PM_DLL_CONFIG
, 0x000330f0);
2836 mchbar_write32(PM_DLL_CONFIG
, 0x00073000 | ctrl
->mdll_wake_delay
);
/* Command power depends on how the channel is populated */
2839 switch (ctrl
->rankmap
[channel
]) {
2840 /* Unpopulated channel */
2842 mchbar_write32(PM_CMD_PWR_ch(channel
), 0);
2844 /* Only single-ranked dimms */
2848 mchbar_write32(PM_CMD_PWR_ch(channel
), 0x00373131);
2850 /* Dual-ranked dimms present */
2852 mchbar_write32(PM_CMD_PWR_ch(channel
), 0x009b6ea1);
/* Thermal estimation/threshold setup (fixed constants); clear interrupts */
2857 mchbar_write32(MEM_TRML_ESTIMATION_CONFIG
, 0xca9171e5);
2858 mchbar_clrsetbits32(MEM_TRML_THRESHOLDS_CONFIG
, 0x00ffffff, 0x00e4d5d0);
2859 mchbar_clrbits32(MEM_TRML_INTERRUPT
, 0x1f);
/* Enable 2x refresh per channel */
2862 union tc_rfp_reg tc_rfp
= {
2863 .raw
= mchbar_read32(TC_RFP_ch(channel
)),
2865 tc_rfp
.refresh_2x_control
= 1;
2866 mchbar_write32(TC_RFP_ch(channel
), tc_rfp
.raw
);
2869 mchbar_setbits32(MC_INIT_STATE_G
, 1 << 0);
2870 mchbar_setbits32(MC_INIT_STATE_G
, 1 << 7);
2872 /* Find a populated channel */
2873 FOR_ALL_POPULATED_CHANNELS
/* Compute self-refresh exit latency t1 in DCLK cycles, then in ns */
2876 t1_cycles
= (mchbar_read32(TC_ZQCAL_ch(channel
)) >> 8) & 0xff;
2877 r32
= mchbar_read32(PM_DLL_CONFIG
);
2878 if (r32
& (1 << 17))
2879 t1_cycles
+= (r32
& 0xfff);
2880 t1_cycles
+= mchbar_read32(TC_SRFTP_ch(channel
)) & 0xfff;
/* tCK is in units of 1/256 ns here; 544 ns is a fixed additive overhead */
2881 t1_ns
= t1_cycles
* ctrl
->tCK
/ 256 + 544;
2882 if (!(r32
& (1 << 17)))
/* t2/t3 latencies are read from SA power-management timer registers, x10 ns */
2885 t2_ns
= 10 * ((mchbar_read32(SAPMTIMERS
) >> 8) & 0xfff);
2886 if (mchbar_read32(SAPMCTL
) & 8) {
2887 t3_ns
= 10 * ((mchbar_read32(BANDTIMERS_IVB
) >> 8) & 0xfff);
2888 t3_ns
+= 10 * (mchbar_read32(SAPMTIMERS2_IVB
) & 0xff);
2893 /* The graphics driver will use these watermark values */
2894 printk(BIOS_DEBUG
, "t123: %d, %d, %d\n", t1_ns
, t2_ns
, t3_ns
);
/* Pack cumulative encoded watermarks into SSKPD byte lanes (plus 0x0c flags) */
2895 mchbar_clrsetbits32(SSKPD
, 0x3f3f3f3f,
2896 ((encode_wm(t1_ns
) + encode_wm(t2_ns
)) << 16) | (encode_wm(t1_ns
) << 8) |
2897 ((encode_wm(t3_ns
) + encode_wm(t2_ns
) + encode_wm(t1_ns
)) << 24) | 0x0c);
2900 void restore_timings(ramctr_timing
*ctrl
)
2904 FOR_ALL_POPULATED_CHANNELS
{
2905 const union tc_rap_reg tc_rap
= {
2912 .tCMD
= ctrl
->cmd_stretch
[channel
],
2914 mchbar_write32(TC_RAP_ch(channel
), tc_rap
.raw
);
2919 FOR_ALL_POPULATED_CHANNELS
{
2920 wait_for_iosav(channel
);
2923 FOR_ALL_POPULATED_CHANNELS
2924 mchbar_setbits32(TC_RWP_ch(channel
), 1 << 27);
2926 FOR_ALL_POPULATED_CHANNELS
{
2928 mchbar_setbits32(SCHED_CBIT_ch(channel
), 1 << 21);
2933 mchbar_write32(GDCRTRAININGMOD
, 0);
2934 mchbar_write32(IOSAV_DC_MASK
, 0);
2938 FOR_ALL_POPULATED_CHANNELS
{
2939 program_timings(ctrl
, channel
);
2944 /* Poll for RCOMP */
2945 while (!(mchbar_read32(RCOMP_TIMER
) & (1 << 16)))
2949 reg
= mchbar_read32(IOSAV_STATUS_ch(0));
2950 } while ((reg
& 0x14) == 0);
2952 /* Set state of memory controller */
2953 mchbar_write32(MC_INIT_STATE_G
, 0x116);
2954 mchbar_write32(MC_INIT_STATE
, 0);
2960 /* Set valid rank CKE */
2962 reg
= (reg
& ~0x0f) | ctrl
->rankmap
[channel
];
2963 addr
= MC_INIT_STATE_ch(channel
);
2964 mchbar_write32(addr
, reg
);
2966 /* Wait 10ns for ranks to settle */
2969 reg
= (reg
& ~0xf0) | (ctrl
->rankmap
[channel
] << 4);
2970 mchbar_write32(addr
, reg
);
2972 /* Write reset using a NOP */
2977 dram_mrscommands(ctrl
);
2981 mchbar_write32(GDCRTRAININGMOD_ch(0), 0);
2984 mchbar_clrbits32(GDCRCMDDEBUGMUXCFG_Cz_S(channel
), 0x3f << 24);