src/northbridge/intel/sandybridge/raminit_common.c
1 /* SPDX-License-Identifier: GPL-2.0-only */
3 #include <assert.h>
4 #include <commonlib/helpers.h>
5 #include <console/console.h>
6 #include <cpu/intel/model_206ax/model_206ax.h>
7 #include <device/mmio.h>
8 #include <device/pci_ops.h>
9 #include <northbridge/intel/sandybridge/chip.h>
10 #include <device/pci_def.h>
11 #include <delay.h>
12 #include <types.h>
14 #include "raminit_common.h"
15 #include "raminit_tables.h"
16 #include "sandybridge.h"
18 /* FIXME: no support for 3-channel chipsets */
20 static void sfence(void)
22 asm volatile ("sfence");
25 /* Toggle IO reset bit */
26 static void toggle_io_reset(void)
28 u32 r32 = mchbar_read32(MC_INIT_STATE_G);
29 mchbar_write32(MC_INIT_STATE_G, r32 | (1 << 5));
30 udelay(1);
31 mchbar_write32(MC_INIT_STATE_G, r32 & ~(1 << 5));
32 udelay(1);
35 static u32 get_XOVER_CLK(u8 rankmap)
37 return rankmap << 24;
40 static u32 get_XOVER_CMD(u8 rankmap)
42 u32 reg;
44 /* Enable xover cmd */
45 reg = 1 << 14;
47 /* Enable xover ctl */
48 if (rankmap & 0x03)
49 reg |= (1 << 17);
51 if (rankmap & 0x0c)
52 reg |= (1 << 26);
54 return reg;
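/*
 * Note (added for clarity, inferred from the masks used above and in get_ODT() below):
 * rankmap is a 4-bit map, bits 0-1 for the ranks of the DIMM in slot 0 and bits 2-3 for
 * the ranks of the DIMM in slot 1. For example, two single-rank DIMMs on a channel give
 * a rankmap of 0x5, which enables both the (1 << 17) and (1 << 26) xover control bits.
 */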
57 void dram_find_common_params(ramctr_timing *ctrl)
59 size_t valid_dimms;
60 int channel, slot;
61 dimm_info *dimms = &ctrl->info;
63 ctrl->cas_supported = (1 << (MAX_CAS - MIN_CAS + 1)) - 1;
64 valid_dimms = 0;
66 FOR_ALL_CHANNELS for (slot = 0; slot < 2; slot++) {
67 const struct dimm_attr_ddr3_st *dimm = &dimms->dimm[channel][slot];
68 if (dimm->dram_type != SPD_MEMORY_TYPE_SDRAM_DDR3)
69 continue;
71 valid_dimms++;
73 /* Find all possible CAS combinations */
74 ctrl->cas_supported &= dimm->cas_supported;
76 /* Find the smallest common latencies supported by all DIMMs */
77 ctrl->tCK = MAX(ctrl->tCK, dimm->tCK);
78 ctrl->tAA = MAX(ctrl->tAA, dimm->tAA);
79 ctrl->tWR = MAX(ctrl->tWR, dimm->tWR);
80 ctrl->tRCD = MAX(ctrl->tRCD, dimm->tRCD);
81 ctrl->tRRD = MAX(ctrl->tRRD, dimm->tRRD);
82 ctrl->tRP = MAX(ctrl->tRP, dimm->tRP);
83 ctrl->tRAS = MAX(ctrl->tRAS, dimm->tRAS);
84 ctrl->tRFC = MAX(ctrl->tRFC, dimm->tRFC);
85 ctrl->tWTR = MAX(ctrl->tWTR, dimm->tWTR);
86 ctrl->tRTP = MAX(ctrl->tRTP, dimm->tRTP);
87 ctrl->tFAW = MAX(ctrl->tFAW, dimm->tFAW);
88 ctrl->tCWL = MAX(ctrl->tCWL, dimm->tCWL);
89 ctrl->tCMD = MAX(ctrl->tCMD, dimm->tCMD);
92 if (!ctrl->cas_supported)
93 		die("Unsupported DIMM combination. DIMMs do not support common CAS latency");
95 if (!valid_dimms)
96 die("No valid DIMMs found");
99 void dram_xover(ramctr_timing *ctrl)
101 u32 reg;
102 int channel;
104 FOR_ALL_CHANNELS {
105 /* Enable xover clk */
106 reg = get_XOVER_CLK(ctrl->rankmap[channel]);
107 printram("XOVER CLK [%x] = %x\n", GDCRCKPICODE_ch(channel), reg);
108 mchbar_write32(GDCRCKPICODE_ch(channel), reg);
110 /* Enable xover ctl & xover cmd */
111 reg = get_XOVER_CMD(ctrl->rankmap[channel]);
112 printram("XOVER CMD [%x] = %x\n", GDCRCMDPICODING_ch(channel), reg);
113 mchbar_write32(GDCRCMDPICODING_ch(channel), reg);
117 static void dram_odt_stretch(ramctr_timing *ctrl, int channel)
119 u32 addr, stretch;
121 stretch = ctrl->ref_card_offset[channel];
123 * ODT stretch:
124 * Delay ODT signal by stretch value. Useful for multi DIMM setups on the same channel.
126 if (IS_SANDY_CPU(ctrl->cpu) && IS_SANDY_CPU_C(ctrl->cpu)) {
127 if (stretch == 2)
128 stretch = 3;
130 addr = SCHED_SECOND_CBIT_ch(channel);
131 mchbar_clrsetbits32(addr, 0xf << 10, stretch << 12 | stretch << 10);
132 printk(RAM_DEBUG, "OTHP Workaround [%x] = %x\n", addr, mchbar_read32(addr));
133 } else {
134 addr = TC_OTHP_ch(channel);
135 union tc_othp_reg tc_othp = {
136 .raw = mchbar_read32(addr),
138 tc_othp.odt_delay_d0 = stretch;
139 tc_othp.odt_delay_d1 = stretch;
140 mchbar_write32(addr, tc_othp.raw);
141 printk(RAM_DEBUG, "OTHP [%x] = %x\n", addr, mchbar_read32(addr));
145 void dram_timing_regs(ramctr_timing *ctrl)
147 int channel;
149 /* BIN parameters */
150 const union tc_dbp_reg tc_dbp = {
151 .tRCD = ctrl->tRCD,
152 .tRP = ctrl->tRP,
153 .tAA = ctrl->CAS,
154 .tCWL = ctrl->CWL,
155 .tRAS = ctrl->tRAS,
158 /* Regular access parameters */
159 const union tc_rap_reg tc_rap = {
160 .tRRD = ctrl->tRRD,
161 .tRTP = ctrl->tRTP,
162 .tCKE = ctrl->tCKE,
163 .tWTR = ctrl->tWTR,
164 .tFAW = ctrl->tFAW,
165 .tWR = ctrl->tWR,
166 .tCMD = 3,
169 /* Other parameters */
170 const union tc_othp_reg tc_othp = {
171 .tXPDLL = MIN(ctrl->tXPDLL, 31),
172 .tXP = MIN(ctrl->tXP, 7),
173 .tAONPD = ctrl->tAONPD,
174 .tCPDED = 1,
175 .tPRPDEN = 1,
179 * If tXP and tXPDLL are very high, they no longer fit in the bitfields
180 * of the TC_OTHP register. If so, we set bits in TC_DTP to compensate.
181 * This can only happen on Ivy Bridge, and when overclocking the RAM.
183 const union tc_dtp_reg tc_dtp = {
184 .overclock_tXP = ctrl->tXP >= 8,
185 .overclock_tXPDLL = ctrl->tXPDLL >= 32,
189 * TC-Refresh timing parameters:
190	 * The tREFIx9 field should be programmed to the minimum of 8.9 * tREFI (to allow
191	 * for possible delays from ZQ or isoc) and tRASmax (70 us), divided by 1024.
193 const u32 val32 = MIN((ctrl->tREFI * 89) / 10, (70000 << 8) / ctrl->tCK);
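/*
 * Worked example (illustrative, assuming tREFI is in DCLKs and tCK is in 1/256 ns units,
 * as the expression above implies): at DDR3-1333, tCK = 1.5 ns (384) and tREFI = 7.8 us
 * = 5200 DCLKs. Then 8.9 * tREFI = 46280, the number of DCLKs in 70 us is
 * (70000 << 8) / 384 = 46666, so val32 = 46280 and tREFIx9 = 46280 / 1024 = 45.
 */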
195 const union tc_rftp_reg tc_rftp = {
196 .tREFI = ctrl->tREFI,
197 .tRFC = ctrl->tRFC,
198 .tREFIx9 = val32 / 1024,
201 /* Self-refresh timing parameters */
202 const union tc_srftp_reg tc_srftp = {
203 .tXSDLL = tDLLK,
204 .tXS_offset = ctrl->tXSOffset,
205 .tZQOPER = tDLLK - ctrl->tXSOffset,
206 .tMOD = ctrl->tMOD - 8,
209 FOR_ALL_CHANNELS {
210 printram("DBP [%x] = %x\n", TC_DBP_ch(channel), tc_dbp.raw);
211 mchbar_write32(TC_DBP_ch(channel), tc_dbp.raw);
213 printram("RAP [%x] = %x\n", TC_RAP_ch(channel), tc_rap.raw);
214 mchbar_write32(TC_RAP_ch(channel), tc_rap.raw);
216 printram("OTHP [%x] = %x\n", TC_OTHP_ch(channel), tc_othp.raw);
217 mchbar_write32(TC_OTHP_ch(channel), tc_othp.raw);
219 if (IS_IVY_CPU(ctrl->cpu)) {
220 /* Debug parameters - only applies to Ivy Bridge */
221 mchbar_write32(TC_DTP_ch(channel), tc_dtp.raw);
224 dram_odt_stretch(ctrl, channel);
226 printram("REFI [%x] = %x\n", TC_RFTP_ch(channel), tc_rftp.raw);
227 mchbar_write32(TC_RFTP_ch(channel), tc_rftp.raw);
229 union tc_rfp_reg tc_rfp = {
230 .raw = mchbar_read32(TC_RFP_ch(channel)),
232 tc_rfp.oref_ri = 0xff;
233 mchbar_write32(TC_RFP_ch(channel), tc_rfp.raw);
235 printram("SRFTP [%x] = %x\n", TC_SRFTP_ch(channel), tc_srftp.raw);
236 mchbar_write32(TC_SRFTP_ch(channel), tc_srftp.raw);
240 void dram_dimm_mapping(ramctr_timing *ctrl)
242 int channel;
243 dimm_info *info = &ctrl->info;
245 FOR_ALL_CHANNELS {
246 struct dimm_attr_ddr3_st *dimmA, *dimmB;
247 u32 reg = 0;
249 if (info->dimm[channel][0].size_mb >= info->dimm[channel][1].size_mb) {
250 dimmA = &info->dimm[channel][0];
251 dimmB = &info->dimm[channel][1];
252 reg |= (0 << 16);
253 } else {
254 dimmA = &info->dimm[channel][1];
255 dimmB = &info->dimm[channel][0];
256 reg |= (1 << 16);
259 if (dimmA && (dimmA->ranks > 0)) {
260 reg |= (dimmA->size_mb / 256) << 0;
261 reg |= (dimmA->ranks - 1) << 17;
262 reg |= (dimmA->width / 8 - 1) << 19;
265 if (dimmB && (dimmB->ranks > 0)) {
266 reg |= (dimmB->size_mb / 256) << 8;
267 reg |= (dimmB->ranks - 1) << 18;
268 reg |= (dimmB->width / 8 - 1) << 20;
272 * Rank interleave: Bit 16 of the physical address space sets
273 * the rank to use in a dual single rank DIMM configuration.
274 * That results in every 64KiB being interleaved between two ranks.
276 reg |= 1 << 21;
277 /* Enhanced interleave */
278 reg |= 1 << 22;
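/*
 * Worked example (illustrative): a single dual-rank x8 4 GiB DIMM in slot 0 gives
 * dimmA = slot 0, so reg = (4096 / 256) << 0 | (2 - 1) << 17 | (8 / 8 - 1) << 19 |
 * 1 << 21 | 1 << 22 = 0x620010; bit 16 and the DIMM B fields remain 0 because slot 1
 * is empty.
 */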
280 if ((dimmA && (dimmA->ranks > 0)) || (dimmB && (dimmB->ranks > 0))) {
281 ctrl->mad_dimm[channel] = reg;
282 } else {
283 ctrl->mad_dimm[channel] = 0;
288 void dram_dimm_set_mapping(ramctr_timing *ctrl, int training)
290 int channel;
291 u32 ecc;
293 if (ctrl->ecc_enabled)
294 ecc = training ? (1 << 24) : (3 << 24);
295 else
296 ecc = 0;
298 FOR_ALL_CHANNELS {
299 mchbar_write32(MAD_DIMM(channel), ctrl->mad_dimm[channel] | ecc);
302 if (ctrl->ecc_enabled)
303 udelay(10);
306 void dram_zones(ramctr_timing *ctrl, int training)
308 u32 reg, ch0size, ch1size;
309 u8 val;
310 reg = 0;
311 val = 0;
313 if (training) {
314 ch0size = ctrl->channel_size_mb[0] ? 256 : 0;
315 ch1size = ctrl->channel_size_mb[1] ? 256 : 0;
316 } else {
317 ch0size = ctrl->channel_size_mb[0];
318 ch1size = ctrl->channel_size_mb[1];
321 if (ch0size >= ch1size) {
322 reg = mchbar_read32(MAD_ZR);
323 val = ch1size / 256;
324 reg = (reg & ~0xff000000) | val << 24;
325 reg = (reg & ~0x00ff0000) | (2 * val) << 16;
326 mchbar_write32(MAD_ZR, reg);
327 mchbar_write32(MAD_CHNL, 0x24);
329 } else {
330 reg = mchbar_read32(MAD_ZR);
331 val = ch0size / 256;
332 reg = (reg & ~0xff000000) | val << 24;
333 reg = (reg & ~0x00ff0000) | (2 * val) << 16;
334 mchbar_write32(MAD_ZR, reg);
335 mchbar_write32(MAD_CHNL, 0x21);
340 * Returns the ECC mode the NB is running at. It takes precedence over ECC capability.
341 * The ME/PCU/.. has the ability to change this.
342 * Return 0: ECC is optional
343 * Return 1: ECC is forced
345 bool get_host_ecc_forced(void)
347 /* read Capabilities A Register */
348 const u32 reg32 = pci_read_config32(HOST_BRIDGE, CAPID0_A);
349 return !!(reg32 & (1 << 24));
353 * Returns the ECC capability.
354 * The ME/PCU/.. has the ability to change this.
355 * Return 0: ECC is disabled
356 * Return 1: ECC is possible
358 bool get_host_ecc_cap(void)
360 /* read Capabilities A Register */
361 const u32 reg32 = pci_read_config32(HOST_BRIDGE, CAPID0_A);
362 return !(reg32 & (1 << 25));
365 #define DEFAULT_PCI_MMIO_SIZE 2048
367 void dram_memorymap(ramctr_timing *ctrl, int me_uma_size)
369 u32 reg, val, reclaim, tom, gfxstolen, gttsize;
370 size_t tsegbase, toludbase, remapbase, gfxstolenbase, mmiosize, gttbase;
371 size_t tsegsize, touudbase, remaplimit, mestolenbase, tsegbasedelta;
372 uint16_t ggc;
374 mmiosize = DEFAULT_PCI_MMIO_SIZE;
376 ggc = pci_read_config16(HOST_BRIDGE, GGC);
377 if (!(ggc & 2)) {
378 gfxstolen = ((ggc >> 3) & 0x1f) * 32;
379 gttsize = ((ggc >> 8) & 0x3);
380 } else {
381 gfxstolen = 0;
382 gttsize = 0;
385 tsegsize = CONFIG_SMM_TSEG_SIZE >> 20;
387 tom = ctrl->channel_size_mb[0] + ctrl->channel_size_mb[1];
389 mestolenbase = tom - me_uma_size;
391 toludbase = MIN(4096 - mmiosize + gfxstolen + gttsize + tsegsize, tom - me_uma_size);
393 gfxstolenbase = toludbase - gfxstolen;
394 gttbase = gfxstolenbase - gttsize;
396 tsegbase = gttbase - tsegsize;
398 /* Round tsegbase down to nearest address aligned to tsegsize */
399 tsegbasedelta = tsegbase & (tsegsize - 1);
400 tsegbase &= ~(tsegsize - 1);
402 gttbase -= tsegbasedelta;
403 gfxstolenbase -= tsegbasedelta;
404 toludbase -= tsegbasedelta;
406 /* Test if it is possible to reclaim a hole in the RAM addressing */
407 if (tom - me_uma_size > toludbase) {
408 /* Reclaim is possible */
409 reclaim = 1;
410 remapbase = MAX(4096, tom - me_uma_size);
411 remaplimit = remapbase + MIN(4096, tom - me_uma_size) - toludbase - 1;
412 touudbase = remaplimit + 1;
413 } else {
414 /* Reclaim not possible */
415 reclaim = 0;
416 touudbase = tom - me_uma_size;
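/*
 * Worked example (illustrative, values assumed): with 8192 MiB of DRAM, me_uma_size = 0,
 * 32 MiB of GFX stolen memory, 2 MiB of GTT and an 8 MiB TSEG,
 * toludbase = MIN(4096 - 2048 + 32 + 2 + 8, 8192) = 2090 MiB and tsegbase = 2048 MiB
 * (already aligned, so tsegbasedelta = 0). Reclaim is possible: remapbase = 8192 MiB,
 * remaplimit = 8192 + 4096 - 2090 - 1 = 10197 MiB and touudbase = 10198 MiB, so all
 * 8192 MiB of DRAM remain usable.
 */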
419 /* Update memory map in PCIe configuration space */
420 printk(BIOS_DEBUG, "Update PCI-E configuration space:\n");
422 /* TOM (top of memory) */
423 reg = pci_read_config32(HOST_BRIDGE, TOM);
424 val = tom & 0xfff;
425 reg = (reg & ~0xfff00000) | (val << 20);
426 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOM, reg);
427 pci_write_config32(HOST_BRIDGE, TOM, reg);
429 reg = pci_read_config32(HOST_BRIDGE, TOM + 4);
430 val = tom & 0xfffff000;
431 reg = (reg & ~0x000fffff) | (val >> 12);
432 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOM + 4, reg);
433 pci_write_config32(HOST_BRIDGE, TOM + 4, reg);
435 /* TOLUD (Top Of Low Usable DRAM) */
436 reg = pci_read_config32(HOST_BRIDGE, TOLUD);
437 val = toludbase & 0xfff;
438 reg = (reg & ~0xfff00000) | (val << 20);
439 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOLUD, reg);
440 pci_write_config32(HOST_BRIDGE, TOLUD, reg);
442 /* TOUUD LSB (Top Of Upper Usable DRAM) */
443 reg = pci_read_config32(HOST_BRIDGE, TOUUD);
444 val = touudbase & 0xfff;
445 reg = (reg & ~0xfff00000) | (val << 20);
446 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOUUD, reg);
447 pci_write_config32(HOST_BRIDGE, TOUUD, reg);
449 /* TOUUD MSB */
450 reg = pci_read_config32(HOST_BRIDGE, TOUUD + 4);
451 val = touudbase & 0xfffff000;
452 reg = (reg & ~0x000fffff) | (val >> 12);
453 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOUUD + 4, reg);
454 pci_write_config32(HOST_BRIDGE, TOUUD + 4, reg);
456 if (reclaim) {
457 /* REMAP BASE */
458 pci_write_config32(HOST_BRIDGE, REMAPBASE, remapbase << 20);
459 pci_write_config32(HOST_BRIDGE, REMAPBASE + 4, remapbase >> 12);
461 /* REMAP LIMIT */
462 pci_write_config32(HOST_BRIDGE, REMAPLIMIT, remaplimit << 20);
463 pci_write_config32(HOST_BRIDGE, REMAPLIMIT + 4, remaplimit >> 12);
465 /* TSEG */
466 reg = pci_read_config32(HOST_BRIDGE, TSEGMB);
467 val = tsegbase & 0xfff;
468 reg = (reg & ~0xfff00000) | (val << 20);
469 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TSEGMB, reg);
470 pci_write_config32(HOST_BRIDGE, TSEGMB, reg);
472 /* GFX stolen memory */
473 reg = pci_read_config32(HOST_BRIDGE, BDSM);
474 val = gfxstolenbase & 0xfff;
475 reg = (reg & ~0xfff00000) | (val << 20);
476 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", BDSM, reg);
477 pci_write_config32(HOST_BRIDGE, BDSM, reg);
479 /* GTT stolen memory */
480 reg = pci_read_config32(HOST_BRIDGE, BGSM);
481 val = gttbase & 0xfff;
482 reg = (reg & ~0xfff00000) | (val << 20);
483 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", BGSM, reg);
484 pci_write_config32(HOST_BRIDGE, BGSM, reg);
486 if (me_uma_size) {
487 reg = pci_read_config32(HOST_BRIDGE, MESEG_MASK + 4);
488 val = (0x80000 - me_uma_size) & 0xfffff000;
489 reg = (reg & ~0x000fffff) | (val >> 12);
490 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", MESEG_MASK + 4, reg);
491 pci_write_config32(HOST_BRIDGE, MESEG_MASK + 4, reg);
493 /* ME base */
494 reg = pci_read_config32(HOST_BRIDGE, MESEG_BASE);
495 val = mestolenbase & 0xfff;
496 reg = (reg & ~0xfff00000) | (val << 20);
497 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", MESEG_BASE, reg);
498 pci_write_config32(HOST_BRIDGE, MESEG_BASE, reg);
500 reg = pci_read_config32(HOST_BRIDGE, MESEG_BASE + 4);
501 val = mestolenbase & 0xfffff000;
502 reg = (reg & ~0x000fffff) | (val >> 12);
503 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", MESEG_BASE + 4, reg);
504 pci_write_config32(HOST_BRIDGE, MESEG_BASE + 4, reg);
506 /* ME mask */
507 reg = pci_read_config32(HOST_BRIDGE, MESEG_MASK);
508 val = (0x80000 - me_uma_size) & 0xfff;
509 reg = (reg & ~0xfff00000) | (val << 20);
510 reg = reg | ME_STLEN_EN; /* Set ME memory enable */
511 reg = reg | MELCK; /* Set lock bit on ME mem */
512 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", MESEG_MASK, reg);
513 pci_write_config32(HOST_BRIDGE, MESEG_MASK, reg);
517 static void write_reset(ramctr_timing *ctrl)
519 int channel, slotrank;
521 /* Choose a populated channel */
522 channel = (ctrl->rankmap[0]) ? 0 : 1;
524 wait_for_iosav(channel);
526 /* Choose a populated rank */
527 slotrank = (ctrl->rankmap[channel] & 1) ? 0 : 2;
529 iosav_write_zqcs_sequence(channel, slotrank, 3, 8, 0);
531 /* This is actually using the IOSAV state machine as a timer */
532 iosav_run_queue(channel, 1, 1);
534 wait_for_iosav(channel);
537 void dram_jedecreset(ramctr_timing *ctrl)
539 u32 reg;
540 int channel;
542 while (!(mchbar_read32(RCOMP_TIMER) & (1 << 16)))
544 do {
545 reg = mchbar_read32(IOSAV_STATUS_ch(0));
546 } while ((reg & 0x14) == 0);
548 /* Set state of memory controller */
549 reg = 0x112;
550 mchbar_write32(MC_INIT_STATE_G, reg);
551 mchbar_write32(MC_INIT_STATE, 0);
552 reg |= 2; /* DDR reset */
553 mchbar_write32(MC_INIT_STATE_G, reg);
555 /* Assert DIMM reset signal */
556 mchbar_clrbits32(MC_INIT_STATE_G, 1 << 1);
558 /* Wait 200us */
559 udelay(200);
561 /* Deassert DIMM reset signal */
562 mchbar_setbits32(MC_INIT_STATE_G, 1 << 1);
564 /* Wait 500us */
565 udelay(500);
567 /* Enable DCLK */
568 mchbar_setbits32(MC_INIT_STATE_G, 1 << 2);
570 /* XXX Wait 20ns */
571 udelay(1);
573 FOR_ALL_CHANNELS {
574 /* Set valid rank CKE */
575 reg = ctrl->rankmap[channel];
576 mchbar_write32(MC_INIT_STATE_ch(channel), reg);
578 /* Wait 10ns for ranks to settle */
579 // udelay(0.01);
581 reg = (reg & ~0xf0) | (ctrl->rankmap[channel] << 4);
582 mchbar_write32(MC_INIT_STATE_ch(channel), reg);
584 /* Write reset using a NOP */
585 write_reset(ctrl);
590	 * DDR3 rank 1 address mirroring swaps the following pins:
591 * A3<->A4, A5<->A6, A7<->A8, BA0<->BA1
593 static void ddr3_mirror_mrreg(int *bank, u32 *addr)
595 *bank = ((*bank >> 1) & 1) | ((*bank << 1) & 2);
596 *addr = (*addr & ~0x1f8) | ((*addr >> 1) & 0xa8) | ((*addr & 0xa8) << 1);
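/*
 * Explanatory note (added): 0x1f8 masks address bits 3-8. ((*addr >> 1) & 0xa8) moves
 * bits A4/A6/A8 down into A3/A5/A7, while ((*addr & 0xa8) << 1) moves A3/A5/A7 up into
 * A4/A6/A8, and the bank line above swaps BA0 with BA1.
 */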
599 static void write_mrreg(ramctr_timing *ctrl, int channel, int slotrank, int reg, u32 val)
601 wait_for_iosav(channel);
603 if (ctrl->rank_mirror[channel][slotrank])
604 ddr3_mirror_mrreg(&reg, &val);
606 const struct iosav_ssq sequence[] = {
607 /* DRAM command MRS */
608 [0] = {
609 .sp_cmd_ctrl = {
610 .command = IOSAV_MRS,
612 .subseq_ctrl = {
613 .cmd_executions = 1,
614 .cmd_delay_gap = 4,
615 .post_ssq_wait = 4,
616 .data_direction = SSQ_NA,
618 .sp_cmd_addr = {
619 .address = val,
620 .rowbits = 6,
621 .bank = reg,
622 .rank = slotrank,
625 /* DRAM command MRS */
626 [1] = {
627 .sp_cmd_ctrl = {
628 .command = IOSAV_MRS,
629 .ranksel_ap = 1,
631 .subseq_ctrl = {
632 .cmd_executions = 1,
633 .cmd_delay_gap = 4,
634 .post_ssq_wait = 4,
635 .data_direction = SSQ_NA,
637 .sp_cmd_addr = {
638 .address = val,
639 .rowbits = 6,
640 .bank = reg,
641 .rank = slotrank,
644 /* DRAM command MRS */
645 [2] = {
646 .sp_cmd_ctrl = {
647 .command = IOSAV_MRS,
649 .subseq_ctrl = {
650 .cmd_executions = 1,
651 .cmd_delay_gap = 4,
652 .post_ssq_wait = ctrl->tMOD,
653 .data_direction = SSQ_NA,
655 .sp_cmd_addr = {
656 .address = val,
657 .rowbits = 6,
658 .bank = reg,
659 .rank = slotrank,
663 iosav_write_sequence(channel, sequence, ARRAY_SIZE(sequence));
665 iosav_run_once_and_wait(channel);
668 /* Obtain optimal power down mode for current configuration */
669 static enum power_down_mode get_power_down_mode(ramctr_timing *ctrl, int channel)
671 int slotrank;
673 if (ctrl->tXP > 8)
674 return PDM_NONE;
676 if (ctrl->tXPDLL > 32)
677 return PDM_PPD;
679 FOR_ALL_POPULATED_RANKS
680 if (!ctrl->info.dimm[channel][slotrank >> 1].flags.dll_off_mode)
681 return PDM_APD_PPD;
683 if (CONFIG(RAMINIT_ALWAYS_ALLOW_DLL_OFF) || get_platform_type() == PLATFORM_MOBILE)
684 return PDM_DLL_OFF;
686 return PDM_APD_PPD;
689 static u32 make_mr0(ramctr_timing *ctrl, int channel, u8 rank)
691 u16 mr0reg, mch_cas, mch_wr;
692 static const u8 mch_wr_t[12] = { 1, 2, 3, 4, 0, 5, 0, 6, 0, 7, 0, 0 };
694 const enum power_down_mode power_down = get_power_down_mode(ctrl, channel);
696 const bool slow_exit = power_down == PDM_DLL_OFF || power_down == PDM_APD_DLL_OFF;
698 /* Convert CAS to MCH register friendly */
699 if (ctrl->CAS < 12) {
700 mch_cas = (u16)((ctrl->CAS - 4) << 1);
701 } else {
702 mch_cas = (u16)(ctrl->CAS - 12);
703 mch_cas = ((mch_cas << 1) | 0x1);
706 /* Convert tWR to MCH register friendly */
707 mch_wr = mch_wr_t[ctrl->tWR - 5];
709 /* DLL Reset - self clearing - set after CLK frequency has been changed */
710 mr0reg = 1 << 8;
712 mr0reg |= (mch_cas & 0x1) << 2;
713 mr0reg |= (mch_cas & 0xe) << 3;
714 mr0reg |= mch_wr << 9;
716 /* Precharge PD - Use slow exit when DLL-off is used - mostly power-saving feature */
717 mr0reg |= !slow_exit << 12;
718 return mr0reg;
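/*
 * Worked example (illustrative): CAS 9 gives mch_cas = (9 - 4) << 1 = 0xa, so bit 2
 * stays 0 and bits 6:4 become 0b101, matching the JEDEC MR0 CL encoding for CL = 9.
 * tWR = 10 looks up mch_wr_t[5] = 5, i.e. MR0 bits 11:9 = 0b101. Bit 8 (DLL reset) is
 * always set, and bit 12 selects fast precharge power-down exit unless DLL-off power
 * down is in use.
 */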
721 static void dram_mr0(ramctr_timing *ctrl, u8 rank, int channel)
723 write_mrreg(ctrl, channel, rank, 0, make_mr0(ctrl, channel, rank));
726 static odtmap get_ODT(ramctr_timing *ctrl, int channel)
728 /* Get ODT based on rankmap */
729 int dimms_per_ch = (ctrl->rankmap[channel] & 1) + ((ctrl->rankmap[channel] >> 2) & 1);
731 if (dimms_per_ch == 1) {
732 return (const odtmap){60, 60};
733 } else {
734 return (const odtmap){120, 30};
738 static u32 encode_odt(u32 odt)
740 switch (odt) {
741 case 30:
742 return (1 << 9) | (1 << 2); /* RZQ/8, RZQ/4 */
743 case 60:
744 return (1 << 2); /* RZQ/4 */
745 case 120:
746 return (1 << 6); /* RZQ/2 */
747 default:
748 case 0:
749 return 0;
753 static u32 make_mr1(ramctr_timing *ctrl, u8 rank, int channel)
755 odtmap odt;
756 u32 mr1reg;
758 odt = get_ODT(ctrl, channel);
759 mr1reg = 2;
761 mr1reg |= encode_odt(odt.rttnom);
763 return mr1reg;
766 static void dram_mr1(ramctr_timing *ctrl, u8 rank, int channel)
768 u16 mr1reg;
770 mr1reg = make_mr1(ctrl, rank, channel);
772 write_mrreg(ctrl, channel, rank, 1, mr1reg);
775 static void dram_mr2(ramctr_timing *ctrl, u8 rank, int channel)
777 const u16 pasr = 0;
778 const u16 cwl = ctrl->CWL - 5;
779 const odtmap odt = get_ODT(ctrl, channel);
781 int srt = 0;
782 if (IS_IVY_CPU(ctrl->cpu) && ctrl->tCK >= TCK_1066MHZ)
783 srt = ctrl->extended_temperature_range && !ctrl->auto_self_refresh;
785 u16 mr2reg = 0;
786 mr2reg |= pasr;
787 mr2reg |= cwl << 3;
788 mr2reg |= ctrl->auto_self_refresh << 6;
789 mr2reg |= srt << 7;
790 mr2reg |= (odt.rttwr / 60) << 9;
792 write_mrreg(ctrl, channel, rank, 2, mr2reg);
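/*
 * Worked example (illustrative): with CWL = 8, no auto self-refresh, normal temperature
 * range and Rtt_WR = 120 ohm, mr2reg = (8 - 5) << 3 | (120 / 60) << 9 = 0x418, i.e.
 * JEDEC MR2 with CWL = 8 and Rtt_WR = RZQ/2.
 */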
794 /* Program MR2 shadow */
795 u32 reg32 = mchbar_read32(TC_MR2_SHADOW_ch(channel));
797 reg32 &= 3 << 14 | 3 << 6;
799 reg32 |= mr2reg & ~(3 << 6);
801 if (srt)
802 reg32 |= 1 << (rank / 2 + 6);
804 if (ctrl->rank_mirror[channel][rank])
805 reg32 |= 1 << (rank / 2 + 14);
807 mchbar_write32(TC_MR2_SHADOW_ch(channel), reg32);
810 static void dram_mr3(ramctr_timing *ctrl, u8 rank, int channel)
812 write_mrreg(ctrl, channel, rank, 3, 0);
815 void dram_mrscommands(ramctr_timing *ctrl)
817 u8 slotrank;
818 int channel;
820 FOR_ALL_POPULATED_CHANNELS {
821 FOR_ALL_POPULATED_RANKS {
822 /* MR2 */
823 dram_mr2(ctrl, slotrank, channel);
825 /* MR3 */
826 dram_mr3(ctrl, slotrank, channel);
828 /* MR1 */
829 dram_mr1(ctrl, slotrank, channel);
831 /* MR0 */
832 dram_mr0(ctrl, slotrank, channel);
836 const struct iosav_ssq zqcl_sequence[] = {
837 /* DRAM command NOP (without ODT nor chip selects) */
838 [0] = {
839 .sp_cmd_ctrl = {
840 .command = IOSAV_NOP & ~(0xff << 8),
842 .subseq_ctrl = {
843 .cmd_executions = 1,
844 .cmd_delay_gap = 4,
845 .post_ssq_wait = 15,
846 .data_direction = SSQ_NA,
848 .sp_cmd_addr = {
849 .address = 2,
850 .rowbits = 6,
851 .bank = 0,
852 .rank = 0,
855 /* DRAM command ZQCL */
856 [1] = {
857 .sp_cmd_ctrl = {
858 .command = IOSAV_ZQCS,
859 .ranksel_ap = 1,
861 .subseq_ctrl = {
862 .cmd_executions = 1,
863 .cmd_delay_gap = 4,
864 .post_ssq_wait = 400,
865 .data_direction = SSQ_NA,
867 .sp_cmd_addr = {
868 .address = 1 << 10,
869 .rowbits = 6,
870 .bank = 0,
871 .rank = 0,
873 .addr_update = {
874 .inc_rank = 1,
875 .addr_wrap = 20,
879 iosav_write_sequence(BROADCAST_CH, zqcl_sequence, ARRAY_SIZE(zqcl_sequence));
881 iosav_run_queue(BROADCAST_CH, 4, 0);
883 FOR_ALL_CHANNELS {
884 wait_for_iosav(channel);
887 /* Refresh enable */
888 mchbar_setbits32(MC_INIT_STATE_G, 1 << 3);
890 FOR_ALL_POPULATED_CHANNELS {
891 mchbar_clrbits32(SCHED_CBIT_ch(channel), 1 << 21);
893 wait_for_iosav(channel);
895 slotrank = (ctrl->rankmap[channel] & 1) ? 0 : 2;
897 wait_for_iosav(channel);
899 iosav_write_zqcs_sequence(channel, slotrank, 4, 101, 31);
901 iosav_run_once_and_wait(channel);
905 static const u32 lane_base[] = {
906 LANEBASE_B0, LANEBASE_B1, LANEBASE_B2, LANEBASE_B3,
907 LANEBASE_B4, LANEBASE_B5, LANEBASE_B6, LANEBASE_B7,
908 LANEBASE_ECC
911 /* Maximum delay for command, control, clock */
912 #define CCC_MAX_PI (2 * QCLK_PI - 1)
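/*
 * Note (added): each delay below is split into a fine PI code (value % QCLK_PI) and a
 * coarse logic delay (value / QCLK_PI). Assuming QCLK_PI is 64, the command, control and
 * clock delays may span two logic-delay steps, hence CCC_MAX_PI = 127.
 */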
914 void program_timings(ramctr_timing *ctrl, int channel)
916 u32 reg_roundtrip_latency, reg_io_latency;
917 int lane;
918 int slotrank, slot;
920 u32 ctl_delay[NUM_SLOTS] = { 0 };
921 int cmd_delay = 0;
923 /* Enable CLK XOVER */
924 u32 clk_pi_coding = get_XOVER_CLK(ctrl->rankmap[channel]);
925 u32 clk_logic_dly = 0;
928 * Compute command timing as abs() of the most negative PI code
929 * across all ranks. Use zero if none of the values is negative.
931 FOR_ALL_POPULATED_RANKS {
932 cmd_delay = MAX(cmd_delay, -ctrl->timings[channel][slotrank].pi_coding);
934 if (cmd_delay > CCC_MAX_PI) {
935 printk(BIOS_ERR, "C%d command delay overflow: %d\n", channel, cmd_delay);
936 cmd_delay = CCC_MAX_PI;
939 for (slot = 0; slot < NUM_SLOTS; slot++) {
940 const int pi_coding_0 = ctrl->timings[channel][2 * slot + 0].pi_coding;
941 const int pi_coding_1 = ctrl->timings[channel][2 * slot + 1].pi_coding;
943 const u8 slot_map = (ctrl->rankmap[channel] >> (2 * slot)) & 3;
945 if (slot_map & 1)
946 ctl_delay[slot] += pi_coding_0 + cmd_delay;
948 if (slot_map & 2)
949 ctl_delay[slot] += pi_coding_1 + cmd_delay;
951 /* If both ranks in a slot are populated, use the average */
952 if (slot_map == 3)
953 ctl_delay[slot] /= 2;
955 if (ctl_delay[slot] > CCC_MAX_PI) {
956 printk(BIOS_ERR, "C%dS%d control delay overflow: %d\n",
957 channel, slot, ctl_delay[slot]);
958 ctl_delay[slot] = CCC_MAX_PI;
961 FOR_ALL_POPULATED_RANKS {
962 int clk_delay = ctrl->timings[channel][slotrank].pi_coding + cmd_delay;
965 * Clock is a differential signal, whereas command and control are not.
966 * This affects its timing, and it is also why it needs a magic offset.
968 clk_delay += ctrl->pi_code_offset;
970 /* Can never happen with valid values */
971 if (clk_delay < 0) {
972 printk(BIOS_ERR, "C%dR%d clock delay underflow: %d\n",
973 channel, slotrank, clk_delay);
974 clk_delay = 0;
977 /* Clock can safely wrap around because it is a periodic signal */
978 clk_delay %= CCC_MAX_PI + 1;
980 clk_pi_coding |= (clk_delay % QCLK_PI) << (6 * slotrank);
981 clk_logic_dly |= (clk_delay / QCLK_PI) << slotrank;
984 /* Enable CMD XOVER */
985 union gdcr_cmd_pi_coding_reg cmd_pi_coding = {
986 .raw = get_XOVER_CMD(ctrl->rankmap[channel]),
988 cmd_pi_coding.cmd_pi_code = cmd_delay % QCLK_PI;
989 cmd_pi_coding.cmd_logic_delay = cmd_delay / QCLK_PI;
991 cmd_pi_coding.ctl_pi_code_d0 = ctl_delay[0] % QCLK_PI;
992 cmd_pi_coding.ctl_pi_code_d1 = ctl_delay[1] % QCLK_PI;
993 cmd_pi_coding.ctl_logic_delay_d0 = ctl_delay[0] / QCLK_PI;
994 cmd_pi_coding.ctl_logic_delay_d1 = ctl_delay[1] / QCLK_PI;
996 mchbar_write32(GDCRCMDPICODING_ch(channel), cmd_pi_coding.raw);
998 mchbar_write32(GDCRCKPICODE_ch(channel), clk_pi_coding);
999 mchbar_write32(GDCRCKLOGICDELAY_ch(channel), clk_logic_dly);
1001 reg_io_latency = mchbar_read32(SC_IO_LATENCY_ch(channel));
1002 reg_io_latency &= ~0xffff;
1004 reg_roundtrip_latency = 0;
1006 FOR_ALL_POPULATED_RANKS {
1007 reg_io_latency |= ctrl->timings[channel][slotrank].io_latency << (4 * slotrank);
1009 reg_roundtrip_latency |=
1010 ctrl->timings[channel][slotrank].roundtrip_latency << (8 * slotrank);
1012 FOR_ALL_LANES {
1013 const u16 rcven = ctrl->timings[channel][slotrank].lanes[lane].rcven;
1014 const u8 dqs_p = ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p;
1015 const u8 dqs_n = ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n;
1016 const union gdcr_rx_reg gdcr_rx = {
1017 .rcven_pi_code = rcven % QCLK_PI,
1018 .rx_dqs_p_pi_code = dqs_p,
1019 .rcven_logic_delay = rcven / QCLK_PI,
1020 .rx_dqs_n_pi_code = dqs_n,
1022 mchbar_write32(lane_base[lane] + GDCRRX(channel, slotrank),
1023 gdcr_rx.raw);
1025 const u16 tx_dqs = ctrl->timings[channel][slotrank].lanes[lane].tx_dqs;
1026 const int tx_dq = ctrl->timings[channel][slotrank].lanes[lane].tx_dq;
1027 const union gdcr_tx_reg gdcr_tx = {
1028 .tx_dq_pi_code = tx_dq % QCLK_PI,
1029 .tx_dqs_pi_code = tx_dqs % QCLK_PI,
1030 .tx_dqs_logic_delay = tx_dqs / QCLK_PI,
1031 .tx_dq_logic_delay = tx_dq / QCLK_PI,
1033 mchbar_write32(lane_base[lane] + GDCRTX(channel, slotrank),
1034 gdcr_tx.raw);
1037 mchbar_write32(SC_ROUNDT_LAT_ch(channel), reg_roundtrip_latency);
1038 mchbar_write32(SC_IO_LATENCY_ch(channel), reg_io_latency);
1041 static void test_rcven(ramctr_timing *ctrl, int channel, int slotrank)
1043 wait_for_iosav(channel);
1045 /* Send a burst of 16 back-to-back read commands (4 DCLK apart) */
1046 iosav_write_read_mpr_sequence(channel, slotrank, ctrl->tMOD, 1, 3, 15, ctrl->CAS + 36);
1048 iosav_run_once_and_wait(channel);
1051 static int does_lane_work(ramctr_timing *ctrl, int channel, int slotrank, int lane)
1053 u32 rcven = ctrl->timings[channel][slotrank].lanes[lane].rcven;
1055 return (mchbar_read32(lane_base[lane] +
1056 GDCRTRAININGRESULT(channel, (rcven / 32) & 1)) >> (rcven % 32)) & 1;
1059 struct run {
1060 int middle;
1061 int end;
1062 int start;
1063 int all;
1064 int length;
1067 static struct run get_longest_zero_run(int *seq, int sz)
1069 int i, ls;
1070 int bl = 0, bs = 0;
1071 struct run ret;
1073 ls = 0;
1074 for (i = 0; i < 2 * sz; i++)
1075 if (seq[i % sz]) {
1076 if (i - ls > bl) {
1077 bl = i - ls;
1078 bs = ls;
1080 ls = i + 1;
1082 if (bl == 0) {
1083 ret.middle = sz / 2;
1084 ret.start = 0;
1085 ret.end = sz;
1086 ret.length = sz;
1087 ret.all = 1;
1088 return ret;
1091 ret.start = bs % sz;
1092 ret.end = (bs + bl - 1) % sz;
1093 ret.middle = (bs + (bl - 1) / 2) % sz;
1094 ret.length = bl;
1095 ret.all = 0;
1097 return ret;
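/*
 * Worked example (illustrative): for seq = {1, 0, 0, 1, 0, 0, 0, 1} and sz = 8, the
 * longest circular run of zeroes spans indices 4-6, so the function returns start = 4,
 * end = 6, middle = 5, length = 3 and all = 0.
 */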
1100 #define RCVEN_COARSE_PI_LENGTH (2 * QCLK_PI)
1102 static void find_rcven_pi_coarse(ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
1104 int rcven;
1105 int statistics[NUM_LANES][RCVEN_COARSE_PI_LENGTH];
1106 int lane;
1108 for (rcven = 0; rcven < RCVEN_COARSE_PI_LENGTH; rcven++) {
1109 FOR_ALL_LANES {
1110 ctrl->timings[channel][slotrank].lanes[lane].rcven = rcven;
1112 program_timings(ctrl, channel);
1114 test_rcven(ctrl, channel, slotrank);
1116 FOR_ALL_LANES {
1117 statistics[lane][rcven] =
1118 !does_lane_work(ctrl, channel, slotrank, lane);
1121 FOR_ALL_LANES {
1122 struct run rn = get_longest_zero_run(statistics[lane], RCVEN_COARSE_PI_LENGTH);
1123 ctrl->timings[channel][slotrank].lanes[lane].rcven = rn.middle;
1124 upperA[lane] = rn.end;
1125 if (upperA[lane] < rn.middle)
1126 upperA[lane] += 2 * QCLK_PI;
1128 printram("rcven: %d, %d, %d: % 4d-% 4d-% 4d\n",
1129 channel, slotrank, lane, rn.start, rn.middle, rn.end);
1133 static void fine_tune_rcven_pi(ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
1135 int rcven_delta;
1136 int statistics[NUM_LANES][51] = {0};
1137 int lane, i;
1139 for (rcven_delta = -25; rcven_delta <= 25; rcven_delta++) {
1140 FOR_ALL_LANES {
1141 ctrl->timings[channel][slotrank].lanes[lane].rcven
1142 = upperA[lane] + rcven_delta + QCLK_PI;
1144 program_timings(ctrl, channel);
1146 for (i = 0; i < 100; i++) {
1147 test_rcven(ctrl, channel, slotrank);
1148 FOR_ALL_LANES {
1149 statistics[lane][rcven_delta + 25] +=
1150 does_lane_work(ctrl, channel, slotrank, lane);
1154 FOR_ALL_LANES {
1155 int last_zero, first_all;
1157 for (last_zero = -25; last_zero <= 25; last_zero++)
1158 if (statistics[lane][last_zero + 25])
1159 break;
1161 last_zero--;
1162 for (first_all = -25; first_all <= 25; first_all++)
1163 if (statistics[lane][first_all + 25] == 100)
1164 break;
1166 printram("lane %d: %d, %d\n", lane, last_zero, first_all);
1168 ctrl->timings[channel][slotrank].lanes[lane].rcven =
1169 (last_zero + first_all) / 2 + upperA[lane];
1171 printram("Aval: %d, %d, %d: % 4d\n", channel, slotrank,
1172 lane, ctrl->timings[channel][slotrank].lanes[lane].rcven);
1177 * Once the DQS high phase has been found (for each DRAM) the next stage
1178 * is to find out the round trip latency, by locating the preamble cycle.
1179 * This is achieved by trying smaller and smaller roundtrip values until
1180 * the strobe sampling is done on the preamble cycle.
1182 static int find_roundtrip_latency(ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
1184 int works[NUM_LANES];
1185 int lane;
1187 while (1) {
1188 int all_works = 1, some_works = 0;
1190 program_timings(ctrl, channel);
1191 test_rcven(ctrl, channel, slotrank);
1193 FOR_ALL_LANES {
1194 works[lane] = !does_lane_work(ctrl, channel, slotrank, lane);
1196 if (works[lane])
1197 some_works = 1;
1198 else
1199 all_works = 0;
1202 /* If every lane is working, exit */
1203 if (all_works)
1204 return 0;
1207 * If all bits are one (everyone is failing), decrement
1208 * the roundtrip value by two, and do another iteration.
1210 if (!some_works) {
1211 /* Guard against roundtrip latency underflow */
1212 if (ctrl->timings[channel][slotrank].roundtrip_latency < 2) {
1213 printk(BIOS_EMERG, "Roundtrip latency underflow: %d, %d\n",
1214 channel, slotrank);
1215 return MAKE_ERR;
1217 ctrl->timings[channel][slotrank].roundtrip_latency -= 2;
1218 printram("4024 -= 2;\n");
1219 continue;
1223 * Else (if some lanes are failing), increase the rank's
1224 * I/O latency by 2, and increase rcven logic delay by 2
1225 * on the working lanes, then perform another iteration.
1227 ctrl->timings[channel][slotrank].io_latency += 2;
1228 printram("4028 += 2;\n");
1230 /* Guard against I/O latency overflow */
1231 if (ctrl->timings[channel][slotrank].io_latency >= 16) {
1232 printk(BIOS_EMERG, "I/O latency overflow: %d, %d\n",
1233 channel, slotrank);
1234 return MAKE_ERR;
1236 FOR_ALL_LANES if (works[lane]) {
1237 ctrl->timings[channel][slotrank].lanes[lane].rcven += 2 * QCLK_PI;
1238 upperA[lane] += 2 * QCLK_PI;
1239 printram("increment %d, %d, %d\n", channel, slotrank, lane);
1242 return 0;
1245 static int get_logic_delay_delta(ramctr_timing *ctrl, int channel, int slotrank)
1247 int lane;
1248 u16 logic_delay_min = 7;
1249 u16 logic_delay_max = 0;
1251 FOR_ALL_LANES {
1252 const u16 logic_delay = ctrl->timings[channel][slotrank].lanes[lane].rcven >> 6;
1254 logic_delay_min = MIN(logic_delay_min, logic_delay);
1255 logic_delay_max = MAX(logic_delay_max, logic_delay);
1258 if (logic_delay_max < logic_delay_min) {
1259 printk(BIOS_EMERG, "Logic delay max < min (%u < %u): %d, %d\n",
1260 logic_delay_max, logic_delay_min, channel, slotrank);
1263 assert(logic_delay_max >= logic_delay_min);
1265 return logic_delay_max - logic_delay_min;
1268 static int align_rt_io_latency(ramctr_timing *ctrl, int channel, int slotrank, int prev)
1270 int latency_offset = 0;
1272 /* Get changed maxima */
1273 const int post = get_logic_delay_delta(ctrl, channel, slotrank);
1275 if (prev < post)
1276 latency_offset = +1;
1278 else if (prev > post)
1279 latency_offset = -1;
1281 else
1282 latency_offset = 0;
1284 ctrl->timings[channel][slotrank].io_latency += latency_offset;
1285 ctrl->timings[channel][slotrank].roundtrip_latency += latency_offset;
1286 printram("4024 += %d;\n", latency_offset);
1287 printram("4028 += %d;\n", latency_offset);
1289 return post;
1292 static void compute_final_logic_delay(ramctr_timing *ctrl, int channel, int slotrank)
1294 u16 logic_delay_min = 7;
1295 int lane;
1297 FOR_ALL_LANES {
1298 const u16 logic_delay = ctrl->timings[channel][slotrank].lanes[lane].rcven >> 6;
1300 logic_delay_min = MIN(logic_delay_min, logic_delay);
1303 if (logic_delay_min >= 2) {
1304 printk(BIOS_WARNING, "Logic delay %u greater than 1: %d %d\n",
1305 logic_delay_min, channel, slotrank);
1308 FOR_ALL_LANES {
1309 ctrl->timings[channel][slotrank].lanes[lane].rcven -= logic_delay_min << 6;
1311 ctrl->timings[channel][slotrank].io_latency -= logic_delay_min;
1312 printram("4028 -= %d;\n", logic_delay_min);
1315 int receive_enable_calibration(ramctr_timing *ctrl)
1317 int channel, slotrank, lane;
1318 int err;
1320 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
1321 int all_high, some_high;
1322 int upperA[NUM_LANES];
1323 int prev;
1325 wait_for_iosav(channel);
1327 iosav_write_prea_sequence(channel, slotrank, ctrl->tRP, 0);
1329 iosav_run_once_and_wait(channel);
1331 const union gdcr_training_mod_reg training_mod = {
1332 .receive_enable_mode = 1,
1333 .training_rank_sel = slotrank,
1334 .odt_always_on = 1,
1336 mchbar_write32(GDCRTRAININGMOD, training_mod.raw);
1338 ctrl->timings[channel][slotrank].io_latency = 4;
1339 ctrl->timings[channel][slotrank].roundtrip_latency = 55;
1340 program_timings(ctrl, channel);
1342 find_rcven_pi_coarse(ctrl, channel, slotrank, upperA);
1344 all_high = 1;
1345 some_high = 0;
1346 FOR_ALL_LANES {
1347 if (ctrl->timings[channel][slotrank].lanes[lane].rcven >= QCLK_PI)
1348 some_high = 1;
1349 else
1350 all_high = 0;
1353 if (all_high) {
1354 ctrl->timings[channel][slotrank].io_latency--;
1355 printram("4028--;\n");
1356 FOR_ALL_LANES {
1357 ctrl->timings[channel][slotrank].lanes[lane].rcven -= QCLK_PI;
1358 upperA[lane] -= QCLK_PI;
1360 } else if (some_high) {
1361 ctrl->timings[channel][slotrank].roundtrip_latency++;
1362 ctrl->timings[channel][slotrank].io_latency++;
1363 printram("4024++;\n");
1364 printram("4028++;\n");
1367 program_timings(ctrl, channel);
1369 prev = get_logic_delay_delta(ctrl, channel, slotrank);
1371 err = find_roundtrip_latency(ctrl, channel, slotrank, upperA);
1372 if (err)
1373 return err;
1375 prev = align_rt_io_latency(ctrl, channel, slotrank, prev);
1377 fine_tune_rcven_pi(ctrl, channel, slotrank, upperA);
1379 prev = align_rt_io_latency(ctrl, channel, slotrank, prev);
1381 compute_final_logic_delay(ctrl, channel, slotrank);
1383 align_rt_io_latency(ctrl, channel, slotrank, prev);
1385 printram("4/8: %d, %d, % 4d, % 4d\n", channel, slotrank,
1386 ctrl->timings[channel][slotrank].roundtrip_latency,
1387 ctrl->timings[channel][slotrank].io_latency);
1389 printram("final results:\n");
1390 FOR_ALL_LANES
1391 printram("Aval: %d, %d, %d: % 4d\n", channel, slotrank, lane,
1392 ctrl->timings[channel][slotrank].lanes[lane].rcven);
1394 mchbar_write32(GDCRTRAININGMOD, 0);
1396 toggle_io_reset();
1399 FOR_ALL_POPULATED_CHANNELS {
1400 program_timings(ctrl, channel);
1403 return 0;
1406 static void test_tx_dq(ramctr_timing *ctrl, int channel, int slotrank)
1408 int lane;
1410 FOR_ALL_LANES {
1411 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel, lane), 0);
1412 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel, lane));
1415 wait_for_iosav(channel);
1417 iosav_write_misc_write_sequence(ctrl, channel, slotrank,
1418 MAX(ctrl->tRRD, (ctrl->tFAW >> 2) + 1), 4, 4, 500, 18);
1420 iosav_run_once_and_wait(channel);
1422 iosav_write_prea_act_read_sequence(ctrl, channel, slotrank);
1424 iosav_run_once_and_wait(channel);
1427 static void tx_dq_threshold_process(int *data, const int count)
1429 int min = data[0];
1430 int max = min;
1431 int i;
1432 for (i = 1; i < count; i++) {
1433 if (min > data[i])
1434 min = data[i];
1436 if (max < data[i])
1437 max = data[i];
1439 int threshold = min / 2 + max / 2;
1440 for (i = 0; i < count; i++)
1441 data[i] = data[i] > threshold;
1443 printram("threshold=%d min=%d max=%d\n", threshold, min, max);
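/*
 * Worked example (illustrative): error counts {0, 1, 250, 255} give
 * threshold = 0 / 2 + 255 / 2 = 127, so the data is reduced to {0, 0, 1, 1}.
 */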
1446 static int tx_dq_write_leveling(ramctr_timing *ctrl, int channel, int slotrank)
1448 int tx_dq;
1449 int stats[NUM_LANES][MAX_TX_DQ + 1];
1450 int lane;
1452 wait_for_iosav(channel);
1454 iosav_write_prea_sequence(channel, slotrank, ctrl->tRP, 18);
1456 iosav_run_once_and_wait(channel);
1458 for (tx_dq = 0; tx_dq <= MAX_TX_DQ; tx_dq++) {
1459 FOR_ALL_LANES ctrl->timings[channel][slotrank].lanes[lane].tx_dq = tx_dq;
1460 program_timings(ctrl, channel);
1462 test_tx_dq(ctrl, channel, slotrank);
1464 FOR_ALL_LANES {
1465 stats[lane][tx_dq] = mchbar_read32(
1466 IOSAV_By_ERROR_COUNT_ch(channel, lane));
1469 FOR_ALL_LANES {
1470 struct run rn = get_longest_zero_run(stats[lane], ARRAY_SIZE(stats[lane]));
1472 if (rn.all || rn.length < 8) {
1473 printk(BIOS_EMERG, "tx_dq write leveling failed: %d, %d, %d\n",
1474 channel, slotrank, lane);
1476 * With command training not being done yet, the lane can be erroneous.
1477 * Take the average as reference and try again to find a run.
1479 tx_dq_threshold_process(stats[lane], ARRAY_SIZE(stats[lane]));
1480 rn = get_longest_zero_run(stats[lane], ARRAY_SIZE(stats[lane]));
1482 if (rn.all || rn.length < 8) {
1483 printk(BIOS_EMERG, "tx_dq recovery failed\n");
1484 return MAKE_ERR;
1487 ctrl->timings[channel][slotrank].lanes[lane].tx_dq = rn.middle;
1488 printram("tx_dq: %d, %d, %d: % 4d-% 4d-% 4d\n",
1489 channel, slotrank, lane, rn.start, rn.middle, rn.end);
1491 return 0;
1494 static int get_precedening_channels(ramctr_timing *ctrl, int target_channel)
1496 int channel, ret = 0;
1498 FOR_ALL_POPULATED_CHANNELS if (channel < target_channel)
1499 ret++;
1501 return ret;
1504 /* Each cacheline is 64 bits long */
1505 static void program_wdb_pattern_length(int channel, const unsigned int num_cachelines)
1507 mchbar_write8(IOSAV_DATA_CTL_ch(channel), num_cachelines / 8 - 1);
1510 static void fill_pattern0(ramctr_timing *ctrl, int channel, u32 a, u32 b)
1512 unsigned int j;
1513 unsigned int channel_offset = get_precedening_channels(ctrl, channel) * 64;
1514 uintptr_t addr;
1516 for (j = 0; j < 16; j++) {
1517 addr = 0x04000000 + channel_offset + 4 * j;
1518 write32p(addr, j & 2 ? b : a);
1521 sfence();
1523 program_wdb_pattern_length(channel, 8);
1526 static int num_of_channels(const ramctr_timing *ctrl)
1528 int ret = 0;
1529 int channel;
1530 FOR_ALL_POPULATED_CHANNELS ret++;
1531 return ret;
1534 static void fill_pattern1(ramctr_timing *ctrl, int channel)
1536 unsigned int j;
1537 unsigned int channel_offset = get_precedening_channels(ctrl, channel) * 64;
1538 unsigned int channel_step = 64 * num_of_channels(ctrl);
1539 uintptr_t addr;
1541 for (j = 0; j < 16; j++) {
1542 addr = 0x04000000 + channel_offset + j * 4;
1543 write32p(addr, 0xffffffff);
1545 for (j = 0; j < 16; j++) {
1546 addr = 0x04000000 + channel_offset + channel_step + j * 4;
1547 write32p(addr, 0);
1549 sfence();
1551 program_wdb_pattern_length(channel, 16);
1554 #define TX_DQS_PI_LENGTH (2 * QCLK_PI)
1556 static int write_level_rank(ramctr_timing *ctrl, int channel, int slotrank)
1558 int tx_dqs;
1559 int statistics[NUM_LANES][TX_DQS_PI_LENGTH];
1560 int lane;
1562 const union gdcr_training_mod_reg training_mod = {
1563 .write_leveling_mode = 1,
1564 .training_rank_sel = slotrank,
1565 .enable_dqs_wl = 5,
1566 .odt_always_on = 1,
1567 .force_drive_enable = 1,
1569 mchbar_write32(GDCRTRAININGMOD, training_mod.raw);
1571 u32 mr1reg = make_mr1(ctrl, slotrank, channel) | 1 << 7;
1572 int bank = 1;
1574 if (ctrl->rank_mirror[channel][slotrank])
1575 ddr3_mirror_mrreg(&bank, &mr1reg);
1577 wait_for_iosav(channel);
1579 iosav_write_jedec_write_leveling_sequence(ctrl, channel, slotrank, bank, mr1reg);
1581 for (tx_dqs = 0; tx_dqs < TX_DQS_PI_LENGTH; tx_dqs++) {
1582 FOR_ALL_LANES {
1583 ctrl->timings[channel][slotrank].lanes[lane].tx_dqs = tx_dqs;
1585 program_timings(ctrl, channel);
1587 iosav_run_once_and_wait(channel);
1589 FOR_ALL_LANES {
1590 statistics[lane][tx_dqs] = !((mchbar_read32(lane_base[lane] +
1591 GDCRTRAININGRESULT(channel, (tx_dqs / 32) & 1)) >>
1592 (tx_dqs % 32)) & 1);
1595 FOR_ALL_LANES {
1596 struct run rn = get_longest_zero_run(statistics[lane], TX_DQS_PI_LENGTH);
1598 * tx_dq is a direct function of tx_dqs's 6 LSBs. Some tests increment the value
1599 * of tx_dqs by a small value, which might cause the 6-bit value to overflow if
1600 * it's close to 0x3f. Increment the value by a small offset if it's likely
1601	 * to overflow, to make sure it won't overflow while running tests and brick
1602	 * the system due to a non-matching tx_dq.
1604 * TODO: find out why some tests (edge write discovery) increment tx_dqs.
1606 if ((rn.start & 0x3f) == 0x3e)
1607 rn.start += 2;
1608 else if ((rn.start & 0x3f) == 0x3f)
1609 rn.start += 1;
1611 ctrl->timings[channel][slotrank].lanes[lane].tx_dqs = rn.start;
1612 if (rn.all) {
1613 printk(BIOS_EMERG, "JEDEC write leveling failed: %d, %d, %d\n",
1614 channel, slotrank, lane);
1616 return MAKE_ERR;
1618 printram("tx_dqs: %d, %d, %d: % 4d-% 4d-% 4d\n",
1619 channel, slotrank, lane, rn.start, rn.middle, rn.end);
1621 return 0;
1624 static int get_dqs_flyby_adjust(u64 val)
1626 int i;
1627 /* DQS is good enough */
1628 if (val == 0xffffffffffffffffLL)
1629 return 0;
1630 if (val >= 0xf000000000000000LL) {
1631 /* DQS is late, needs negative adjustment */
1632 for (i = 0; i < 8; i++)
1633 if (val << (8 * (7 - i) + 4))
1634 return -i;
1635 } else {
1636 /* DQS is early, needs positive adjustment */
1637 for (i = 0; i < 8; i++)
1638 if (val >> (8 * (7 - i) + 4))
1639 return i;
1641 return 8;
1644 static void train_write_flyby(ramctr_timing *ctrl)
1646 int channel, slotrank, lane, old;
1648 const union gdcr_training_mod_reg training_mod = {
1649 .dq_dqs_training_res = 1,
1651 mchbar_write32(GDCRTRAININGMOD, training_mod.raw);
1653 FOR_ALL_POPULATED_CHANNELS {
1654 fill_pattern1(ctrl, channel);
1656 FOR_ALL_POPULATED_CHANNELS FOR_ALL_POPULATED_RANKS {
1657 /* Reset read and write WDB pointers */
1658 mchbar_write32(IOSAV_DATA_CTL_ch(channel), 0x10001);
1660 wait_for_iosav(channel);
1662 iosav_write_misc_write_sequence(ctrl, channel, slotrank, 3, 1, 3, 3, 31);
1664 iosav_run_once_and_wait(channel);
1666 const struct iosav_ssq rd_sequence[] = {
1667 /* DRAM command PREA */
1668 [0] = {
1669 .sp_cmd_ctrl = {
1670 .command = IOSAV_PRE,
1671 .ranksel_ap = 1,
1673 .subseq_ctrl = {
1674 .cmd_executions = 1,
1675 .cmd_delay_gap = 3,
1676 .post_ssq_wait = ctrl->tRP,
1677 .data_direction = SSQ_NA,
1679 .sp_cmd_addr = {
1680 .address = 1 << 10,
1681 .rowbits = 6,
1682 .bank = 0,
1683 .rank = slotrank,
1685 .addr_update = {
1686 .addr_wrap = 18,
1689 /* DRAM command ACT */
1690 [1] = {
1691 .sp_cmd_ctrl = {
1692 .command = IOSAV_ACT,
1693 .ranksel_ap = 1,
1695 .subseq_ctrl = {
1696 .cmd_executions = 1,
1697 .cmd_delay_gap = 3,
1698 .post_ssq_wait = ctrl->tRCD,
1699 .data_direction = SSQ_NA,
1701 .sp_cmd_addr = {
1702 .address = 0,
1703 .rowbits = 6,
1704 .bank = 0,
1705 .rank = slotrank,
1708 /* DRAM command RDA */
1709 [2] = {
1710 .sp_cmd_ctrl = {
1711 .command = IOSAV_RD,
1712 .ranksel_ap = 3,
1714 .subseq_ctrl = {
1715 .cmd_executions = 1,
1716 .cmd_delay_gap = 3,
1717 .post_ssq_wait = ctrl->tRP +
1718 ctrl->timings[channel][slotrank].roundtrip_latency +
1719 ctrl->timings[channel][slotrank].io_latency,
1720 .data_direction = SSQ_RD,
1722 .sp_cmd_addr = {
1723 .address = 8,
1724 .rowbits = 6,
1725 .bank = 0,
1726 .rank = slotrank,
1730 iosav_write_sequence(channel, rd_sequence, ARRAY_SIZE(rd_sequence));
1732 iosav_run_once_and_wait(channel);
1734 FOR_ALL_LANES {
1735 u64 res = mchbar_read32(lane_base[lane] + GDCRTRAININGRESULT1(channel));
1736 res |= ((u64)mchbar_read32(lane_base[lane] +
1737 GDCRTRAININGRESULT2(channel))) << 32;
1739 old = ctrl->timings[channel][slotrank].lanes[lane].tx_dqs;
1740 ctrl->timings[channel][slotrank].lanes[lane].tx_dqs +=
1741 get_dqs_flyby_adjust(res) * QCLK_PI;
1743 printram("High adjust %d:%016llx\n", lane, res);
1744 printram("Bval+: %d, %d, %d, % 4d -> % 4d\n", channel, slotrank, lane,
1745 old, ctrl->timings[channel][slotrank].lanes[lane].tx_dqs);
1748 mchbar_write32(GDCRTRAININGMOD, 0);
1751 static void disable_refresh_machine(ramctr_timing *ctrl)
1753 int channel;
1755 FOR_ALL_POPULATED_CHANNELS {
1756 /* choose an existing rank */
1757 const int slotrank = !(ctrl->rankmap[channel] & 1) ? 2 : 0;
1759 iosav_write_zqcs_sequence(channel, slotrank, 4, 4, 31);
1761 iosav_run_once_and_wait(channel);
1763 mchbar_setbits32(SCHED_CBIT_ch(channel), 1 << 21);
1766 /* Refresh disable */
1767 mchbar_clrbits32(MC_INIT_STATE_G, 1 << 3);
1769 FOR_ALL_POPULATED_CHANNELS {
1770 /* Execute the same command queue */
1771 iosav_run_once_and_wait(channel);
1776 * Compensate the skew between CMD/ADDR/CLK and DQ/DQS lanes.
1778	 * Since DDR3 uses a fly-by topology, the data and strobe signals reach the chips at different
1779 * times with respect to command, address and clock signals. By delaying either all DQ/DQS or
1780 * all CMD/ADDR/CLK signals, a full phase shift can be introduced. It is assumed that the
1781 * CLK/ADDR/CMD signals have the same routing delay.
1783 * To find the required phase shift the DRAM is placed in "write leveling" mode. In this mode,
1784 * the DRAM-chip samples the CLK on every DQS edge and feeds back the sampled value on the data
1785 * lanes (DQ).
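/*
 * Note (added): JEDEC write leveling is entered through MR1: bit 7 is the write leveling
 * enable and bit 12 (Qoff) turns off the DQ output drivers. Below, all ranks first get
 * both bits set; write_level_rank() then rewrites MR1 with only bit 7 for the rank under
 * test, so that rank alone drives the sampled clock back on its DQ lanes.
 */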
1787 static int jedec_write_leveling(ramctr_timing *ctrl)
1789 int channel, slotrank;
1791 disable_refresh_machine(ctrl);
1793 /* Enable write leveling on all ranks
1794 Disable all DQ outputs
1795 Only NOP is allowed in this mode */
1796 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
1797 write_mrreg(ctrl, channel, slotrank, 1,
1798 make_mr1(ctrl, slotrank, channel) | 1 << 12 | 1 << 7);
1800 /* Needs to be programmed before I/O reset below */
1801 const union gdcr_training_mod_reg training_mod = {
1802 .write_leveling_mode = 1,
1803 .enable_dqs_wl = 5,
1804 .odt_always_on = 1,
1805 .force_drive_enable = 1,
1807 mchbar_write32(GDCRTRAININGMOD, training_mod.raw);
1809 toggle_io_reset();
1811 /* Set any valid value for tx_dqs, it gets corrected later */
1812 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
1813 const int err = write_level_rank(ctrl, channel, slotrank);
1814 if (err)
1815 return err;
1818 /* Disable write leveling on all ranks */
1819 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
1820 write_mrreg(ctrl, channel, slotrank, 1, make_mr1(ctrl, slotrank, channel));
1822 mchbar_write32(GDCRTRAININGMOD, 0);
1824 FOR_ALL_POPULATED_CHANNELS
1825 wait_for_iosav(channel);
1827 /* Refresh enable */
1828 mchbar_setbits32(MC_INIT_STATE_G, 1 << 3);
1830 FOR_ALL_POPULATED_CHANNELS {
1831 mchbar_clrbits32(SCHED_CBIT_ch(channel), 1 << 21);
1832 mchbar_read32(IOSAV_STATUS_ch(channel));
1833 wait_for_iosav(channel);
1835 iosav_write_zqcs_sequence(channel, 0, 4, 101, 31);
1837 iosav_run_once_and_wait(channel);
1840 toggle_io_reset();
1842 return 0;
1845 int write_training(ramctr_timing *ctrl)
1847 int channel, slotrank;
1848 int err;
1851 * Set the DEC_WRD bit, required for the write flyby algorithm.
1852 * Needs to be done before starting the write training procedure.
1854 FOR_ALL_POPULATED_CHANNELS
1855 mchbar_setbits32(TC_RWP_ch(channel), 1 << 27);
1857 printram("CPE\n");
1859 err = jedec_write_leveling(ctrl);
1860 if (err)
1861 return err;
1863 printram("CPF\n");
1865 FOR_ALL_POPULATED_CHANNELS {
1866 fill_pattern0(ctrl, channel, 0xaaaaaaaa, 0x55555555);
1869 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
1870 err = tx_dq_write_leveling(ctrl, channel, slotrank);
1871 if (err)
1872 return err;
1875 FOR_ALL_POPULATED_CHANNELS
1876 program_timings(ctrl, channel);
1878 /* measure and adjust tx_dqs timings */
1879 train_write_flyby(ctrl);
1881 FOR_ALL_POPULATED_CHANNELS
1882 program_timings(ctrl, channel);
1884 return 0;
1887 static int test_command_training(ramctr_timing *ctrl, int channel, int slotrank)
1889 struct ram_rank_timings saved_rt = ctrl->timings[channel][slotrank];
1890 int tx_dq_delta;
1891 int lanes_ok = 0;
1892 int ctr = 0;
1893 int lane;
1895 for (tx_dq_delta = -5; tx_dq_delta <= 5; tx_dq_delta++) {
1896 FOR_ALL_LANES {
1897 ctrl->timings[channel][slotrank].lanes[lane].tx_dq =
1898 saved_rt.lanes[lane].tx_dq + tx_dq_delta;
1900 program_timings(ctrl, channel);
1901 FOR_ALL_LANES {
1902 mchbar_write32(IOSAV_By_ERROR_COUNT(lane), 0);
1905 /* Reset read WDB pointer */
1906 mchbar_write32(IOSAV_DATA_CTL_ch(channel), 0x1f);
1908 wait_for_iosav(channel);
1910 iosav_write_command_training_sequence(ctrl, channel, slotrank, ctr);
1912 /* Program LFSR for the RD/WR subsequences */
1913 mchbar_write32(IOSAV_n_ADDRESS_LFSR_ch(channel, 1), 0x389abcd);
1914 mchbar_write32(IOSAV_n_ADDRESS_LFSR_ch(channel, 2), 0x389abcd);
1916 iosav_run_once_and_wait(channel);
1918 FOR_ALL_LANES {
1919 u32 r32 = mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel, lane));
1921 if (r32 == 0)
1922 lanes_ok |= 1 << lane;
1924 ctr++;
1925 if (lanes_ok == ((1 << ctrl->lanes) - 1))
1926 break;
1929 ctrl->timings[channel][slotrank] = saved_rt;
1931 return lanes_ok != ((1 << ctrl->lanes) - 1);
1934 static void fill_pattern5(ramctr_timing *ctrl, int channel, int patno)
1936 unsigned int i, j;
1937 unsigned int offset = get_precedening_channels(ctrl, channel) * 64;
1938 unsigned int step = 64 * num_of_channels(ctrl);
1939 uintptr_t addr;
1941 if (patno) {
1942 u8 base8 = 0x80 >> ((patno - 1) % 8);
1943 u32 base = base8 | (base8 << 8) | (base8 << 16) | (base8 << 24);
1944 for (i = 0; i < 32; i++) {
1945 for (j = 0; j < 16; j++) {
1946 u32 val = use_base[patno - 1][i] & (1 << (j / 2)) ? base : 0;
1948 if (invert[patno - 1][i] & (1 << (j / 2)))
1949 val = ~val;
1951 addr = (1 << 26) + offset + i * step + j * 4;
1952 write32p(addr, val);
1955 } else {
1956 for (i = 0; i < ARRAY_SIZE(pattern); i++) {
1957 for (j = 0; j < 16; j++) {
1958 const u32 val = pattern[i][j];
1959 addr = (1 << 26) + offset + i * step + j * 4;
1960 write32p(addr, val);
1963 sfence();
1966 program_wdb_pattern_length(channel, 256);
1969 static void reprogram_320c(ramctr_timing *ctrl)
1971 disable_refresh_machine(ctrl);
1973 /* JEDEC reset */
1974 dram_jedecreset(ctrl);
1976 /* MRS commands */
1977 dram_mrscommands(ctrl);
1979 toggle_io_reset();
1982 #define CT_MIN_PI (-CCC_MAX_PI)
1983 #define CT_MAX_PI (+CCC_MAX_PI + 1)
1984 #define CT_PI_LENGTH (CT_MAX_PI - CT_MIN_PI + 1)
1986 #define MIN_C320C_LEN 13
1988 static int try_cmd_stretch(ramctr_timing *ctrl, int channel, int cmd_stretch)
1990 struct ram_rank_timings saved_timings[NUM_CHANNELS][NUM_SLOTRANKS];
1991 int slotrank;
1992 int command_pi;
1993 int stat[NUM_SLOTRANKS][CT_PI_LENGTH];
1994 int delta = 0;
1996 printram("Trying cmd_stretch %d on channel %d\n", cmd_stretch, channel);
1998 FOR_ALL_POPULATED_RANKS {
1999 saved_timings[channel][slotrank] = ctrl->timings[channel][slotrank];
2002 ctrl->cmd_stretch[channel] = cmd_stretch;
2004 const union tc_rap_reg tc_rap = {
2005 .tRRD = ctrl->tRRD,
2006 .tRTP = ctrl->tRTP,
2007 .tCKE = ctrl->tCKE,
2008 .tWTR = ctrl->tWTR,
2009 .tFAW = ctrl->tFAW,
2010 .tWR = ctrl->tWR,
2011 .tCMD = ctrl->cmd_stretch[channel],
2013 mchbar_write32(TC_RAP_ch(channel), tc_rap.raw);
2015 if (ctrl->cmd_stretch[channel] == 2)
2016 delta = 2;
2017 else if (ctrl->cmd_stretch[channel] == 0)
2018 delta = 4;
2020 FOR_ALL_POPULATED_RANKS {
2021 ctrl->timings[channel][slotrank].roundtrip_latency -= delta;
2024 for (command_pi = CT_MIN_PI; command_pi < CT_MAX_PI; command_pi++) {
2025 FOR_ALL_POPULATED_RANKS {
2026 ctrl->timings[channel][slotrank].pi_coding = command_pi;
2028 program_timings(ctrl, channel);
2029 reprogram_320c(ctrl);
2030 FOR_ALL_POPULATED_RANKS {
2031 stat[slotrank][command_pi - CT_MIN_PI] =
2032 test_command_training(ctrl, channel, slotrank);
2035 FOR_ALL_POPULATED_RANKS {
2036 struct run rn = get_longest_zero_run(stat[slotrank], CT_PI_LENGTH - 1);
2038 ctrl->timings[channel][slotrank].pi_coding = rn.middle + CT_MIN_PI;
2039 printram("cmd_stretch: %d, %d: % 4d-% 4d-% 4d\n",
2040 channel, slotrank, rn.start, rn.middle, rn.end);
2042 if (rn.all || rn.length < MIN_C320C_LEN) {
2043 FOR_ALL_POPULATED_RANKS {
2044 ctrl->timings[channel][slotrank] =
2045 saved_timings[channel][slotrank];
2047 return MAKE_ERR;
2051 return 0;
2055 * Adjust CMD phase shift and try multiple command rates.
2056 * A command rate of 2T doubles the time needed for address and command decode.
2058 int command_training(ramctr_timing *ctrl)
2060 int channel;
2062 FOR_ALL_POPULATED_CHANNELS {
2063 fill_pattern5(ctrl, channel, 0);
2066 FOR_ALL_POPULATED_CHANNELS {
2067 int cmdrate, err;
2070 * Dual DIMM per channel:
2071 * Issue:
2072 * While command training seems to succeed, raminit will fail in write training.
2074 * Workaround:
2075	 * Skip 1T in dual DIMM mode, as it is only supported by a few DIMMs.
2076 * Only try 1T mode for XMP DIMMs that request it in dual DIMM mode.
2078 * Single DIMM per channel:
2079 * Try command rate 1T and 2T
2081 cmdrate = ((ctrl->rankmap[channel] & 0x5) == 0x5);
2082 if (ctrl->tCMD)
2083 /* XMP gives the CMD rate in clock ticks, not ns */
2084 cmdrate = MIN(DIV_ROUND_UP(ctrl->tCMD, 256) - 1, 1);
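/*
 * Worked example (illustrative, assuming tCMD is stored in 1/256 clock units like the
 * other SPD-derived timings): an XMP request for a 2T command rate gives tCMD = 512, so
 * DIV_ROUND_UP(512, 256) - 1 = 1 and the loop below goes straight to cmd_stretch 2 (2T).
 */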
2086 for (; cmdrate < 2; cmdrate++) {
2087 err = try_cmd_stretch(ctrl, channel, cmdrate << 1);
2089 if (!err)
2090 break;
2093 if (err) {
2094 printk(BIOS_EMERG, "Command training failed: %d\n", channel);
2095 return err;
2098 printram("Using CMD rate %uT on channel %u\n", cmdrate + 1, channel);
2101 FOR_ALL_POPULATED_CHANNELS
2102 program_timings(ctrl, channel);
2104 reprogram_320c(ctrl);
2105 return 0;
2108 static int find_read_mpr_margin(ramctr_timing *ctrl, int channel, int slotrank, int *edges)
2110 int dqs_pi;
2111 int stats[NUM_LANES][MAX_EDGE_TIMING + 1];
2112 int lane;
2114 for (dqs_pi = 0; dqs_pi <= MAX_EDGE_TIMING; dqs_pi++) {
2115 FOR_ALL_LANES {
2116 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p = dqs_pi;
2117 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n = dqs_pi;
2119 program_timings(ctrl, channel);
2121 FOR_ALL_LANES {
2122 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel, lane), 0);
2123 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel, lane));
2126 wait_for_iosav(channel);
2128 iosav_write_read_mpr_sequence(
2129 channel, slotrank, ctrl->tMOD, 500, 4, 1, ctrl->CAS + 8);
2131 iosav_run_once_and_wait(channel);
2133 FOR_ALL_LANES {
2134 stats[lane][dqs_pi] = mchbar_read32(
2135 IOSAV_By_ERROR_COUNT_ch(channel, lane));
2139 FOR_ALL_LANES {
2140 struct run rn = get_longest_zero_run(stats[lane], MAX_EDGE_TIMING + 1);
2141 edges[lane] = rn.middle;
2143 if (rn.all) {
2144 printk(BIOS_EMERG, "Read MPR training failed: %d, %d, %d\n", channel,
2145 slotrank, lane);
2146 return MAKE_ERR;
2148 printram("eval %d, %d, %d: % 4d\n", channel, slotrank, lane, edges[lane]);
2150 return 0;
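
/*
 * Illustrative sketch, not part of the original file: the training loops in this file
 * record a pass/fail statistic per PI setting and then centre the timing on the longest
 * run of passing (zero) samples. The real helper, get_longest_zero_run(), is defined
 * earlier in this file; the struct layout and the non-wrapping search below are
 * simplifying assumptions made only for this example.
 */
struct zero_run_sketch {
	int start;	/* first passing index of the best run */
	int middle;	/* centre of the best run */
	int end;	/* last passing index of the best run */
	int length;	/* number of passing samples in the best run */
	int all;	/* set when every sample passed */
};

static struct zero_run_sketch longest_zero_run_sketch(const int *stat, int count)
{
	struct zero_run_sketch best = { 0, 0, 0, 0, 1 };
	int i, run_start = -1;

	for (i = 0; i < count; i++) {
		if (stat[i]) {
			best.all = 0;
			run_start = -1;
			continue;
		}
		if (run_start < 0)
			run_start = i;
		if (i - run_start + 1 > best.length) {
			best.length = i - run_start + 1;
			best.start  = run_start;
			best.end    = i;
			best.middle = (run_start + i) / 2;
		}
	}
	return best;
}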
2153 static void find_predefined_pattern(ramctr_timing *ctrl, const int channel)
2155 int slotrank, lane;
2157 fill_pattern0(ctrl, channel, 0, 0);
2158 FOR_ALL_LANES {
2159 mchbar_write32(IOSAV_By_BW_MASK_ch(channel, lane), 0);
2160 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel, lane));
2163 FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2164 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n = 16;
2165 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p = 16;
2168 program_timings(ctrl, channel);
2170 FOR_ALL_POPULATED_RANKS {
2171 wait_for_iosav(channel);
2173 iosav_write_read_mpr_sequence(
2174 channel, slotrank, ctrl->tMOD, 3, 4, 1, ctrl->CAS + 8);
2176 iosav_run_once_and_wait(channel);
2179 /* XXX: should any measured value be checked here? */
2181 FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2182 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n = 48;
2183 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p = 48;
2186 program_timings(ctrl, channel);
2188 FOR_ALL_POPULATED_RANKS {
2189 wait_for_iosav(channel);
2191 iosav_write_read_mpr_sequence(
2192 channel, slotrank, ctrl->tMOD, 3, 4, 1, ctrl->CAS + 8);
2194 iosav_run_once_and_wait(channel);
2197 /* XXX: should any measured value be checked here? */
2199 FOR_ALL_LANES {
2200 mchbar_write32(IOSAV_By_BW_MASK_ch(channel, lane),
2201 ~mchbar_read32(IOSAV_By_BW_SERROR_ch(channel, lane)) & 0xff);
2205 int read_mpr_training(ramctr_timing *ctrl)
2207 int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2208 int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2209 int channel, slotrank, lane;
2210 int err;
2212 mchbar_write32(GDCRTRAININGMOD, 0);
2214 toggle_io_reset();
2216 FOR_ALL_POPULATED_CHANNELS {
2217 find_predefined_pattern(ctrl, channel);
2219 fill_pattern0(ctrl, channel, 0, 0xffffffff);
2223 * FIXME: Under some conditions, the vendor BIOS sets both edges to the same value and
2224 * uses a single loop. This appears to be a debugging configuration.
2226 mchbar_write32(IOSAV_DC_MASK, 3 << 8);
2227 printram("discover falling edges:\n[%x] = %x\n", IOSAV_DC_MASK, 3 << 8);
2229 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2230 err = find_read_mpr_margin(ctrl, channel, slotrank,
2231 falling_edges[channel][slotrank]);
2232 if (err)
2233 return err;
2236 mchbar_write32(IOSAV_DC_MASK, 2 << 8);
2237 printram("discover rising edges:\n[%x] = %x\n", IOSAV_DC_MASK, 2 << 8);
2239 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2240 err = find_read_mpr_margin(ctrl, channel, slotrank,
2241 rising_edges[channel][slotrank]);
2242 if (err)
2243 return err;
2246 mchbar_write32(IOSAV_DC_MASK, 0);
2248 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2249 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n =
2250 falling_edges[channel][slotrank][lane];
2251 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p =
2252 rising_edges[channel][slotrank][lane];
2255 FOR_ALL_POPULATED_CHANNELS {
2256 program_timings(ctrl, channel);
2259 FOR_ALL_POPULATED_CHANNELS FOR_ALL_LANES {
2260 mchbar_write32(IOSAV_By_BW_MASK_ch(channel, lane), 0);
2262 return 0;
2265 static int find_agrsv_read_margin(ramctr_timing *ctrl, int channel, int slotrank, int *edges)
2267 const int rd_vref_offsets[] = { 0, 0xc, 0x2c };
2269 u32 raw_stats[MAX_EDGE_TIMING + 1];
2270 int lower[NUM_LANES];
2271 int upper[NUM_LANES];
2272 int lane, i, read_pi, pat;
2274 FOR_ALL_LANES {
2275 lower[lane] = 0;
2276 upper[lane] = MAX_EDGE_TIMING;
2279 for (i = 0; i < ARRAY_SIZE(rd_vref_offsets); i++) {
2280 const union gdcr_training_mod_reg training_mod = {
2281 .vref_gen_ctl = rd_vref_offsets[i],
2283 mchbar_write32(GDCRTRAININGMOD_ch(channel), training_mod.raw);
2284 printram("[%x] = 0x%08x\n", GDCRTRAININGMOD_ch(channel), training_mod.raw);
2286 for (pat = 0; pat < NUM_PATTERNS; pat++) {
2287 fill_pattern5(ctrl, channel, pat);
2288 printram("using pattern %d\n", pat);
2290 for (read_pi = 0; read_pi <= MAX_EDGE_TIMING; read_pi++) {
2291 FOR_ALL_LANES {
2292 ctrl->timings[channel][slotrank].lanes[lane]
2293 .rx_dqs_p = read_pi;
2294 ctrl->timings[channel][slotrank].lanes[lane]
2295 .rx_dqs_n = read_pi;
2297 program_timings(ctrl, channel);
2299 FOR_ALL_LANES {
2300 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel, lane),
2302 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel, lane));
2304 wait_for_iosav(channel);
2306 iosav_write_data_write_sequence(ctrl, channel, slotrank);
2308 iosav_run_once_and_wait(channel);
2310 FOR_ALL_LANES {
2311 mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel, lane));
2314 /* FIXME: This register only exists on Ivy Bridge */
2315 raw_stats[read_pi] = mchbar_read32(
2316 IOSAV_BYTE_SERROR_C_ch(channel));
2319 FOR_ALL_LANES {
2320 int stats[MAX_EDGE_TIMING + 1];
2321 struct run rn;
2323 for (read_pi = 0; read_pi <= MAX_EDGE_TIMING; read_pi++)
2324 stats[read_pi] = !!(raw_stats[read_pi] & (1 << lane));
2326 rn = get_longest_zero_run(stats, MAX_EDGE_TIMING + 1);
2328 printram("edges: %d, %d, %d: % 4d-% 4d-% 4d, "
2329 "% 4d-% 4d\n", channel, slotrank, i, rn.start,
2330 rn.middle, rn.end, rn.start + ctrl->edge_offset[i],
2331 rn.end - ctrl->edge_offset[i]);
2333 lower[lane] = MAX(rn.start + ctrl->edge_offset[i], lower[lane]);
2334 upper[lane] = MIN(rn.end - ctrl->edge_offset[i], upper[lane]);
2336 edges[lane] = (lower[lane] + upper[lane]) / 2;
2337 if (rn.all || (lower[lane] > upper[lane])) {
2338 printk(BIOS_EMERG, "Aggressive read training failed: "
2339 "%d, %d, %d\n", channel, slotrank, lane);
2341 return MAKE_ERR;
2347 /* Restore nominal Vref after training */
2348 mchbar_write32(GDCRTRAININGMOD_ch(channel), 0);
2349 printram("CPA\n");
2350 return 0;
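
/*
 * Illustrative sketch, not part of the original file: each Vref offset above yields a
 * passing window that is shrunk by edge_offset[] on both sides; the windows are then
 * intersected and the final DQS position is the centre of the intersection (the same
 * idea is reused for tx_dq in aggressive_write_training() below). The separate
 * start/end arrays are an assumption made for the example.
 */
static int intersect_windows_sketch(const int *starts, const int *ends,
				    const int *offsets, int n, int max_timing)
{
	int lower = 0, upper = max_timing, i;

	for (i = 0; i < n; i++) {
		if (starts[i] + offsets[i] > lower)
			lower = starts[i] + offsets[i];
		if (ends[i] - offsets[i] < upper)
			upper = ends[i] - offsets[i];
	}

	/* lower > upper means no common window: training fails for this lane */
	return (lower + upper) / 2;
}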
2353 int aggressive_read_training(ramctr_timing *ctrl)
2355 int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2356 int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2357 int channel, slotrank, lane, err;
2360 * FIXME: Under some conditions, the vendor BIOS sets both edges to the same value and
2361 * uses a single loop. This appears to be a debugging configuration.
2363 mchbar_write32(IOSAV_DC_MASK, 3 << 8);
2364 printram("discover falling edges aggressive:\n[%x] = %x\n", IOSAV_DC_MASK, 3 << 8);
2366 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2367 err = find_agrsv_read_margin(ctrl, channel, slotrank,
2368 falling_edges[channel][slotrank]);
2369 if (err)
2370 return err;
2373 mchbar_write32(IOSAV_DC_MASK, 2 << 8);
2374 printram("discover rising edges aggressive:\n[%x] = %x\n", IOSAV_DC_MASK, 2 << 8);
2376 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2377 err = find_agrsv_read_margin(ctrl, channel, slotrank,
2378 rising_edges[channel][slotrank]);
2379 if (err)
2380 return err;
2383 mchbar_write32(IOSAV_DC_MASK, 0);
2385 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2386 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n =
2387 falling_edges[channel][slotrank][lane];
2389 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p =
2390 rising_edges[channel][slotrank][lane];
2393 FOR_ALL_POPULATED_CHANNELS
2394 program_timings(ctrl, channel);
2396 return 0;
2399 static void test_aggressive_write(ramctr_timing *ctrl, int channel, int slotrank)
2401 wait_for_iosav(channel);
2403 iosav_write_aggressive_write_read_sequence(ctrl, channel, slotrank);
2405 iosav_run_once_and_wait(channel);
2408 static void set_write_vref(const int channel, const u8 wr_vref)
2410 mchbar_clrsetbits32(GDCRCMDDEBUGMUXCFG_Cz_S(channel), 0x3f << 24, wr_vref << 24);
2411 udelay(2);
2414 int aggressive_write_training(ramctr_timing *ctrl)
2416 const u8 wr_vref_offsets[3] = { 0, 0x0f, 0x2f };
2417 int i, pat;
2419 int lower[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2420 int upper[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2421 int channel, slotrank, lane;
2423 /* Changing the write Vref is only supported on some Ivy Bridge SKUs */
2424 if (!IS_IVY_CPU(ctrl->cpu))
2425 return 0;
2427 if (!(pci_read_config32(HOST_BRIDGE, CAPID0_A) & CAPID_WRTVREF))
2428 return 0;
2430 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2431 lower[channel][slotrank][lane] = 0;
2432 upper[channel][slotrank][lane] = MAX_TX_DQ;
2435 /* Only enable IOSAV_n_SPECIAL_COMMAND_ADDR optimization on later steppings */
2436 const bool enable_iosav_opt = IS_IVY_CPU_D(ctrl->cpu) || IS_IVY_CPU_E(ctrl->cpu);
2438 if (enable_iosav_opt)
2439 mchbar_write32(MCMNTS_SPARE, 1);
2441 printram("Aggressive write training:\n");
2443 for (i = 0; i < ARRAY_SIZE(wr_vref_offsets); i++) {
2444 FOR_ALL_POPULATED_CHANNELS {
2445 set_write_vref(channel, wr_vref_offsets[i]);
2447 for (pat = 0; pat < NUM_PATTERNS; pat++) {
2448 FOR_ALL_POPULATED_RANKS {
2449 int tx_dq;
2450 u32 raw_stats[MAX_TX_DQ + 1];
2451 int stats[MAX_TX_DQ + 1];
2453 /* Sentinel entry: ensure the zero-run search terminates, so rn.start < rn.end */
2454 stats[MAX_TX_DQ] = 1;
2456 fill_pattern5(ctrl, channel, pat);
2458 for (tx_dq = 0; tx_dq < MAX_TX_DQ; tx_dq++) {
2459 FOR_ALL_LANES {
2460 ctrl->timings[channel][slotrank]
2461 .lanes[lane].tx_dq = tx_dq;
2463 program_timings(ctrl, channel);
2465 test_aggressive_write(ctrl, channel, slotrank);
2467 raw_stats[tx_dq] = mchbar_read32(
2468 IOSAV_BYTE_SERROR_C_ch(channel));
2470 FOR_ALL_LANES {
2471 struct run rn;
2472 for (tx_dq = 0; tx_dq < MAX_TX_DQ; tx_dq++) {
2473 stats[tx_dq] = !!(raw_stats[tx_dq]
2474 & (1 << lane));
2477 rn = get_longest_zero_run(stats, MAX_TX_DQ + 1);
2478 if (rn.all) {
2479 printk(BIOS_EMERG, "Aggressive "
2480 "write training failed: "
2481 "%d, %d, %d\n", channel,
2482 slotrank, lane);
2484 return MAKE_ERR;
2486 printram("tx_dq: %d, %d, %d: "
2487 "% 4d-% 4d-% 4d, "
2488 "% 4d-% 4d\n", channel, slotrank,
2489 i, rn.start, rn.middle, rn.end,
2490 rn.start + ctrl->tx_dq_offset[i],
2491 rn.end - ctrl->tx_dq_offset[i]);
2493 lower[channel][slotrank][lane] =
2494 MAX(rn.start + ctrl->tx_dq_offset[i],
2495 lower[channel][slotrank][lane]);
2497 upper[channel][slotrank][lane] =
2498 MIN(rn.end - ctrl->tx_dq_offset[i],
2499 upper[channel][slotrank][lane]);
2506 FOR_ALL_CHANNELS {
2507 /* Restore nominal write Vref after training */
2508 set_write_vref(channel, 0);
2511 /* Disable IOSAV_n_SPECIAL_COMMAND_ADDR optimization */
2512 if (enable_iosav_opt)
2513 mchbar_write32(MCMNTS_SPARE, 0);
2515 printram("CPB\n");
2517 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2518 printram("tx_dq %d, %d, %d: % 4d\n", channel, slotrank, lane,
2519 (lower[channel][slotrank][lane] +
2520 upper[channel][slotrank][lane]) / 2);
2522 ctrl->timings[channel][slotrank].lanes[lane].tx_dq =
2523 (lower[channel][slotrank][lane] +
2524 upper[channel][slotrank][lane]) / 2;
2526 FOR_ALL_POPULATED_CHANNELS {
2527 program_timings(ctrl, channel);
2529 return 0;
2532 void normalize_training(ramctr_timing *ctrl)
2534 int channel, slotrank, lane;
2535 int mat;
2537 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2538 int delta;
2539 mat = 0;
2540 FOR_ALL_LANES mat =
2541 MAX(ctrl->timings[channel][slotrank].lanes[lane].rcven, mat);
2542 printram("normalize %d, %d, %d: mat %d\n",
2543 channel, slotrank, lane, mat);
2545 delta = (mat >> 6) - ctrl->timings[channel][slotrank].io_latency;
2546 printram("normalize %d, %d, %d: delta %d\n",
2547 channel, slotrank, lane, delta);
2549 ctrl->timings[channel][slotrank].roundtrip_latency += delta;
2550 ctrl->timings[channel][slotrank].io_latency += delta;
2553 FOR_ALL_POPULATED_CHANNELS {
2554 program_timings(ctrl, channel);
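
/*
 * Illustrative sketch, not part of the original file: assuming rcven is kept in PI
 * ticks with 64 ticks per DCLK (implied by the '>> 6' above), normalize_training()
 * adds the whole-clock part of the slowest lane's receive enable, minus the current
 * I/O latency, to both io_latency and roundtrip_latency.
 */
static int normalize_delta_sketch(int max_rcven_ticks, int io_latency)
{
	/* e.g. max_rcven_ticks = 300 -> 4 whole DCLKs; io_latency = 1 -> delta = 3 */
	return (max_rcven_ticks >> 6) - io_latency;
}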
2558 int channel_test(ramctr_timing *ctrl)
2560 int channel, slotrank, lane;
2562 slotrank = 0;
2563 FOR_ALL_POPULATED_CHANNELS
2564 if (mchbar_read32(MC_INIT_STATE_ch(channel)) & 0xa000) {
2565 printk(BIOS_EMERG, "Mini channel test failed (1): %d\n", channel);
2566 return MAKE_ERR;
2568 FOR_ALL_POPULATED_CHANNELS {
2569 fill_pattern0(ctrl, channel, 0x12345678, 0x98765432);
2572 for (slotrank = 0; slotrank < 4; slotrank++)
2573 FOR_ALL_CHANNELS
2574 if (ctrl->rankmap[channel] & (1 << slotrank)) {
2575 FOR_ALL_LANES {
2576 mchbar_write32(IOSAV_By_ERROR_COUNT(lane), 0);
2577 mchbar_write32(IOSAV_By_BW_SERROR_C(lane), 0);
2579 wait_for_iosav(channel);
2581 iosav_write_memory_test_sequence(ctrl, channel, slotrank);
2583 iosav_run_once_and_wait(channel);
2585 FOR_ALL_LANES
2586 if (mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel, lane))) {
2587 printk(BIOS_EMERG, "Mini channel test failed (2): %d, %d, %d\n",
2588 channel, slotrank, lane);
2589 return MAKE_ERR;
2592 return 0;
2595 void channel_scrub(ramctr_timing *ctrl)
2597 int channel, slotrank, row, rowsize;
2598 u8 bank;
2600 FOR_ALL_POPULATED_CHANNELS {
2601 wait_for_iosav(channel);
2602 fill_pattern0(ctrl, channel, 0, 0);
2606 * During runtime the "scrubber" will periodically scan through the memory in the
2607 * physical address space, to identify and fix ECC errors.
2608 * The following loop writes to every DRAM address, setting the ECC bits to the
2609 * correct value. A read from these locations will no longer return an ECC error,
2610 * except when a bit has toggled due to external events.
2611 * The same could be achieved by writing to the physical memory map, but it is
2612 * much more difficult due to SMM remapping, ME stolen memory, GFX stolen memory,
2613 * and firmware running in x86_32.
2615 FOR_ALL_POPULATED_CHANNELS FOR_ALL_POPULATED_RANKS {
2616 rowsize = 1 << ctrl->info.dimm[channel][slotrank >> 1].row_bits;
2617 for (bank = 0; bank < 8; bank++) {
2618 for (row = 0; row < rowsize; row += 16) {
2619 u8 gap = MAX((ctrl->tFAW >> 2) + 1, ctrl->tRRD); /* space ACTs by at least tRRD and tFAW/4 */
2620 const struct iosav_ssq sequence[] = {
2622 * DRAM command ACT
2623 * Opens the row for writing.
2625 [0] = {
2626 .sp_cmd_ctrl = {
2627 .command = IOSAV_ACT,
2628 .ranksel_ap = 1,
2630 .subseq_ctrl = {
2631 .cmd_executions = 1,
2632 .cmd_delay_gap = gap,
2633 .post_ssq_wait = ctrl->tRCD,
2634 .data_direction = SSQ_NA,
2636 .sp_cmd_addr = {
2637 .address = row,
2638 .rowbits = 6,
2639 .bank = bank,
2640 .rank = slotrank,
2642 .addr_update = {
2643 .inc_addr_1 = 1,
2644 .addr_wrap = 18,
2648 * DRAM command WR
2649 * Writes (128 + 1) * 8 (burst length) * 8 (bus width)
2650 * bytes.
2652 [1] = {
2653 .sp_cmd_ctrl = {
2654 .command = IOSAV_WR,
2655 .ranksel_ap = 1,
2657 .subseq_ctrl = {
2658 .cmd_executions = 129,
2659 .cmd_delay_gap = 4,
2660 .post_ssq_wait = ctrl->tWTR +
2661 ctrl->CWL + 8,
2662 .data_direction = SSQ_WR,
2664 .sp_cmd_addr = {
2665 .address = row,
2666 .rowbits = 0,
2667 .bank = bank,
2668 .rank = slotrank,
2670 .addr_update = {
2671 .inc_addr_8 = 1,
2672 .addr_wrap = 9,
2676 * DRAM command PRE
2677 * Closes the row.
2679 [2] = {
2680 .sp_cmd_ctrl = {
2681 .command = IOSAV_PRE,
2682 .ranksel_ap = 1,
2684 .subseq_ctrl = {
2685 .cmd_executions = 1,
2686 .cmd_delay_gap = 4,
2687 .post_ssq_wait = ctrl->tRP,
2688 .data_direction = SSQ_NA,
2690 .sp_cmd_addr = {
2691 .address = 0,
2692 .rowbits = 6,
2693 .bank = bank,
2694 .rank = slotrank,
2696 .addr_update = {
2697 .addr_wrap = 18,
2701 iosav_write_sequence(channel, sequence, ARRAY_SIZE(sequence));
2703 iosav_run_queue(channel, 16, 0);
2705 wait_for_iosav(channel);
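
/*
 * Illustrative arithmetic, not part of the original file: per the comment on the WR
 * subsequence above, one pass through that subsequence issues (128 + 1) WR commands,
 * each an 8-beat burst on a 64-bit bus, behind the opened row before the PRE closes
 * it again.
 */
static unsigned int scrub_wr_bytes_per_subseq_sketch(void)
{
	const unsigned int executions = 128 + 1;	/* cmd_executions above */
	const unsigned int burst_beats = 8;		/* DDR3 burst length */
	const unsigned int bus_bytes = 8;		/* 64-bit data bus */

	return executions * burst_beats * bus_bytes;	/* 8256 bytes */
}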
2711 void set_scrambling_seed(ramctr_timing *ctrl)
2713 int channel;
2715 /* FIXME: we hardcode seeds. Do we need to use some PRNG for them? I don't think so. */
2716 static u32 seeds[NUM_CHANNELS][3] = {
2717 {0x00009a36, 0xbafcfdcf, 0x46d1ab68},
2718 {0x00028bfa, 0x53fe4b49, 0x19ed5483}
2720 FOR_ALL_POPULATED_CHANNELS {
2721 mchbar_clrbits32(SCHED_CBIT_ch(channel), 1 << 28);
2722 mchbar_write32(SCRAMBLING_SEED_1_ch(channel), seeds[channel][0]);
2723 mchbar_write32(SCRAMBLING_SEED_2_HI_ch(channel), seeds[channel][1]);
2724 mchbar_write32(SCRAMBLING_SEED_2_LO_ch(channel), seeds[channel][2]);
2728 void set_wmm_behavior(const u32 cpu)
2730 if (IS_SANDY_CPU(cpu) && (IS_SANDY_CPU_D0(cpu) || IS_SANDY_CPU_D1(cpu))) {
2731 mchbar_write32(SC_WDBWM, 0x141d1519);
2732 } else {
2733 mchbar_write32(SC_WDBWM, 0x551d1519);
2737 void prepare_training(ramctr_timing *ctrl)
2739 int channel;
2741 FOR_ALL_POPULATED_CHANNELS {
2742 /* Always drive command bus */
2743 mchbar_setbits32(TC_RAP_ch(channel), 1 << 29);
2746 udelay(1);
2748 FOR_ALL_POPULATED_CHANNELS {
2749 wait_for_iosav(channel);
2753 void set_read_write_timings(ramctr_timing *ctrl)
2755 /* Use a larger delay when running fast to improve stability */
2756 const u32 tRWDRDD_inc = ctrl->tCK <= TCK_1066MHZ ? 4 : 2;
2758 int channel, slotrank;
2760 FOR_ALL_POPULATED_CHANNELS {
2761 int min_pi = 10000;
2762 int max_pi = -10000;
2764 FOR_ALL_POPULATED_RANKS {
2765 max_pi = MAX(ctrl->timings[channel][slotrank].pi_coding, max_pi);
2766 min_pi = MIN(ctrl->timings[channel][slotrank].pi_coding, min_pi);
2769 const u32 tWRDRDD = (max_pi - min_pi > 51) ? 0 : ctrl->ref_card_offset[channel];
2771 const u32 val = (ctrl->pi_coding_threshold < max_pi - min_pi) ? 3 : 2;
2773 dram_odt_stretch(ctrl, channel);
2775 const union tc_rwp_reg tc_rwp = {
2776 .tRRDR = 0,
2777 .tRRDD = val,
2778 .tWWDR = val,
2779 .tWWDD = val,
2780 .tRWDRDD = ctrl->ref_card_offset[channel] + tRWDRDD_inc,
2781 .tWRDRDD = tWRDRDD,
2782 .tRWSR = 2,
2783 .dec_wrd = 1,
2785 mchbar_write32(TC_RWP_ch(channel), tc_rwp.raw);
2789 void set_normal_operation(ramctr_timing *ctrl)
2791 int channel;
2792 FOR_ALL_POPULATED_CHANNELS {
2793 mchbar_write32(MC_INIT_STATE_ch(channel), 1 << 12 | ctrl->rankmap[channel]);
2794 mchbar_clrbits32(TC_RAP_ch(channel), 1 << 29);
2798 /* Encode the watermark latencies in a suitable format for consumption by graphics drivers */
2799 static int encode_wm(int ns)
2801 return (ns + 499) / 500;
2804 /* FIXME: values in this function should be hardware revision-dependent */
2805 void final_registers(ramctr_timing *ctrl)
2807 int channel;
2808 int t1_cycles = 0, t1_ns = 0, t2_ns;
2809 int t3_ns;
2810 u32 r32;
2812 if (IS_IVY_CPU(ctrl->cpu))
2813 mchbar_write32(WMM_READ_CONFIG, 0x46);
2815 FOR_ALL_CHANNELS {
2816 union tc_othp_reg tc_othp = {
2817 .raw = mchbar_read32(TC_OTHP_ch(channel)),
2819 if (IS_SANDY_CPU(ctrl->cpu) && (ctrl->cpu & 0xf) < SNB_STEP_D0)
2820 tc_othp.tCPDED = 2;
2821 else
2822 tc_othp.tCPDED = 1;
2823 mchbar_write32(TC_OTHP_ch(channel), tc_othp.raw);
2825 /* 64 DCLKs until idle, decision per rank */
2826 r32 = get_power_down_mode(ctrl, channel) << 8 | 64;
2827 mchbar_write32(PM_PDWN_CONFIG_ch(channel), r32);
2829 mchbar_write32(PM_TRML_M_CONFIG_ch(channel), 0x00000aaa);
2832 mchbar_write32(PM_BW_LIMIT_CONFIG, 0x5f7003ff);
2833 if (IS_SANDY_CPU(ctrl->cpu))
2834 mchbar_write32(PM_DLL_CONFIG, 0x000330f0);
2835 else
2836 mchbar_write32(PM_DLL_CONFIG, 0x00073000 | ctrl->mdll_wake_delay);
2838 FOR_ALL_CHANNELS {
2839 switch (ctrl->rankmap[channel]) {
2840 /* Unpopulated channel */
2841 case 0:
2842 mchbar_write32(PM_CMD_PWR_ch(channel), 0);
2843 break;
2844 /* Only single-ranked DIMMs */
2845 case 1:
2846 case 4:
2847 case 5:
2848 mchbar_write32(PM_CMD_PWR_ch(channel), 0x00373131);
2849 break;
2850 /* Dual-ranked DIMMs present */
2851 default:
2852 mchbar_write32(PM_CMD_PWR_ch(channel), 0x009b6ea1);
2853 break;
2857 mchbar_write32(MEM_TRML_ESTIMATION_CONFIG, 0xca9171e5);
2858 mchbar_clrsetbits32(MEM_TRML_THRESHOLDS_CONFIG, 0x00ffffff, 0x00e4d5d0);
2859 mchbar_clrbits32(MEM_TRML_INTERRUPT, 0x1f);
2861 FOR_ALL_CHANNELS {
2862 union tc_rfp_reg tc_rfp = {
2863 .raw = mchbar_read32(TC_RFP_ch(channel)),
2865 tc_rfp.refresh_2x_control = 1;
2866 mchbar_write32(TC_RFP_ch(channel), tc_rfp.raw);
2869 mchbar_setbits32(MC_INIT_STATE_G, 1 << 0);
2870 mchbar_setbits32(MC_INIT_STATE_G, 1 << 7);
2872 /* Find a populated channel */
2873 FOR_ALL_POPULATED_CHANNELS
2874 break;
2876 t1_cycles = (mchbar_read32(TC_ZQCAL_ch(channel)) >> 8) & 0xff;
2877 r32 = mchbar_read32(PM_DLL_CONFIG);
2878 if (r32 & (1 << 17))
2879 t1_cycles += (r32 & 0xfff);
2880 t1_cycles += mchbar_read32(TC_SRFTP_ch(channel)) & 0xfff;
2881 t1_ns = t1_cycles * ctrl->tCK / 256 + 544;
2882 if (!(r32 & (1 << 17)))
2883 t1_ns += 500;
2885 t2_ns = 10 * ((mchbar_read32(SAPMTIMERS) >> 8) & 0xfff);
2886 if (mchbar_read32(SAPMCTL) & 8) {
2887 t3_ns = 10 * ((mchbar_read32(BANDTIMERS_IVB) >> 8) & 0xfff);
2888 t3_ns += 10 * (mchbar_read32(SAPMTIMERS2_IVB) & 0xff);
2889 } else {
2890 t3_ns = 500;
2893 /* The graphics driver will use these watermark values */
2894 printk(BIOS_DEBUG, "t123: %d, %d, %d\n", t1_ns, t2_ns, t3_ns);
2895 mchbar_clrsetbits32(SSKPD, 0x3f3f3f3f,
2896 ((encode_wm(t1_ns) + encode_wm(t2_ns)) << 16) | (encode_wm(t1_ns) << 8) |
2897 ((encode_wm(t3_ns) + encode_wm(t2_ns) + encode_wm(t1_ns)) << 24) | 0x0c);
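
/*
 * Illustrative sketch, not part of the original file: how the cumulative watermark
 * latencies computed above are packed into SSKPD (the clrsetbits above only replaces
 * the low 6 bits of each byte). encode_wm() rounds up to 500 ns units, e.g.
 * 544 ns -> 2 and 1200 ns -> 3.
 */
static unsigned int sskpd_watermarks_sketch(int t1_ns, int t2_ns, int t3_ns)
{
	const unsigned int wm1   = (t1_ns + 499) / 500;		/* encode_wm(t1_ns) */
	const unsigned int wm12  = wm1 + (t2_ns + 499) / 500;	/* t1 + t2 */
	const unsigned int wm123 = wm12 + (t3_ns + 499) / 500;	/* t1 + t2 + t3 */

	return (wm123 << 24) | (wm12 << 16) | (wm1 << 8) | 0x0c;
}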
2900 void restore_timings(ramctr_timing *ctrl)
2902 int channel;
2904 FOR_ALL_POPULATED_CHANNELS {
2905 const union tc_rap_reg tc_rap = {
2906 .tRRD = ctrl->tRRD,
2907 .tRTP = ctrl->tRTP,
2908 .tCKE = ctrl->tCKE,
2909 .tWTR = ctrl->tWTR,
2910 .tFAW = ctrl->tFAW,
2911 .tWR = ctrl->tWR,
2912 .tCMD = ctrl->cmd_stretch[channel],
2914 mchbar_write32(TC_RAP_ch(channel), tc_rap.raw);
2917 udelay(1);
2919 FOR_ALL_POPULATED_CHANNELS {
2920 wait_for_iosav(channel);
2923 FOR_ALL_POPULATED_CHANNELS
2924 mchbar_setbits32(TC_RWP_ch(channel), 1 << 27);
2926 FOR_ALL_POPULATED_CHANNELS {
2927 udelay(1);
2928 mchbar_setbits32(SCHED_CBIT_ch(channel), 1 << 21);
2931 printram("CPE\n");
2933 mchbar_write32(GDCRTRAININGMOD, 0);
2934 mchbar_write32(IOSAV_DC_MASK, 0);
2936 printram("CP5b\n");
2938 FOR_ALL_POPULATED_CHANNELS {
2939 program_timings(ctrl, channel);
2942 u32 reg, addr;
2944 /* Poll for RCOMP */
2945 while (!(mchbar_read32(RCOMP_TIMER) & (1 << 16)))
2948 do {
2949 reg = mchbar_read32(IOSAV_STATUS_ch(0));
2950 } while ((reg & 0x14) == 0);
2952 /* Set state of memory controller */
2953 mchbar_write32(MC_INIT_STATE_G, 0x116);
2954 mchbar_write32(MC_INIT_STATE, 0);
2956 /* Wait 500us */
2957 udelay(500);
2959 FOR_ALL_CHANNELS {
2960 /* Set valid rank CKE */
2961 reg = 0;
2962 reg = (reg & ~0x0f) | ctrl->rankmap[channel];
2963 addr = MC_INIT_STATE_ch(channel);
2964 mchbar_write32(addr, reg);
2966 /* Wait 10ns for ranks to settle */
2967 // udelay(0.01);
2969 reg = (reg & ~0xf0) | (ctrl->rankmap[channel] << 4);
2970 mchbar_write32(addr, reg);
2972 /* Write reset using a NOP */
2973 write_reset(ctrl);
2976 /* MRS commands */
2977 dram_mrscommands(ctrl);
2979 printram("CP5c\n");
2981 mchbar_write32(GDCRTRAININGMOD_ch(0), 0);
2983 FOR_ALL_CHANNELS {
2984 mchbar_clrbits32(GDCRCMDDEBUGMUXCFG_Cz_S(channel), 0x3f << 24);
2985 udelay(2);