/* coreboot: src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c */
/*
 * This file is part of the coreboot project.
 *
 * Copyright (C) 2010 Advanced Micro Devices, Inc.
 * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

/******************************************************************************
 Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
******************************************************************************/
22 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
23 struct DCTStatStruc *pDCTstat, u8 Pass);
24 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
25 struct DCTStatStruc *pDCTstat);
26 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
27 struct DCTStatStruc *pDCTstat, u8 Channel);
28 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
29 struct DCTStatStruc *pDCTstat, u8 Channel);
30 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly);
31 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
32 struct DCTStatStruc *pDCTstat, u8 dct);
33 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
35 /* Warning: These must be located so they do not cross a logical 16-bit
36 segment boundary! */
37 static const u32 TestPattern0_D[] = {
38 0x55555555, 0x55555555, 0x55555555, 0x55555555,
39 0x55555555, 0x55555555, 0x55555555, 0x55555555,
40 0x55555555, 0x55555555, 0x55555555, 0x55555555,
41 0x55555555, 0x55555555, 0x55555555, 0x55555555,
43 static const u32 TestPattern1_D[] = {
44 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
45 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
46 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
47 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
49 static const u32 TestPattern2_D[] = {
50 0x12345678, 0x87654321, 0x23456789, 0x98765432,
51 0x59385824, 0x30496724, 0x24490795, 0x99938733,
52 0x40385642, 0x38465245, 0x29432163, 0x05067894,
53 0x12349045, 0x98723467, 0x12387634, 0x34587623,
56 static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
57 struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
60 * 1. Copy the alpha and Beta patterns from ROM to Cache,
61 * aligning on 16 byte boundary
62 * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
63 * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
65 u32 *buf_a;
66 u32 *buf_b;
67 u32 *p_A;
68 u32 *p_B;
69 u8 i;
71 buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
72 buf_b = buf_a + 32; /* ?? */
73 p_A = (u32 *)SetupDqsPattern_1PassB(pass);
74 p_B = (u32 *)SetupDqsPattern_1PassA(pass);
76 for(i=0;i<16;i++) {
77 buf_a[i] = p_A[i];
78 buf_b[i] = p_B[i];
81 pDCTstat->PtrPatternBufA = (u32)buf_a;
82 pDCTstat->PtrPatternBufB = (u32)buf_b;
85 void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
86 struct DCTStatStruc *pDCTstat, u8 Pass)
88 if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
89 dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
92 static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
94 uint8_t lane;
95 uint32_t dword;
97 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
98 uint32_t wdt_reg;
99 if ((lane == 0) || (lane == 1))
100 wdt_reg = 0x30;
101 if ((lane == 2) || (lane == 3))
102 wdt_reg = 0x31;
103 if ((lane == 4) || (lane == 5))
104 wdt_reg = 0x40;
105 if ((lane == 6) || (lane == 7))
106 wdt_reg = 0x41;
107 if (lane == 8)
108 wdt_reg = 0x32;
109 wdt_reg += dimm * 3;
110 dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
111 if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1))
112 current_total_delay[lane] = (dword & 0x00ff0000) >> 16;
113 if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0))
114 current_total_delay[lane] = dword & 0x000000ff;
118 static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
120 uint8_t lane;
121 uint32_t dword;
123 for (lane = 0; lane < 8; lane++) {
124 uint32_t ret_reg;
125 if ((lane == 0) || (lane == 1))
126 ret_reg = 0x10;
127 if ((lane == 2) || (lane == 3))
128 ret_reg = 0x11;
129 if ((lane == 4) || (lane == 5))
130 ret_reg = 0x20;
131 if ((lane == 6) || (lane == 7))
132 ret_reg = 0x21;
133 ret_reg += dimm * 3;
134 dword = Get_NB32_index_wait(dev, index_reg, ret_reg);
135 if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
136 dword &= ~(0x1ff << 16);
137 dword |= (current_total_delay[lane] & 0x1ff) << 16;
139 if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
140 dword &= ~0x1ff;
141 dword |= current_total_delay[lane] & 0x1ff;
143 Set_NB32_index_wait(dev, index_reg, ret_reg, dword);
147 static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDCTstat, uint32_t testaddr, uint8_t channel)
149 SetUpperFSbase(testaddr);
150 testaddr <<= 8;
152 if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
153 testaddr += 8; /* second channel */
156 return testaddr;
/* DQS Receiver Enable Training
 * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
 */
162 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
163 struct DCTStatStruc *pDCTstat, u8 Pass)
165 u8 Channel;
166 u8 _2Ranks;
167 u8 Addl_Index = 0;
168 u8 Receiver;
169 u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
170 u8 Final_Value;
171 u16 CTLRMaxDelay;
172 u16 MaxDelay_CH[2];
173 u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
174 u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
175 u32 Errors;
177 u32 val;
178 u32 reg;
179 u32 dev;
180 u32 index_reg;
181 u32 ch_start, ch_end, ch;
182 u32 msr;
183 u32 cr4;
184 u32 lo, hi;
186 uint32_t dword;
187 uint8_t rank;
188 uint8_t lane;
189 uint16_t current_total_delay[MAX_BYTE_LANES];
190 uint16_t candidate_total_delay[8];
191 uint8_t data_test_pass_sr[2][8]; /* [rank][lane] */
192 uint8_t data_test_pass[8]; /* [lane] */
193 uint8_t data_test_pass_prev[8]; /* [lane] */
194 uint8_t window_det_toggle[8];
195 uint8_t trained[8];
196 uint64_t result_qword1;
197 uint64_t result_qword2;
199 u8 valid;
201 print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
202 print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
204 dev = pDCTstat->dev_dct;
205 ch_start = 0;
206 if(!pDCTstat->GangedMode) {
207 ch_end = 2;
208 } else {
209 ch_end = 1;
212 for (ch = ch_start; ch < ch_end; ch++) {
213 reg = 0x78 + (0x100 * ch);
214 val = Get_NB32(dev, reg);
215 val &= ~(0x3ff << 22);
216 val |= (0x0c8 << 22); /* Max Rd Lat */
217 Set_NB32(dev, reg, val);
220 Final_Value = 1;
221 if (Pass == FirstPass) {
222 mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
223 } else {
224 pDCTstat->DimmTrainFail = 0;
225 pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
228 cr4 = read_cr4();
229 if(cr4 & ( 1 << 9)) { /* save the old value */
230 _SSE2 = 1;
232 cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
233 write_cr4(cr4);
235 msr = HWCR;
236 _RDMSR(msr, &lo, &hi);
237 /* FIXME: Why use SSEDIS */
238 if(lo & (1 << 17)) { /* save the old value */
239 _Wrap32Dis = 1;
241 lo |= (1 << 17); /* HWCR.wrap32dis */
242 lo &= ~(1 << 15); /* SSEDIS */
243 _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
245 _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
247 SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
249 Errors = 0;
250 dev = pDCTstat->dev_dct;
252 for (Channel = 0; Channel < 2; Channel++) {
253 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
254 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
255 pDCTstat->Channel = Channel;
257 CTLRMaxDelay = 0;
258 MaxDelay_CH[Channel] = 0;
259 index_reg = 0x98 + 0x100 * Channel;
261 Receiver = mct_InitReceiver_D(pDCTstat, Channel);
262 /* There are four receiver pairs, loosely associated with chipselects.
263 * This is essentially looping over each DIMM.
265 for (; Receiver < 8; Receiver += 2) {
266 Addl_Index = (Receiver >> 1) * 3 + 0x10;
268 print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
270 if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
271 continue;
274 /* Clear data structures */
275 for (lane = 0; lane < 8; lane++) {
276 data_test_pass_prev[lane] = 0;
277 trained[lane] = 0;
280 /* 2.8.9.9.2 (1, 6)
281 * Retrieve gross and fine timing fields from write DQS registers
283 read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
285 /* 2.8.9.9.2 (1)
286 * Program the Write Data Timing and Write ECC Timing register to
287 * the values stored in the DQS Write Timing Control register
288 * for each lane
290 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
291 uint32_t wdt_reg;
293 /* Calculate Write Data Timing register location */
294 if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
295 wdt_reg = 0x1;
296 if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
297 wdt_reg = 0x2;
298 if (lane == 8)
299 wdt_reg = 0x3;
300 wdt_reg |= ((Receiver / 2) << 8);
302 /* Set Write Data Timing register values */
303 dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
304 if ((lane == 7) || (lane == 3)) {
305 dword &= ~(0x7f << 24);
306 dword |= (current_total_delay[lane] & 0x7f) << 24;
308 if ((lane == 6) || (lane == 2)) {
309 dword &= ~(0x7f << 16);
310 dword |= (current_total_delay[lane] & 0x7f) << 16;
312 if ((lane == 5) || (lane == 1)) {
313 dword &= ~(0x7f << 8);
314 dword |= (current_total_delay[lane] & 0x7f) << 8;
316 if ((lane == 8) || (lane == 4) || (lane == 0)) {
317 dword &= ~0x7f;
318 dword |= current_total_delay[lane] & 0x7f;
320 Set_NB32_index_wait(dev, index_reg, wdt_reg, dword);
323 /* 2.8.9.9.2 (2)
324 * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers
325 * to 1/2 MEMCLK for all lanes
327 for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
328 uint32_t rdt_reg;
329 if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
330 rdt_reg = 0x5;
331 if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
332 rdt_reg = 0x6;
333 if (lane == 8)
334 rdt_reg = 0x7;
335 rdt_reg |= ((Receiver / 2) << 8);
336 if (lane == 8)
337 dword = 0x0000003f;
338 else
339 dword = 0x3f3f3f3f;
340 Set_NB32_index_wait(dev, index_reg, rdt_reg, dword);
343 /* 2.8.9.9.2 (3)
344 * Select two test addresses for each rank present
346 TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
347 if (!valid) { /* Address not supported on current CS */
348 continue;
351 TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
353 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
354 TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
355 if(!valid) { /* Address not supported on current CS */
356 continue;
358 TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
359 _2Ranks = 1;
360 } else {
361 _2Ranks = TestAddr1 = TestAddr1B = 0;
364 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
365 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
366 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
367 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
369 /* 2.8.9.9.2 (4, 5)
370 * Write 1 cache line of the appropriate test pattern to each test addresse
372 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */
373 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */
374 if (_2Ranks) {
375 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, 0); /*rank 1 of DIMM, testpattern 0 */
376 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, 1); /*rank 1 of DIMM, testpattern 1 */
379 #if DQS_TRAIN_DEBUG > 0
380 for (lane = 0; lane < 8; lane++) {
381 print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane, 2);
382 print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", current_total_delay[lane], 2);
384 #endif
386 /* 2.8.9.9.2 (6)
387 * Write gross and fine timing fields to read DQS registers
389 write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
391 /* 2.8.9.9.2 (7)
392 * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values
394 * FIXME
395 * It is not clear if training should be discontinued if any test failures occur in the first
396 * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK
397 * window. Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side.
399 uint16_t current_delay_step;
401 for (current_delay_step = 0; current_delay_step < 0x80; current_delay_step++) {
402 print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step, 3);
404 /* 2.8.9.9.2 (7 D)
405 * Terminate if all lanes are trained
407 uint8_t all_lanes_trained = 1;
408 for (lane = 0; lane < 8; lane++)
409 if (!trained[lane])
410 all_lanes_trained = 0;
412 if (all_lanes_trained)
413 break;
415 /* 2.8.9.9.2 (7 A)
416 * Loop over all ranks
418 for (rank = 0; rank < (_2Ranks + 1); rank++) {
419 /* 2.8.9.9.2 (7 A a-d)
420 * Read the first test address of the current rank
421 * Store the first data beat for analysis
422 * Reset read pointer in the DRAM controller FIFO
423 * Read the second test address of the current rank
424 * Store the first data beat for analysis
425 * Reset read pointer in the DRAM controller FIFO
427 if (rank & 1) {
428 /* 2.8.9.9.2 (7 D)
429 * Invert read instructions to alternate data read order on the bus
431 proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
432 result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
433 write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
434 proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
435 result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
436 write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
437 } else {
438 proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
439 result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
440 write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
441 proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
442 result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
443 write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
445 /* 2.8.9.9.2 (7 A e)
446 * Compare both read patterns and flag passing ranks/lanes
448 uint8_t result_lane_byte1;
449 uint8_t result_lane_byte2;
450 for (lane = 0; lane < 8; lane++) {
451 if (trained[lane] == 1) {
452 #if DQS_TRAIN_DEBUG > 0
453 print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane, 4);
454 #endif
455 continue;
458 result_lane_byte1 = (result_qword1 >> (lane * 8)) & 0xff;
459 result_lane_byte2 = (result_qword2 >> (lane * 8)) & 0xff;
460 if ((result_lane_byte1 == 0x55) && (result_lane_byte2 == 0xaa))
461 data_test_pass_sr[rank][lane] = 1;
462 else
463 data_test_pass_sr[rank][lane] = 0;
464 #if DQS_TRAIN_DEBUG > 0
465 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, " | ", result_lane_byte1, 4);
466 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, " | ", result_lane_byte2, 4);
467 #endif
471 /* 2.8.9.9.2 (7 B)
472 * If DIMM is dual rank, only use delays that pass testing for both ranks
474 for (lane = 0; lane < 8; lane++) {
475 if (_2Ranks) {
476 if ((data_test_pass_sr[0][lane]) && (data_test_pass_sr[1][lane]))
477 data_test_pass[lane] = 1;
478 else
479 data_test_pass[lane] = 0;
480 } else {
481 data_test_pass[lane] = data_test_pass_sr[0][lane];
485 /* 2.8.9.9.2 (7 E)
486 * For each lane, update the DQS receiver delay setting in support of next iteration
488 for (lane = 0; lane < 8; lane++) {
489 if (trained[lane] == 1)
490 continue;
492 /* 2.8.9.9.2 (7 C a)
493 * Save the total delay of the first success after a failure for later use
495 if ((data_test_pass[lane] == 1) && (data_test_pass_prev[lane] == 0)) {
496 candidate_total_delay[lane] = current_total_delay[lane];
497 window_det_toggle[lane] = 0;
500 /* 2.8.9.9.2 (7 C b)
501 * If the current delay failed testing add 1/8 UI to the current delay
503 if (data_test_pass[lane] == 0)
504 current_total_delay[lane] += 0x4;
506 /* 2.8.9.9.2 (7 C c)
507 * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay
508 * If 1.25 UI of delay have been added with no failures the lane is considered trained
510 if (data_test_pass[lane] == 1) {
511 /* See if lane is trained */
512 if ((current_total_delay[lane] - candidate_total_delay[lane]) >= 0x28) {
513 trained[lane] = 1;
515 /* Calculate and set final lane delay value
516 * The final delay is the candidate delay + 7/8 UI
518 current_total_delay[lane] = candidate_total_delay[lane] + 0x1c;
519 } else {
520 if (window_det_toggle[lane] == 0) {
521 current_total_delay[lane] += 0x1;
522 window_det_toggle[lane] = 1;
523 } else {
524 current_total_delay[lane] += 0x8;
525 window_det_toggle[lane] = 0;
531 /* Update delays in hardware */
532 write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
534 /* Save previous results for comparison in the next iteration */
535 for (lane = 0; lane < 8; lane++)
536 data_test_pass_prev[lane] = data_test_pass[lane];
539 #if DQS_TRAIN_DEBUG > 0
540 for (lane = 0; lane < 8; lane++)
541 print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
542 #endif
544 /* Find highest delay value and save for later use */
545 for (lane = 0; lane < 8; lane++)
546 if (current_total_delay[lane] > CTLRMaxDelay)
547 CTLRMaxDelay = current_total_delay[lane];
549 /* See if any lanes failed training, and set error flags appropriately
550 * For all trained lanes, save delay values for later use
552 for (lane = 0; lane < 8; lane++) {
553 if (trained[lane]) {
554 pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane] = current_total_delay[lane];
555 } else {
556 printk(BIOS_WARNING, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane, Receiver, Channel);
558 /* Set error flags */
559 pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
560 Errors |= 1 << SB_NORCVREN;
561 pDCTstat->ErrCode = SC_FatalErr;
562 pDCTstat->CSTrainFail |= 1 << Receiver;
563 pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
567 /* 2.8.9.9.2 (8)
568 * Flush the receiver FIFO
569 * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO
572 WriteLNTestPattern(TestAddr0 << 8, (uint8_t *)TestPattern2_D, 1);
573 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0);
575 MaxDelay_CH[Channel] = CTLRMaxDelay;
578 CTLRMaxDelay = MaxDelay_CH[0];
579 if (MaxDelay_CH[1] > CTLRMaxDelay)
580 CTLRMaxDelay = MaxDelay_CH[1];
582 for (Channel = 0; Channel < 2; Channel++) {
583 mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
586 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
588 if(_DisableDramECC) {
589 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
592 if (Pass == FirstPass) {
593 /*Disable DQSRcvrEn training mode */
594 mct_DisableDQSRcvEn_D(pDCTstat);
597 if(!_Wrap32Dis) {
598 msr = HWCR;
599 _RDMSR(msr, &lo, &hi);
600 lo &= ~(1<<17); /* restore HWCR.wrap32dis */
601 _WRMSR(msr, lo, hi);
603 if(!_SSE2){
604 cr4 = read_cr4();
605 cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
606 write_cr4(cr4);
609 #if DQS_TRAIN_DEBUG > 0
611 u8 ChannelDTD;
612 printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
613 for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
614 printk(BIOS_DEBUG, "Channel:%x: %x\n",
615 ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
618 #endif
620 #if DQS_TRAIN_DEBUG > 0
622 u16 valDTD;
623 u8 ChannelDTD, ReceiverDTD;
624 u8 i;
625 u16 *p;
627 printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
628 for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
629 printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
630 for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) {
631 printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
632 p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
633 for (i=0;i<8; i++) {
634 valDTD = p[i];
635 printk(BIOS_DEBUG, " %03x", valDTD);
637 printk(BIOS_DEBUG, "\n");
641 #endif
643 printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
644 printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
645 printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
646 printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
649 u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
651 if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
652 return 8;
653 } else {
654 return 0;
658 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
660 u8 ch_end, ch;
661 u32 reg;
662 u32 dev;
663 u32 val;
665 dev = pDCTstat->dev_dct;
666 if (pDCTstat->GangedMode) {
667 ch_end = 1;
668 } else {
669 ch_end = 2;
672 for (ch=0; ch<ch_end; ch++) {
673 reg = 0x78 + 0x100 * ch;
674 val = Get_NB32(dev, reg);
675 val &= ~(1 << DqsRcvEnTrain);
676 Set_NB32(dev, reg, val);
/* mct_ModifyIndex_D
 * Function only used once so it was inlined.
 */

/* mct_GetInitFlag_D
 * Function only used once so it was inlined.
 */
/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers
 * See BKDG Rev. 3.62 page 268 for more information
 */
691 void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly,
692 u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
693 u32 index_reg, u8 Addl_Index, u8 Pass)
695 u32 index;
696 u8 i;
697 u16 *p;
698 u32 val;
700 if(RcvrEnDly == 0x1fe) {
701 /*set the boundary flag */
702 pDCTstat->Status |= 1 << SB_DQSRcvLimit;
705 /* DimmOffset not needed for CH_D_B_RCVRDLY array */
706 for(i=0; i < 8; i++) {
707 if(FinalValue) {
708 /*calculate dimm offset */
709 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
710 RcvrEnDly = p[i];
713 /* if flag=0, set DqsRcvEn value to reg. */
714 /* get the register index from table */
715 index = Table_DQSRcvEn_Offset[i >> 1];
716 index += Addl_Index; /* DIMMx DqsRcvEn byte0 */
717 val = Get_NB32_index_wait(dev, index_reg, index);
718 if(i & 1) {
719 /* odd byte lane */
720 val &= ~(0x1ff << 16);
721 val |= ((RcvrEnDly & 0x1ff) << 16);
722 } else {
723 /* even byte lane */
724 val &= ~0x1ff;
725 val |= (RcvrEnDly & 0x1ff);
727 Set_NB32_index_wait(dev, index_reg, index, val);
/* Calculate MaxRdLatency
 * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.5
 */
735 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly)
737 u32 dev;
738 u32 reg;
739 u32 SubTotal;
740 u32 index_reg;
741 u32 reg_off;
742 u32 val;
744 uint8_t cpu_val_n;
745 uint8_t cpu_val_p;
747 u16 freq_tab[] = {400, 533, 667, 800};
749 /* Set up processor-dependent values */
750 if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
751 /* Revision D and above */
752 cpu_val_n = 4;
753 cpu_val_p = 29;
754 } else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) {
755 /* Revision C */
756 uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
757 if ((package_type == PT_L1) /* Socket F (1207) */
758 || (package_type == PT_M2) /* Socket AM3 */
759 || (package_type == PT_S1)) { /* Socket S1g<x> */
760 cpu_val_n = 10;
761 cpu_val_p = 11;
762 } else {
763 cpu_val_n = 4;
764 cpu_val_p = 29;
766 } else {
767 /* Revision B and below */
768 cpu_val_n = 10;
769 cpu_val_p = 11;
772 if(pDCTstat->GangedMode)
773 Channel = 0;
775 dev = pDCTstat->dev_dct;
776 reg_off = 0x100 * Channel;
777 index_reg = 0x98 + reg_off;
779 /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
780 val = Get_NB32(dev, 0x88 + reg_off);
781 SubTotal = ((val & 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */
783 /* If registered DIMMs are being used then
784 * add 1 MEMCLK to the sub-total.
786 val = Get_NB32(dev, 0x90 + reg_off);
787 if(!(val & (1 << UnBuffDimm)))
788 SubTotal += 2;
790 /* If the address prelaunch is setup for 1/2 MEMCLKs then
791 * add 1, else add 2 to the sub-total.
792 * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
794 val = Get_NB32_index_wait(dev, index_reg, 0x04);
795 if(!(val & 0x00202020))
796 SubTotal += 1;
797 else
798 SubTotal += 2;
800 /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
801 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
802 val = Get_NB32(dev, 0x78 + reg_off);
803 SubTotal += 8 - (val & 0x0f);
805 /* Convert bits 7-5 (also referred to as the coarse delay) of
806 * the current (or worst case) DQS receiver enable delay to
807 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
809 SubTotal += DQSRcvEnDly >> 5; /* Retrieve gross delay portion of value */
811 /* Add "P" to the sub-total. "P" represents part of the
812 * processor specific constant delay value in the DRAM
813 * clock domain.
815 SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
816 SubTotal += cpu_val_p; /*add "P" 1/2MemClk */
817 SubTotal >>= 1; /*scale 1/4 MemClk back to 1/2 MemClk */
819 /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
820 * clocks (NCLKs)
822 SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4);
823 SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)];
824 SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */
826 /* Add "N" NCLKs to the sub-total. "N" represents part of the
827 * processor specific constant value in the northbridge
828 * clock domain.
830 SubTotal += (cpu_val_n) / 2;
832 pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
833 if(pDCTstat->GangedMode) {
834 pDCTstat->CH_MaxRdLat[1] = SubTotal;
837 /* Program the F2x[1, 0]78[MaxRdLatency] register with
838 * the total delay value (in NCLKs).
840 reg = 0x78 + reg_off;
841 val = Get_NB32(dev, reg);
842 val &= ~(0x3ff << 22);
843 val |= (SubTotal & 0x3ff) << 22;
845 /* program MaxRdLatency to correspond with current delay */
846 Set_NB32(dev, reg, val);
849 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
850 struct DCTStatStruc *pDCTstat)
852 /* Initialize the DQS Positions in preparation for
853 * Receiver Enable Training.
854 * Write Position is 1/2 Memclock Delay
855 * Read Position is 1/2 Memclock Delay
857 u8 i;
858 for(i=0;i<2; i++){
859 InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i);
863 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
864 struct DCTStatStruc *pDCTstat, u8 Channel)
866 /* Initialize the DQS Positions in preparation for
867 * Receiver Enable Training.
868 * Write Position is no Delay
869 * Read Position is 1/2 Memclock Delay
872 u8 i, j;
873 u32 dword;
874 u8 dn = 4; /* TODO: Rev C could be 4 */
875 u32 dev = pDCTstat->dev_dct;
876 u32 index_reg = 0x98 + 0x100 * Channel;
878 /* FIXME: add Cx support */
879 dword = 0x00000000;
880 for(i=1; i<=3; i++) {
881 for(j=0; j<dn; j++)
882 /* DIMM0 Write Data Timing Low */
883 /* DIMM0 Write ECC Timing */
884 Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
887 /* errata #180 */
888 dword = 0x2f2f2f2f;
889 for(i=5; i<=6; i++) {
890 for(j=0; j<dn; j++)
891 /* DIMM0 Read DQS Timing Control Low */
892 Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
895 dword = 0x0000002f;
896 for(j=0; j<dn; j++)
897 /* DIMM0 Read DQS ECC Timing Control */
898 Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
901 void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
903 u32 dev;
904 u32 index_reg;
905 u32 index;
906 u8 ChipSel;
907 u16 *p;
908 u32 val;
910 dev = pDCTstat->dev_dct;
911 index_reg = 0x98 + Channel * 0x100;
912 index = 0x12;
913 p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
914 print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);
915 for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
916 val = p[ChipSel>>1];
917 Set_NB32_index_wait(dev, index_reg, index, val);
918 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
919 ChipSel, " rcvr_delay ", val, 2);
920 index += 3;
924 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
925 struct DCTStatStruc *pDCTstat, u8 Channel)
927 u8 ChipSel;
928 u16 EccDQSLike;
929 u8 EccDQSScale;
930 u32 val, val0, val1;
932 EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
933 EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
935 for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
936 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
937 u16 *p;
938 p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
940 /* DQS Delay Value of Data Bytelane
941 * most like ECC byte lane */
942 val0 = p[EccDQSLike & 0x07];
943 /* DQS Delay Value of Data Bytelane
944 * 2nd most like ECC byte lane */
945 val1 = p[(EccDQSLike>>8) & 0x07];
947 if (!(pDCTstat->Status & (1 << SB_Registered))) {
948 if(val0 > val1) {
949 val = val0 - val1;
950 } else {
951 val = val1 - val0;
954 val *= ~EccDQSScale;
955 val >>= 8; /* /256 */
957 if(val0 > val1) {
958 val -= val1;
959 } else {
960 val += val0;
962 } else {
963 val = val1 - val0;
964 val += val1;
967 pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
970 SetEccDQSRcvrEn_D(pDCTstat, Channel);
/* 2.8.9.9.4
 * ECC Byte Lane Training
 * DQS Receiver Enable Delay
 */
977 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
978 struct DCTStatStruc *pDCTstatA)
980 u8 Node;
981 u8 i;
983 for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
984 struct DCTStatStruc *pDCTstat;
985 pDCTstat = pDCTstatA + Node;
986 if (!pDCTstat->NodePresent)
987 break;
988 if (pDCTstat->DCTSysLimit) {
989 for(i=0; i<2; i++)
990 CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
995 void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
996 struct DCTStatStruc *pDCTstatA)
998 u8 Node = 0;
999 struct DCTStatStruc *pDCTstat;
1001 /* FIXME: skip for Ax */
1002 while (Node < MAX_NODES_SUPPORTED) {
1003 pDCTstat = pDCTstatA + Node;
1005 if(pDCTstat->DCTSysLimit) {
1006 fenceDynTraining_D(pMCTstat, pDCTstat, 0);
1007 fenceDynTraining_D(pMCTstat, pDCTstat, 1);
1009 Node++;
1013 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
1014 struct DCTStatStruc *pDCTstat, u8 dct)
1016 u16 avRecValue;
1017 u32 val;
1018 u32 dev;
1019 u32 index_reg = 0x98 + 0x100 * dct;
1020 u32 index;
1022 /* BIOS first programs a seed value to the phase recovery engine
1023 * (recommended 19) registers.
1024 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
1025 * F2x[1,0]9C_x52.) .
1027 dev = pDCTstat->dev_dct;
1028 for (index = 0x50; index <= 0x52; index ++) {
1029 val = (FenceTrnFinDlySeed & 0x1F);
1030 if (index != 0x52) {
1031 val |= val << 8 | val << 16 | val << 24;
1033 Set_NB32_index_wait(dev, index_reg, index, val);
1036 /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
1037 val = Get_NB32_index_wait(dev, index_reg, 0x08);
1038 val |= 1 << PhyFenceTrEn;
1039 Set_NB32_index_wait(dev, index_reg, 0x08, val);
1041 /* Wait 200 MEMCLKs. */
1042 mct_Wait(50000); /* wait 200us */
1044 /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
1045 val = Get_NB32_index_wait(dev, index_reg, 0x08);
1046 val &= ~(1 << PhyFenceTrEn);
1047 Set_NB32_index_wait(dev, index_reg, 0x08, val);
1049 /* BIOS reads the phase recovery engine registers
1050 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
1051 avRecValue = 0;
1052 for (index = 0x50; index <= 0x52; index ++) {
1053 val = Get_NB32_index_wait(dev, index_reg, index);
1054 avRecValue += val & 0x7F;
1055 if (index != 0x52) {
1056 avRecValue += (val >> 8) & 0x7F;
1057 avRecValue += (val >> 16) & 0x7F;
1058 avRecValue += (val >> 24) & 0x7F;
1062 val = avRecValue / 9;
1063 if (avRecValue % 9)
1064 val++;
1065 avRecValue = val;
1067 /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
1068 /* inlined mct_AdjustFenceValue() */
1069 /* TODO: The RBC0 is not supported. */
1070 /* if (pDCTstat->LogicalCPUID & AMD_RB_C0)
1071 avRecValue -= 3;
1072 else
1074 if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
1075 avRecValue -= 8;
1076 else if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
1077 avRecValue -= 8;
1078 else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
1079 avRecValue -= 8;
1081 val = Get_NB32_index_wait(dev, index_reg, 0x0C);
1082 val &= ~(0x1F << 16);
1083 val |= (avRecValue & 0x1F) << 16;
1084 Set_NB32_index_wait(dev, index_reg, 0x0C, val);
1086 /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
1087 * delays (both channels). */
1088 val = Get_NB32_index_wait(dev, index_reg, 0x04);
1089 Set_NB32_index_wait(dev, index_reg, 0x04, val);
1092 void mct_Wait(u32 cycles)
1094 u32 saved;
1095 u32 hi, lo, msr;
1097 /* Wait # of 50ns cycles
1098 This seems like a hack to me... */
1100 cycles <<= 3; /* x8 (number of 1.25ns ticks) */
1102 msr = 0x10; /* TSC */
1103 _RDMSR(msr, &lo, &hi);
1104 saved = lo;
1105 do {
1106 _RDMSR(msr, &lo, &hi);
1107 } while (lo - saved < cycles );