/*
 * This file is part of the coreboot project.
 *
 * Copyright (C) 2010 Advanced Micro Devices, Inc.
 * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

/******************************************************************************
 Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
******************************************************************************/
22 static void dqsTrainRcvrEn_SW(struct MCTStatStruc
*pMCTstat
,
23 struct DCTStatStruc
*pDCTstat
, u8 Pass
);
24 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc
*pMCTstat
,
25 struct DCTStatStruc
*pDCTstat
);
26 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc
*pMCTstat
,
27 struct DCTStatStruc
*pDCTstat
, u8 Channel
);
28 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc
*pMCTstat
,
29 struct DCTStatStruc
*pDCTstat
, u8 Channel
);
30 static void mct_SetMaxLatency_D(struct DCTStatStruc
*pDCTstat
, u8 Channel
, u16 DQSRcvEnDly
);
31 static void fenceDynTraining_D(struct MCTStatStruc
*pMCTstat
,
32 struct DCTStatStruc
*pDCTstat
, u8 dct
);
33 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc
*pDCTstat
);
35 /* Warning: These must be located so they do not cross a logical 16-bit
37 static const u32 TestPattern0_D
[] = {
38 0x55555555, 0x55555555, 0x55555555, 0x55555555,
39 0x55555555, 0x55555555, 0x55555555, 0x55555555,
40 0x55555555, 0x55555555, 0x55555555, 0x55555555,
41 0x55555555, 0x55555555, 0x55555555, 0x55555555,
43 static const u32 TestPattern1_D
[] = {
44 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
45 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
46 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
47 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
49 static const u32 TestPattern2_D
[] = {
50 0x12345678, 0x87654321, 0x23456789, 0x98765432,
51 0x59385824, 0x30496724, 0x24490795, 0x99938733,
52 0x40385642, 0x38465245, 0x29432163, 0x05067894,
53 0x12349045, 0x98723467, 0x12387634, 0x34587623,
56 static void SetupRcvrPattern(struct MCTStatStruc
*pMCTstat
,
57 struct DCTStatStruc
*pDCTstat
, u32
*buffer
, u8 pass
)
60 * 1. Copy the alpha and Beta patterns from ROM to Cache,
61 * aligning on 16 byte boundary
62 * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
63 * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
71 buf_a
= (u32
*)(((u32
)buffer
+ 0x10) & (0xfffffff0));
72 buf_b
= buf_a
+ 32; /* ?? */
73 p_A
= (u32
*)SetupDqsPattern_1PassB(pass
);
74 p_B
= (u32
*)SetupDqsPattern_1PassA(pass
);
81 pDCTstat
->PtrPatternBufA
= (u32
)buf_a
;
82 pDCTstat
->PtrPatternBufB
= (u32
)buf_b
;
85 void mct_TrainRcvrEn_D(struct MCTStatStruc
*pMCTstat
,
86 struct DCTStatStruc
*pDCTstat
, u8 Pass
)
88 if(mct_checkNumberOfDqsRcvEn_1Pass(Pass
))
89 dqsTrainRcvrEn_SW(pMCTstat
, pDCTstat
, Pass
);
92 static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay
, uint32_t dev
, uint8_t dimm
, uint32_t index_reg
)
97 for (lane
= 0; lane
< MAX_BYTE_LANES
; lane
++) {
99 if ((lane
== 0) || (lane
== 1))
101 if ((lane
== 2) || (lane
== 3))
103 if ((lane
== 4) || (lane
== 5))
105 if ((lane
== 6) || (lane
== 7))
110 dword
= Get_NB32_index_wait(dev
, index_reg
, wdt_reg
);
111 if ((lane
== 7) || (lane
== 5) || (lane
== 3) || (lane
== 1))
112 current_total_delay
[lane
] = (dword
& 0x00ff0000) >> 16;
113 if ((lane
== 8) || (lane
== 6) || (lane
== 4) || (lane
== 2) || (lane
== 0))
114 current_total_delay
[lane
] = dword
& 0x000000ff;
/* Write the per-lane 9-bit total delay values from current_total_delay[]
 * into the DRAM DQS Receiver Enable Timing Control registers of one DIMM
 * (F2x[1,0]9C_x[2B:10]).  Odd lanes occupy bits 24:16, even lanes bits 8:0.
 */
static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay,
		uint32_t dev, uint8_t dimm, uint32_t index_reg)
{
	uint8_t lane;
	uint32_t dword;

	for (lane = 0; lane < 8; lane++) {
		uint32_t ret_reg;

		/* Map byte lane pair to its Receiver Enable Timing register.
		 * NOTE(review): base indices and the per-DIMM stride of 3 were
		 * dropped by the extraction; they match SetEccDQSRcvrEn_D's
		 * index scheme (0x12 + dimm*3) — confirm against upstream. */
		if ((lane == 0) || (lane == 1))
			ret_reg = 0x10;
		if ((lane == 2) || (lane == 3))
			ret_reg = 0x11;
		if ((lane == 4) || (lane == 5))
			ret_reg = 0x20;
		if ((lane == 6) || (lane == 7))
			ret_reg = 0x21;
		ret_reg += dimm * 3;

		dword = Get_NB32_index_wait(dev, index_reg, ret_reg);
		if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
			dword &= ~(0x1ff << 16);
			dword |= (current_total_delay[lane] & 0x1ff) << 16;
		}
		if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
			dword &= ~0x1ff;
			dword |= current_total_delay[lane] & 0x1ff;
		}
		Set_NB32_index_wait(dev, index_reg, ret_reg, dword);
	}
}
147 static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc
*pDCTstat
, uint32_t testaddr
, uint8_t channel
)
149 SetUpperFSbase(testaddr
);
152 if((pDCTstat
->Status
& (1<<SB_128bitmode
)) && channel
) {
153 testaddr
+= 8; /* second channel */
/* DQS Receiver Enable Training
 * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
 */
162 static void dqsTrainRcvrEn_SW(struct MCTStatStruc
*pMCTstat
,
163 struct DCTStatStruc
*pDCTstat
, u8 Pass
)
169 u8 _DisableDramECC
= 0, _Wrap32Dis
= 0, _SSE2
= 0;
173 u32 TestAddr0
, TestAddr1
, TestAddr0B
, TestAddr1B
;
174 u32 PatternBuffer
[64+4]; /* FIXME: need increase 8? */
181 u32 ch_start
, ch_end
, ch
;
189 uint16_t current_total_delay
[MAX_BYTE_LANES
];
190 uint16_t candidate_total_delay
[8];
191 uint8_t data_test_pass_sr
[2][8]; /* [rank][lane] */
192 uint8_t data_test_pass
[8]; /* [lane] */
193 uint8_t data_test_pass_prev
[8]; /* [lane] */
194 uint8_t window_det_toggle
[8];
196 uint64_t result_qword1
;
197 uint64_t result_qword2
;
201 print_debug_dqs("\nTrainRcvEn: Node", pDCTstat
->Node_ID
, 0);
202 print_debug_dqs("TrainRcvEn: Pass", Pass
, 0);
204 dev
= pDCTstat
->dev_dct
;
206 if(!pDCTstat
->GangedMode
) {
212 for (ch
= ch_start
; ch
< ch_end
; ch
++) {
213 reg
= 0x78 + (0x100 * ch
);
214 val
= Get_NB32(dev
, reg
);
215 val
&= ~(0x3ff << 22);
216 val
|= (0x0c8 << 22); /* Max Rd Lat */
217 Set_NB32(dev
, reg
, val
);
221 if (Pass
== FirstPass
) {
222 mct_InitDQSPos4RcvrEn_D(pMCTstat
, pDCTstat
);
224 pDCTstat
->DimmTrainFail
= 0;
225 pDCTstat
->CSTrainFail
= ~pDCTstat
->CSPresent
;
229 if(cr4
& ( 1 << 9)) { /* save the old value */
232 cr4
|= (1 << 9); /* OSFXSR enable SSE2 */
236 _RDMSR(msr
, &lo
, &hi
);
237 /* FIXME: Why use SSEDIS */
238 if(lo
& (1 << 17)) { /* save the old value */
241 lo
|= (1 << 17); /* HWCR.wrap32dis */
242 lo
&= ~(1 << 15); /* SSEDIS */
243 _WRMSR(msr
, lo
, hi
); /* Setting wrap32dis allows 64-bit memory references in real mode */
245 _DisableDramECC
= mct_DisableDimmEccEn_D(pMCTstat
, pDCTstat
);
247 SetupRcvrPattern(pMCTstat
, pDCTstat
, PatternBuffer
, Pass
);
250 dev
= pDCTstat
->dev_dct
;
252 for (Channel
= 0; Channel
< 2; Channel
++) {
253 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat
->Node_ID
, 1);
254 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel
, 1);
255 pDCTstat
->Channel
= Channel
;
258 MaxDelay_CH
[Channel
] = 0;
259 index_reg
= 0x98 + 0x100 * Channel
;
261 Receiver
= mct_InitReceiver_D(pDCTstat
, Channel
);
262 /* There are four receiver pairs, loosely associated with chipselects.
263 * This is essentially looping over each DIMM.
265 for (; Receiver
< 8; Receiver
+= 2) {
266 Addl_Index
= (Receiver
>> 1) * 3 + 0x10;
268 print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index
, 2);
270 if (!mct_RcvrRankEnabled_D(pMCTstat
, pDCTstat
, Channel
, Receiver
)) {
274 /* Clear data structures */
275 for (lane
= 0; lane
< 8; lane
++) {
276 data_test_pass_prev
[lane
] = 0;
281 * Retrieve gross and fine timing fields from write DQS registers
283 read_dqs_write_timing_control_registers(current_total_delay
, dev
, (Receiver
>> 1), index_reg
);
286 * Program the Write Data Timing and Write ECC Timing register to
287 * the values stored in the DQS Write Timing Control register
290 for (lane
= 0; lane
< MAX_BYTE_LANES
; lane
++) {
293 /* Calculate Write Data Timing register location */
294 if ((lane
== 0) || (lane
== 1) || (lane
== 2) || (lane
== 3))
296 if ((lane
== 4) || (lane
== 5) || (lane
== 6) || (lane
== 7))
300 wdt_reg
|= ((Receiver
/ 2) << 8);
302 /* Set Write Data Timing register values */
303 dword
= Get_NB32_index_wait(dev
, index_reg
, wdt_reg
);
304 if ((lane
== 7) || (lane
== 3)) {
305 dword
&= ~(0x7f << 24);
306 dword
|= (current_total_delay
[lane
] & 0x7f) << 24;
308 if ((lane
== 6) || (lane
== 2)) {
309 dword
&= ~(0x7f << 16);
310 dword
|= (current_total_delay
[lane
] & 0x7f) << 16;
312 if ((lane
== 5) || (lane
== 1)) {
313 dword
&= ~(0x7f << 8);
314 dword
|= (current_total_delay
[lane
] & 0x7f) << 8;
316 if ((lane
== 8) || (lane
== 4) || (lane
== 0)) {
318 dword
|= current_total_delay
[lane
] & 0x7f;
320 Set_NB32_index_wait(dev
, index_reg
, wdt_reg
, dword
);
324 * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers
325 * to 1/2 MEMCLK for all lanes
327 for (lane
= 0; lane
< MAX_BYTE_LANES
; lane
++) {
329 if ((lane
== 0) || (lane
== 1) || (lane
== 2) || (lane
== 3))
331 if ((lane
== 4) || (lane
== 5) || (lane
== 6) || (lane
== 7))
335 rdt_reg
|= ((Receiver
/ 2) << 8);
340 Set_NB32_index_wait(dev
, index_reg
, rdt_reg
, dword
);
344 * Select two test addresses for each rank present
346 TestAddr0
= mct_GetRcvrSysAddr_D(pMCTstat
, pDCTstat
, Channel
, Receiver
, &valid
);
347 if (!valid
) { /* Address not supported on current CS */
351 TestAddr0B
= TestAddr0
+ (BigPagex8_RJ8
<< 3);
353 if(mct_RcvrRankEnabled_D(pMCTstat
, pDCTstat
, Channel
, Receiver
+1)) {
354 TestAddr1
= mct_GetRcvrSysAddr_D(pMCTstat
, pDCTstat
, Channel
, Receiver
+1, &valid
);
355 if(!valid
) { /* Address not supported on current CS */
358 TestAddr1B
= TestAddr1
+ (BigPagex8_RJ8
<< 3);
361 _2Ranks
= TestAddr1
= TestAddr1B
= 0;
364 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0
, 2);
365 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B
, 2);
366 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1
, 2);
367 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B
, 2);
370 * Write 1 cache line of the appropriate test pattern to each test addresse
372 mct_Write1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr0
, 0); /* rank 0 of DIMM, testpattern 0 */
373 mct_Write1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr0B
, 1); /* rank 0 of DIMM, testpattern 1 */
375 mct_Write1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr1
, 0); /*rank 1 of DIMM, testpattern 0 */
376 mct_Write1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr1B
, 1); /*rank 1 of DIMM, testpattern 1 */
379 #if DQS_TRAIN_DEBUG > 0
380 for (lane
= 0; lane
< 8; lane
++) {
381 print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane
, 2);
382 print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", current_total_delay
[lane
], 2);
387 * Write gross and fine timing fields to read DQS registers
389 write_dqs_receiver_enable_control_registers(current_total_delay
, dev
, (Receiver
>> 1), index_reg
);
392 * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values
395 * It is not clear if training should be discontinued if any test failures occur in the first
396 * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK
397 * window. Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side.
399 uint16_t current_delay_step
;
401 for (current_delay_step
= 0; current_delay_step
< 0x80; current_delay_step
++) {
402 print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step
, 3);
405 * Terminate if all lanes are trained
407 uint8_t all_lanes_trained
= 1;
408 for (lane
= 0; lane
< 8; lane
++)
410 all_lanes_trained
= 0;
412 if (all_lanes_trained
)
416 * Loop over all ranks
418 for (rank
= 0; rank
< (_2Ranks
+ 1); rank
++) {
419 /* 2.8.9.9.2 (7 A a-d)
420 * Read the first test address of the current rank
421 * Store the first data beat for analysis
422 * Reset read pointer in the DRAM controller FIFO
423 * Read the second test address of the current rank
424 * Store the first data beat for analysis
425 * Reset read pointer in the DRAM controller FIFO
429 * Invert read instructions to alternate data read order on the bus
431 proc_IOCLFLUSH_D((rank
== 0)?TestAddr0B
:TestAddr1B
);
432 result_qword2
= read64_fs(convert_testaddr_and_channel_to_address(pDCTstat
, (rank
== 0)?TestAddr0B
:TestAddr1B
, Channel
));
433 write_dqs_receiver_enable_control_registers(current_total_delay
, dev
, (Receiver
>> 1), index_reg
);
434 proc_IOCLFLUSH_D((rank
== 0)?TestAddr0
:TestAddr1
);
435 result_qword1
= read64_fs(convert_testaddr_and_channel_to_address(pDCTstat
, (rank
== 0)?TestAddr0
:TestAddr1
, Channel
));
436 write_dqs_receiver_enable_control_registers(current_total_delay
, dev
, (Receiver
>> 1), index_reg
);
438 proc_IOCLFLUSH_D((rank
== 0)?TestAddr0
:TestAddr1
);
439 result_qword1
= read64_fs(convert_testaddr_and_channel_to_address(pDCTstat
, (rank
== 0)?TestAddr0
:TestAddr1
, Channel
));
440 write_dqs_receiver_enable_control_registers(current_total_delay
, dev
, (Receiver
>> 1), index_reg
);
441 proc_IOCLFLUSH_D((rank
== 0)?TestAddr0B
:TestAddr1B
);
442 result_qword2
= read64_fs(convert_testaddr_and_channel_to_address(pDCTstat
, (rank
== 0)?TestAddr0B
:TestAddr1B
, Channel
));
443 write_dqs_receiver_enable_control_registers(current_total_delay
, dev
, (Receiver
>> 1), index_reg
);
446 * Compare both read patterns and flag passing ranks/lanes
448 uint8_t result_lane_byte1
;
449 uint8_t result_lane_byte2
;
450 for (lane
= 0; lane
< 8; lane
++) {
451 if (trained
[lane
] == 1) {
452 #if DQS_TRAIN_DEBUG > 0
453 print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane
, 4);
458 result_lane_byte1
= (result_qword1
>> (lane
* 8)) & 0xff;
459 result_lane_byte2
= (result_qword2
>> (lane
* 8)) & 0xff;
460 if ((result_lane_byte1
== 0x55) && (result_lane_byte2
== 0xaa))
461 data_test_pass_sr
[rank
][lane
] = 1;
463 data_test_pass_sr
[rank
][lane
] = 0;
464 #if DQS_TRAIN_DEBUG > 0
465 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, " | ", result_lane_byte1
, 4);
466 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, " | ", result_lane_byte2
, 4);
472 * If DIMM is dual rank, only use delays that pass testing for both ranks
474 for (lane
= 0; lane
< 8; lane
++) {
476 if ((data_test_pass_sr
[0][lane
]) && (data_test_pass_sr
[1][lane
]))
477 data_test_pass
[lane
] = 1;
479 data_test_pass
[lane
] = 0;
481 data_test_pass
[lane
] = data_test_pass_sr
[0][lane
];
486 * For each lane, update the DQS receiver delay setting in support of next iteration
488 for (lane
= 0; lane
< 8; lane
++) {
489 if (trained
[lane
] == 1)
493 * Save the total delay of the first success after a failure for later use
495 if ((data_test_pass
[lane
] == 1) && (data_test_pass_prev
[lane
] == 0)) {
496 candidate_total_delay
[lane
] = current_total_delay
[lane
];
497 window_det_toggle
[lane
] = 0;
501 * If the current delay failed testing add 1/8 UI to the current delay
503 if (data_test_pass
[lane
] == 0)
504 current_total_delay
[lane
] += 0x4;
507 * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay
508 * If 1.25 UI of delay have been added with no failures the lane is considered trained
510 if (data_test_pass
[lane
] == 1) {
511 /* See if lane is trained */
512 if ((current_total_delay
[lane
] - candidate_total_delay
[lane
]) >= 0x28) {
515 /* Calculate and set final lane delay value
516 * The final delay is the candidate delay + 7/8 UI
518 current_total_delay
[lane
] = candidate_total_delay
[lane
] + 0x1c;
520 if (window_det_toggle
[lane
] == 0) {
521 current_total_delay
[lane
] += 0x1;
522 window_det_toggle
[lane
] = 1;
524 current_total_delay
[lane
] += 0x8;
525 window_det_toggle
[lane
] = 0;
531 /* Update delays in hardware */
532 write_dqs_receiver_enable_control_registers(current_total_delay
, dev
, (Receiver
>> 1), index_reg
);
534 /* Save previous results for comparison in the next iteration */
535 for (lane
= 0; lane
< 8; lane
++)
536 data_test_pass_prev
[lane
] = data_test_pass
[lane
];
539 #if DQS_TRAIN_DEBUG > 0
540 for (lane
= 0; lane
< 8; lane
++)
541 print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane
, " current_total_delay ", current_total_delay
[lane
], 2);
544 /* Find highest delay value and save for later use */
545 for (lane
= 0; lane
< 8; lane
++)
546 if (current_total_delay
[lane
] > CTLRMaxDelay
)
547 CTLRMaxDelay
= current_total_delay
[lane
];
549 /* See if any lanes failed training, and set error flags appropriately
550 * For all trained lanes, save delay values for later use
552 for (lane
= 0; lane
< 8; lane
++) {
554 pDCTstat
->CH_D_B_RCVRDLY
[Channel
][Receiver
>> 1][lane
] = current_total_delay
[lane
];
556 printk(BIOS_WARNING
, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane
, Receiver
, Channel
);
558 /* Set error flags */
559 pDCTstat
->ErrStatus
|= 1 << SB_NORCVREN
;
560 Errors
|= 1 << SB_NORCVREN
;
561 pDCTstat
->ErrCode
= SC_FatalErr
;
562 pDCTstat
->CSTrainFail
|= 1 << Receiver
;
563 pDCTstat
->DimmTrainFail
|= 1 << (Receiver
+ Channel
);
568 * Flush the receiver FIFO
569 * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO
572 WriteLNTestPattern(TestAddr0
<< 8, (uint8_t *)TestPattern2_D
, 1);
573 mct_Read1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr0
);
575 MaxDelay_CH
[Channel
] = CTLRMaxDelay
;
578 CTLRMaxDelay
= MaxDelay_CH
[0];
579 if (MaxDelay_CH
[1] > CTLRMaxDelay
)
580 CTLRMaxDelay
= MaxDelay_CH
[1];
582 for (Channel
= 0; Channel
< 2; Channel
++) {
583 mct_SetMaxLatency_D(pDCTstat
, Channel
, CTLRMaxDelay
); /* program Ch A/B MaxAsyncLat to correspond with max delay */
586 ResetDCTWrPtr_D(dev
, index_reg
, Addl_Index
);
588 if(_DisableDramECC
) {
589 mct_EnableDimmEccEn_D(pMCTstat
, pDCTstat
, _DisableDramECC
);
592 if (Pass
== FirstPass
) {
593 /*Disable DQSRcvrEn training mode */
594 mct_DisableDQSRcvEn_D(pDCTstat
);
599 _RDMSR(msr
, &lo
, &hi
);
600 lo
&= ~(1<<17); /* restore HWCR.wrap32dis */
605 cr4
&= ~(1<<9); /* restore cr4.OSFXSR */
609 #if DQS_TRAIN_DEBUG > 0
612 printk(BIOS_DEBUG
, "TrainRcvrEn: CH_MaxRdLat:\n");
613 for(ChannelDTD
= 0; ChannelDTD
<2; ChannelDTD
++) {
614 printk(BIOS_DEBUG
, "Channel:%x: %x\n",
615 ChannelDTD
, pDCTstat
->CH_MaxRdLat
[ChannelDTD
]);
620 #if DQS_TRAIN_DEBUG > 0
623 u8 ChannelDTD
, ReceiverDTD
;
627 printk(BIOS_DEBUG
, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
628 for(ChannelDTD
= 0; ChannelDTD
< 2; ChannelDTD
++) {
629 printk(BIOS_DEBUG
, "Channel:%x\n", ChannelDTD
);
630 for(ReceiverDTD
= 0; ReceiverDTD
<8; ReceiverDTD
+=2) {
631 printk(BIOS_DEBUG
, "\t\tReceiver:%x:", ReceiverDTD
);
632 p
= pDCTstat
->CH_D_B_RCVRDLY
[ChannelDTD
][ReceiverDTD
>>1];
635 printk(BIOS_DEBUG
, " %03x", valDTD
);
637 printk(BIOS_DEBUG
, "\n");
643 printk(BIOS_DEBUG
, "TrainRcvrEn: Status %x\n", pDCTstat
->Status
);
644 printk(BIOS_DEBUG
, "TrainRcvrEn: ErrStatus %x\n", pDCTstat
->ErrStatus
);
645 printk(BIOS_DEBUG
, "TrainRcvrEn: ErrCode %x\n", pDCTstat
->ErrCode
);
646 printk(BIOS_DEBUG
, "TrainRcvrEn: Done\n\n");
649 u8
mct_InitReceiver_D(struct DCTStatStruc
*pDCTstat
, u8 dct
)
651 if (pDCTstat
->DIMMValidDCT
[dct
] == 0 ) {
658 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc
*pDCTstat
)
665 dev
= pDCTstat
->dev_dct
;
666 if (pDCTstat
->GangedMode
) {
672 for (ch
=0; ch
<ch_end
; ch
++) {
673 reg
= 0x78 + 0x100 * ch
;
674 val
= Get_NB32(dev
, reg
);
675 val
&= ~(1 << DqsRcvEnTrain
);
676 Set_NB32(dev
, reg
, val
);
/* Function only used once so it was inlined. */

/* Function only used once so it was inlined. */

/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers
 * See BKDG Rev. 3.62 page 268 for more information
 */
691 void mct_SetRcvrEnDly_D(struct DCTStatStruc
*pDCTstat
, u16 RcvrEnDly
,
692 u8 FinalValue
, u8 Channel
, u8 Receiver
, u32 dev
,
693 u32 index_reg
, u8 Addl_Index
, u8 Pass
)
700 if(RcvrEnDly
== 0x1fe) {
701 /*set the boundary flag */
702 pDCTstat
->Status
|= 1 << SB_DQSRcvLimit
;
705 /* DimmOffset not needed for CH_D_B_RCVRDLY array */
706 for(i
=0; i
< 8; i
++) {
708 /*calculate dimm offset */
709 p
= pDCTstat
->CH_D_B_RCVRDLY
[Channel
][Receiver
>> 1];
713 /* if flag=0, set DqsRcvEn value to reg. */
714 /* get the register index from table */
715 index
= Table_DQSRcvEn_Offset
[i
>> 1];
716 index
+= Addl_Index
; /* DIMMx DqsRcvEn byte0 */
717 val
= Get_NB32_index_wait(dev
, index_reg
, index
);
720 val
&= ~(0x1ff << 16);
721 val
|= ((RcvrEnDly
& 0x1ff) << 16);
725 val
|= (RcvrEnDly
& 0x1ff);
727 Set_NB32_index_wait(dev
, index_reg
, index
, val
);
732 /* Calculate MaxRdLatency
733 * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.5
735 static void mct_SetMaxLatency_D(struct DCTStatStruc
*pDCTstat
, u8 Channel
, u16 DQSRcvEnDly
)
747 u16 freq_tab
[] = {400, 533, 667, 800};
749 /* Set up processor-dependent values */
750 if (pDCTstat
->LogicalCPUID
& AMD_DR_Dx
) {
751 /* Revision D and above */
754 } else if (pDCTstat
->LogicalCPUID
& AMD_DR_Cx
) {
756 uint8_t package_type
= mctGet_NVbits(NV_PACK_TYPE
);
757 if ((package_type
== PT_L1
) /* Socket F (1207) */
758 || (package_type
== PT_M2
) /* Socket AM3 */
759 || (package_type
== PT_S1
)) { /* Socket S1g<x> */
767 /* Revision B and below */
772 if(pDCTstat
->GangedMode
)
775 dev
= pDCTstat
->dev_dct
;
776 reg_off
= 0x100 * Channel
;
777 index_reg
= 0x98 + reg_off
;
779 /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
780 val
= Get_NB32(dev
, 0x88 + reg_off
);
781 SubTotal
= ((val
& 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */
783 /* If registered DIMMs are being used then
784 * add 1 MEMCLK to the sub-total.
786 val
= Get_NB32(dev
, 0x90 + reg_off
);
787 if(!(val
& (1 << UnBuffDimm
)))
790 /* If the address prelaunch is setup for 1/2 MEMCLKs then
791 * add 1, else add 2 to the sub-total.
792 * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
794 val
= Get_NB32_index_wait(dev
, index_reg
, 0x04);
795 if(!(val
& 0x00202020))
800 /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
801 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
802 val
= Get_NB32(dev
, 0x78 + reg_off
);
803 SubTotal
+= 8 - (val
& 0x0f);
805 /* Convert bits 7-5 (also referred to as the coarse delay) of
806 * the current (or worst case) DQS receiver enable delay to
807 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
809 SubTotal
+= DQSRcvEnDly
>> 5; /* Retrieve gross delay portion of value */
811 /* Add "P" to the sub-total. "P" represents part of the
812 * processor specific constant delay value in the DRAM
815 SubTotal
<<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
816 SubTotal
+= cpu_val_p
; /*add "P" 1/2MemClk */
817 SubTotal
>>= 1; /*scale 1/4 MemClk back to 1/2 MemClk */
819 /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
822 SubTotal
*= 200 * ((Get_NB32(pDCTstat
->dev_nbmisc
, 0xd4) & 0x1f) + 4);
823 SubTotal
/= freq_tab
[((Get_NB32(pDCTstat
->dev_dct
, 0x94 + reg_off
) & 0x7) - 3)];
824 SubTotal
= (SubTotal
+ (2 - 1)) / 2; /* Round up */
826 /* Add "N" NCLKs to the sub-total. "N" represents part of the
827 * processor specific constant value in the northbridge
830 SubTotal
+= (cpu_val_n
) / 2;
832 pDCTstat
->CH_MaxRdLat
[Channel
] = SubTotal
;
833 if(pDCTstat
->GangedMode
) {
834 pDCTstat
->CH_MaxRdLat
[1] = SubTotal
;
837 /* Program the F2x[1, 0]78[MaxRdLatency] register with
838 * the total delay value (in NCLKs).
840 reg
= 0x78 + reg_off
;
841 val
= Get_NB32(dev
, reg
);
842 val
&= ~(0x3ff << 22);
843 val
|= (SubTotal
& 0x3ff) << 22;
845 /* program MaxRdLatency to correspond with current delay */
846 Set_NB32(dev
, reg
, val
);
849 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc
*pMCTstat
,
850 struct DCTStatStruc
*pDCTstat
)
852 /* Initialize the DQS Positions in preparation for
853 * Receiver Enable Training.
854 * Write Position is 1/2 Memclock Delay
855 * Read Position is 1/2 Memclock Delay
859 InitDQSPos4RcvrEn_D(pMCTstat
, pDCTstat
, i
);
863 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc
*pMCTstat
,
864 struct DCTStatStruc
*pDCTstat
, u8 Channel
)
866 /* Initialize the DQS Positions in preparation for
867 * Receiver Enable Training.
868 * Write Position is no Delay
869 * Read Position is 1/2 Memclock Delay
874 u8 dn
= 4; /* TODO: Rev C could be 4 */
875 u32 dev
= pDCTstat
->dev_dct
;
876 u32 index_reg
= 0x98 + 0x100 * Channel
;
878 /* FIXME: add Cx support */
880 for(i
=1; i
<=3; i
++) {
882 /* DIMM0 Write Data Timing Low */
883 /* DIMM0 Write ECC Timing */
884 Set_NB32_index_wait(dev
, index_reg
, i
+ 0x100 * j
, dword
);
889 for(i
=5; i
<=6; i
++) {
891 /* DIMM0 Read DQS Timing Control Low */
892 Set_NB32_index_wait(dev
, index_reg
, i
+ 0x100 * j
, dword
);
897 /* DIMM0 Read DQS ECC Timing Control */
898 Set_NB32_index_wait(dev
, index_reg
, 7 + 0x100 * j
, dword
);
901 void SetEccDQSRcvrEn_D(struct DCTStatStruc
*pDCTstat
, u8 Channel
)
910 dev
= pDCTstat
->dev_dct
;
911 index_reg
= 0x98 + Channel
* 0x100;
913 p
= pDCTstat
->CH_D_BC_RCVRDLY
[Channel
];
914 print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel
, 2);
915 for(ChipSel
= 0; ChipSel
< MAX_CS_SUPPORTED
; ChipSel
+= 2) {
917 Set_NB32_index_wait(dev
, index_reg
, index
, val
);
918 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
919 ChipSel
, " rcvr_delay ", val
, 2);
924 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc
*pMCTstat
,
925 struct DCTStatStruc
*pDCTstat
, u8 Channel
)
932 EccDQSLike
= pDCTstat
->CH_EccDQSLike
[Channel
];
933 EccDQSScale
= pDCTstat
->CH_EccDQSScale
[Channel
];
935 for (ChipSel
= 0; ChipSel
< MAX_CS_SUPPORTED
; ChipSel
+= 2) {
936 if(mct_RcvrRankEnabled_D(pMCTstat
, pDCTstat
, Channel
, ChipSel
)) {
938 p
= pDCTstat
->CH_D_B_RCVRDLY
[Channel
][ChipSel
>>1];
940 /* DQS Delay Value of Data Bytelane
941 * most like ECC byte lane */
942 val0
= p
[EccDQSLike
& 0x07];
943 /* DQS Delay Value of Data Bytelane
944 * 2nd most like ECC byte lane */
945 val1
= p
[(EccDQSLike
>>8) & 0x07];
947 if (!(pDCTstat
->Status
& (1 << SB_Registered
))) {
955 val
>>= 8; /* /256 */
967 pDCTstat
->CH_D_BC_RCVRDLY
[Channel
][ChipSel
>>1] = val
;
970 SetEccDQSRcvrEn_D(pDCTstat
, Channel
);
974 * ECC Byte Lane Training
975 * DQS Receiver Enable Delay
977 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc
*pMCTstat
,
978 struct DCTStatStruc
*pDCTstatA
)
983 for (Node
= 0; Node
< MAX_NODES_SUPPORTED
; Node
++) {
984 struct DCTStatStruc
*pDCTstat
;
985 pDCTstat
= pDCTstatA
+ Node
;
986 if (!pDCTstat
->NodePresent
)
988 if (pDCTstat
->DCTSysLimit
) {
990 CalcEccDQSRcvrEn_D(pMCTstat
, pDCTstat
, i
);
995 void phyAssistedMemFnceTraining(struct MCTStatStruc
*pMCTstat
,
996 struct DCTStatStruc
*pDCTstatA
)
999 struct DCTStatStruc
*pDCTstat
;
1001 /* FIXME: skip for Ax */
1002 while (Node
< MAX_NODES_SUPPORTED
) {
1003 pDCTstat
= pDCTstatA
+ Node
;
1005 if(pDCTstat
->DCTSysLimit
) {
1006 fenceDynTraining_D(pMCTstat
, pDCTstat
, 0);
1007 fenceDynTraining_D(pMCTstat
, pDCTstat
, 1);
1013 static void fenceDynTraining_D(struct MCTStatStruc
*pMCTstat
,
1014 struct DCTStatStruc
*pDCTstat
, u8 dct
)
1019 u32 index_reg
= 0x98 + 0x100 * dct
;
1022 /* BIOS first programs a seed value to the phase recovery engine
1023 * (recommended 19) registers.
1024 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
1025 * F2x[1,0]9C_x52.) .
1027 dev
= pDCTstat
->dev_dct
;
1028 for (index
= 0x50; index
<= 0x52; index
++) {
1029 val
= (FenceTrnFinDlySeed
& 0x1F);
1030 if (index
!= 0x52) {
1031 val
|= val
<< 8 | val
<< 16 | val
<< 24;
1033 Set_NB32_index_wait(dev
, index_reg
, index
, val
);
1036 /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
1037 val
= Get_NB32_index_wait(dev
, index_reg
, 0x08);
1038 val
|= 1 << PhyFenceTrEn
;
1039 Set_NB32_index_wait(dev
, index_reg
, 0x08, val
);
1041 /* Wait 200 MEMCLKs. */
1042 mct_Wait(50000); /* wait 200us */
1044 /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
1045 val
= Get_NB32_index_wait(dev
, index_reg
, 0x08);
1046 val
&= ~(1 << PhyFenceTrEn
);
1047 Set_NB32_index_wait(dev
, index_reg
, 0x08, val
);
1049 /* BIOS reads the phase recovery engine registers
1050 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
1052 for (index
= 0x50; index
<= 0x52; index
++) {
1053 val
= Get_NB32_index_wait(dev
, index_reg
, index
);
1054 avRecValue
+= val
& 0x7F;
1055 if (index
!= 0x52) {
1056 avRecValue
+= (val
>> 8) & 0x7F;
1057 avRecValue
+= (val
>> 16) & 0x7F;
1058 avRecValue
+= (val
>> 24) & 0x7F;
1062 val
= avRecValue
/ 9;
1067 /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
1068 /* inlined mct_AdjustFenceValue() */
1069 /* TODO: The RBC0 is not supported. */
1070 /* if (pDCTstat->LogicalCPUID & AMD_RB_C0)
1074 if (pDCTstat
->LogicalCPUID
& AMD_DR_Dx
)
1076 else if (pDCTstat
->LogicalCPUID
& AMD_DR_Cx
)
1078 else if (pDCTstat
->LogicalCPUID
& AMD_DR_Bx
)
1081 val
= Get_NB32_index_wait(dev
, index_reg
, 0x0C);
1082 val
&= ~(0x1F << 16);
1083 val
|= (avRecValue
& 0x1F) << 16;
1084 Set_NB32_index_wait(dev
, index_reg
, 0x0C, val
);
1086 /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
1087 * delays (both channels). */
1088 val
= Get_NB32_index_wait(dev
, index_reg
, 0x04);
1089 Set_NB32_index_wait(dev
, index_reg
, 0x04, val
);
1092 void mct_Wait(u32 cycles
)
1097 /* Wait # of 50ns cycles
1098 This seems like a hack to me... */
1100 cycles
<<= 3; /* x8 (number of 1.25ns ticks) */
1102 msr
= 0x10; /* TSC */
1103 _RDMSR(msr
, &lo
, &hi
);
1106 _RDMSR(msr
, &lo
, &hi
);
1107 } while (lo
- saved
< cycles
);