/*
 * This file is part of the coreboot project.
 *
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

/******************************************************************************
 Description: Receiver En and DQS Timing Training feature for DDR 2 MCT
******************************************************************************/
21 static void dqsTrainRcvrEn_SW(struct MCTStatStruc
*pMCTstat
,
22 struct DCTStatStruc
*pDCTstat
, u8 Pass
);
23 static u8
mct_SavePassRcvEnDly_D(struct DCTStatStruc
*pDCTstat
,
24 u8 rcvrEnDly
, u8 Channel
,
25 u8 receiver
, u8 Pass
);
26 static u8
mct_CompareTestPatternQW0_D(struct MCTStatStruc
*pMCTstat
,
27 struct DCTStatStruc
*pDCTstat
,
30 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc
*pMCTstat
,
31 struct DCTStatStruc
*pDCTstat
);
32 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc
*pMCTstat
,
33 struct DCTStatStruc
*pDCTstat
, u8 Channel
);
34 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc
*pMCTstat
,
35 struct DCTStatStruc
*pDCTstat
, u8 Channel
);
36 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc
*pDCTstat
,
37 u8 RcvrEnDly
, u8 where
,
38 u8 Channel
, u8 Receiver
,
39 u32 dev
, u32 index_reg
,
40 u8 Addl_Index
, u8 Pass
);
41 static void mct_SetMaxLatency_D(struct DCTStatStruc
*pDCTstat
, u8 Channel
, u8 DQSRcvEnDly
);
42 static void fenceDynTraining_D(struct MCTStatStruc
*pMCTstat
,
43 struct DCTStatStruc
*pDCTstat
, u8 dct
);
44 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc
*pDCTstat
);
/* Warning:  These must be located so they do not cross a logical 16-bit
   segment boundary! */
49 static const u32 TestPattern0_D
[] = {
50 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
51 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
52 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
53 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
55 static const u32 TestPattern1_D
[] = {
56 0x55555555, 0x55555555, 0x55555555, 0x55555555,
57 0x55555555, 0x55555555, 0x55555555, 0x55555555,
58 0x55555555, 0x55555555, 0x55555555, 0x55555555,
59 0x55555555, 0x55555555, 0x55555555, 0x55555555,
61 static const u32 TestPattern2_D
[] = {
62 0x12345678, 0x87654321, 0x23456789, 0x98765432,
63 0x59385824, 0x30496724, 0x24490795, 0x99938733,
64 0x40385642, 0x38465245, 0x29432163, 0x05067894,
65 0x12349045, 0x98723467, 0x12387634, 0x34587623,
68 static void SetupRcvrPattern(struct MCTStatStruc
*pMCTstat
,
69 struct DCTStatStruc
*pDCTstat
, u32
*buffer
, u8 pass
)
72 * 1. Copy the alpha and Beta patterns from ROM to Cache,
73 * aligning on 16 byte boundary
74 * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
75 * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
84 buf_a
= (u32
*)(((u32
)buffer
+ 0x10) & (0xfffffff0));
85 buf_b
= buf_a
+ 32; //??
86 p_A
= (u32
*)SetupDqsPattern_1PassB(pass
);
87 p_B
= (u32
*)SetupDqsPattern_1PassA(pass
);
94 pDCTstat
->PtrPatternBufA
= (u32
)buf_a
;
95 pDCTstat
->PtrPatternBufB
= (u32
)buf_b
;
99 void mct_TrainRcvrEn_D(struct MCTStatStruc
*pMCTstat
,
100 struct DCTStatStruc
*pDCTstat
, u8 Pass
)
102 if(mct_checkNumberOfDqsRcvEn_1Pass(Pass
))
103 dqsTrainRcvrEn_SW(pMCTstat
, pDCTstat
, Pass
);
107 static void dqsTrainRcvrEn_SW(struct MCTStatStruc
*pMCTstat
,
108 struct DCTStatStruc
*pDCTstat
, u8 Pass
)
110 u8 Channel
, RcvrEnDly
, RcvrEnDlyRmin
;
111 u8 Test0
, Test1
, CurrTest
, CurrTestSide0
, CurrTestSide1
;
112 u8 CTLRMaxDelay
, _2Ranks
, PatternA
, PatternB
;
115 u8 _DisableDramECC
= 0, _Wrap32Dis
= 0, _SSE2
= 0;
116 u8 RcvrEnDlyLimit
, Final_Value
, MaxDelay_CH
[2];
117 u32 TestAddr0
, TestAddr1
, TestAddr0B
, TestAddr1B
;
118 u32 PatternBuffer
[64+4]; /* FIXME: need increase 8? */
125 u32 ch_start
, ch_end
, ch
;
134 print_debug_dqs("\nTrainRcvEn: Node", pDCTstat
->Node_ID
, 0);
135 print_debug_dqs("TrainRcvEn: Pass", Pass
, 0);
138 dev
= pDCTstat
->dev_dct
;
140 if(!pDCTstat
->GangedMode
) {
146 for (ch
= ch_start
; ch
< ch_end
; ch
++) {
147 reg
= 0x78 + (0x100 * ch
);
148 val
= Get_NB32(dev
, reg
);
149 val
&= ~(0x3ff << 22);
150 val
|= (0x0c8 << 22); /* Max Rd Lat */
151 Set_NB32(dev
, reg
, val
);
155 if (Pass
== FirstPass
) {
156 mct_InitDQSPos4RcvrEn_D(pMCTstat
, pDCTstat
);
158 pDCTstat
->DimmTrainFail
= 0;
159 pDCTstat
->CSTrainFail
= ~pDCTstat
->CSPresent
;
161 print_t("TrainRcvrEn: 1\n");
164 if(cr4
& ( 1 << 9)) { /* save the old value */
167 cr4
|= (1 << 9); /* OSFXSR enable SSE2 */
169 print_t("TrainRcvrEn: 2\n");
172 _RDMSR(msr
, &lo
, &hi
);
173 //FIXME: Why use SSEDIS
174 if(lo
& (1 << 17)) { /* save the old value */
177 lo
|= (1 << 17); /* HWCR.wrap32dis */
178 lo
&= ~(1 << 15); /* SSEDIS */
179 _WRMSR(msr
, lo
, hi
); /* Setting wrap32dis allows 64-bit memory references in real mode */
180 print_t("TrainRcvrEn: 3\n");
182 _DisableDramECC
= mct_DisableDimmEccEn_D(pMCTstat
, pDCTstat
);
185 if(pDCTstat
->Speed
== 1) {
186 pDCTstat
->T1000
= 5000; /* get the T1000 figure (cycle time (ns)*1K */
187 } else if(pDCTstat
->Speed
== 2) {
188 pDCTstat
->T1000
= 3759;
189 } else if(pDCTstat
->Speed
== 3) {
190 pDCTstat
->T1000
= 3003;
191 } else if(pDCTstat
->Speed
== 4) {
192 pDCTstat
->T1000
= 2500;
193 } else if(pDCTstat
->Speed
== 5) {
194 pDCTstat
->T1000
= 1876;
199 SetupRcvrPattern(pMCTstat
, pDCTstat
, PatternBuffer
, Pass
);
200 print_t("TrainRcvrEn: 4\n");
203 dev
= pDCTstat
->dev_dct
;
206 for (Channel
= 0; Channel
< 2; Channel
++) {
207 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat
->Node_ID
, 1);
208 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel
, 1);
209 pDCTstat
->Channel
= Channel
;
211 MaxDelay_CH
[Channel
] = 0;
212 index_reg
= 0x98 + 0x100 * Channel
;
214 Receiver
= mct_InitReceiver_D(pDCTstat
, Channel
);
215 /* There are four receiver pairs, loosely associated with chipselects. */
216 for (; Receiver
< 8; Receiver
+= 2) {
217 Addl_Index
= (Receiver
>> 1) * 3 + 0x10;
220 /* mct_ModifyIndex_D */
221 RcvrEnDlyRmin
= RcvrEnDlyLimit
= 0xff;
223 print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index
, 2);
225 if(!mct_RcvrRankEnabled_D(pMCTstat
, pDCTstat
, Channel
, Receiver
)) {
226 print_t("\t\t\tRank not enabled_D\n");
230 TestAddr0
= mct_GetRcvrSysAddr_D(pMCTstat
, pDCTstat
, Channel
, Receiver
, &valid
);
231 if(!valid
) { /* Address not supported on current CS */
232 print_t("\t\t\tAddress not supported on current CS\n");
236 TestAddr0B
= TestAddr0
+ (BigPagex8_RJ8
<< 3);
238 if(mct_RcvrRankEnabled_D(pMCTstat
, pDCTstat
, Channel
, Receiver
+1)) {
239 TestAddr1
= mct_GetRcvrSysAddr_D(pMCTstat
, pDCTstat
, Channel
, Receiver
+1, &valid
);
240 if(!valid
) { /* Address not supported on current CS */
241 print_t("\t\t\tAddress not supported on current CS+1\n");
244 TestAddr1B
= TestAddr1
+ (BigPagex8_RJ8
<< 3);
247 _2Ranks
= TestAddr1
= TestAddr1B
= 0;
250 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0
, 2);
251 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B
, 2);
252 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1
, 2);
253 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B
, 2);
256 * Get starting RcvrEnDly value
258 RcvrEnDly
= mct_Get_Start_RcvrEnDly_1Pass(Pass
);
260 /* mct_GetInitFlag_D*/
261 if (Pass
== FirstPass
) {
262 pDCTstat
->DqsRcvEn_Pass
= 0;
264 pDCTstat
->DqsRcvEn_Pass
=0xFF;
266 pDCTstat
->DqsRcvEn_Saved
= 0;
269 while(RcvrEnDly
< RcvrEnDlyLimit
) { /* sweep Delay value here */
270 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly
, 3);
272 /* callback not required
273 if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
277 /* Odd steps get another pattern such that even
278 and odd steps alternate. The pointers to the
279 patterns will be swaped at the end of the loop
280 so that they correspond. */
290 mct_Write1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr0
, PatternA
); /* rank 0 of DIMM, testpattern 0 */
291 mct_Write1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr0B
, PatternB
); /* rank 0 of DIMM, testpattern 1 */
293 mct_Write1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr1
, PatternA
); /*rank 1 of DIMM, testpattern 0 */
294 mct_Write1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr1B
, PatternB
); /*rank 1 of DIMM, testpattern 1 */
297 mct_SetRcvrEnDly_D(pDCTstat
, RcvrEnDly
, 0, Channel
, Receiver
, dev
, index_reg
, Addl_Index
, Pass
);
300 CurrTestSide0
= DQS_FAIL
;
301 CurrTestSide1
= DQS_FAIL
;
303 mct_Read1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr0
); /*cache fills */
304 Test0
= mct_CompareTestPatternQW0_D(pMCTstat
, pDCTstat
, TestAddr0
, Channel
, PatternA
, Pass
);/* ROM vs cache compare */
305 proc_IOCLFLUSH_D(TestAddr0
);
306 ResetDCTWrPtr_D(dev
, index_reg
, Addl_Index
);
308 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0
, 3);
312 if(Test0
== DQS_PASS
) {
313 mct_Read1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr0B
); /*cache fills */
314 /* ROM vs cache compare */
315 Test1
= mct_CompareTestPatternQW0_D(pMCTstat
, pDCTstat
, TestAddr0B
, Channel
, PatternB
, Pass
);
316 proc_IOCLFLUSH_D(TestAddr0B
);
317 ResetDCTWrPtr_D(dev
, index_reg
, Addl_Index
);
319 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1
, 3);
321 if(Test1
== DQS_PASS
) {
322 CurrTestSide0
= DQS_PASS
;
326 mct_Read1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr1
); /*cache fills */
327 /* ROM vs cache compare */
328 Test0
= mct_CompareTestPatternQW0_D(pMCTstat
, pDCTstat
, TestAddr1
, Channel
, PatternA
, Pass
);
329 proc_IOCLFLUSH_D(TestAddr1
);
330 ResetDCTWrPtr_D(dev
, index_reg
, Addl_Index
);
332 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0
, 3);
334 if(Test0
== DQS_PASS
) {
335 mct_Read1LTestPattern_D(pMCTstat
, pDCTstat
, TestAddr1B
); /*cache fills */
336 /* ROM vs cache compare */
337 Test1
= mct_CompareTestPatternQW0_D(pMCTstat
, pDCTstat
, TestAddr1B
, Channel
, PatternB
, Pass
);
338 proc_IOCLFLUSH_D(TestAddr1B
);
339 ResetDCTWrPtr_D(dev
, index_reg
, Addl_Index
);
341 print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1
, 3);
342 if(Test1
== DQS_PASS
) {
343 CurrTestSide1
= DQS_PASS
;
349 if ((CurrTestSide0
== DQS_PASS
) && (CurrTestSide1
== DQS_PASS
)) {
352 } else if (CurrTestSide0
== DQS_PASS
) {
357 /* record first pass DqsRcvEn to stack */
358 valid
= mct_SavePassRcvEnDly_D(pDCTstat
, RcvrEnDly
, Channel
, Receiver
, Pass
);
360 /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */
361 if(valid
== 2 || (LastTest
== DQS_FAIL
&& valid
== 1)) {
362 RcvrEnDlyRmin
= RcvrEnDly
;
368 /* swap the rank 0 pointers */
370 TestAddr0
= TestAddr0B
;
373 /* swap the rank 1 pointers */
375 TestAddr1
= TestAddr1B
;
378 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly
, 3);
382 } /* while RcvrEnDly */
384 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly
, 2);
385 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin
, 3);
386 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit
, 3);
387 if(RcvrEnDlyRmin
== RcvrEnDlyLimit
) {
388 /* no passing window */
389 pDCTstat
->ErrStatus
|= 1 << SB_NORCVREN
;
390 Errors
|= 1 << SB_NORCVREN
;
391 pDCTstat
->ErrCode
= SC_FatalErr
;
394 if(RcvrEnDly
> (RcvrEnDlyLimit
- 1)) {
395 /* passing window too narrow, too far delayed*/
396 pDCTstat
->ErrStatus
|= 1 << SB_SmallRCVR
;
397 Errors
|= 1 << SB_SmallRCVR
;
398 pDCTstat
->ErrCode
= SC_FatalErr
;
399 RcvrEnDly
= RcvrEnDlyLimit
- 1;
400 pDCTstat
->CSTrainFail
|= 1 << Receiver
;
401 pDCTstat
->DimmTrainFail
|= 1 << (Receiver
+ Channel
);
404 // CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass
405 mct_Average_RcvrEnDly_Pass(pDCTstat
, RcvrEnDly
, RcvrEnDlyLimit
, Channel
, Receiver
, Pass
);
407 mct_SetFinalRcvrEnDly_D(pDCTstat
, RcvrEnDly
, Final_Value
, Channel
, Receiver
, dev
, index_reg
, Addl_Index
, Pass
);
409 if(pDCTstat
->ErrStatus
& (1 << SB_SmallRCVR
)) {
410 Errors
|= 1 << SB_SmallRCVR
;
413 RcvrEnDly
+= Pass1MemClkDly
;
414 if(RcvrEnDly
> CTLRMaxDelay
) {
415 CTLRMaxDelay
= RcvrEnDly
;
418 } /* while Receiver */
420 MaxDelay_CH
[Channel
] = CTLRMaxDelay
;
423 CTLRMaxDelay
= MaxDelay_CH
[0];
424 if (MaxDelay_CH
[1] > CTLRMaxDelay
)
425 CTLRMaxDelay
= MaxDelay_CH
[1];
427 for (Channel
= 0; Channel
< 2; Channel
++) {
428 mct_SetMaxLatency_D(pDCTstat
, Channel
, CTLRMaxDelay
); /* program Ch A/B MaxAsyncLat to correspond with max delay */
431 ResetDCTWrPtr_D(dev
, index_reg
, Addl_Index
);
433 if(_DisableDramECC
) {
434 mct_EnableDimmEccEn_D(pMCTstat
, pDCTstat
, _DisableDramECC
);
437 if (Pass
== FirstPass
) {
438 /*Disable DQSRcvrEn training mode */
439 print_t("TrainRcvrEn: mct_DisableDQSRcvEn_D\n");
440 mct_DisableDQSRcvEn_D(pDCTstat
);
445 _RDMSR(msr
, &lo
, &hi
);
446 lo
&= ~(1<<17); /* restore HWCR.wrap32dis */
451 cr4
&= ~(1<<9); /* restore cr4.OSFXSR */
455 #if DQS_TRAIN_DEBUG > 0
458 printk(BIOS_DEBUG
, "TrainRcvrEn: CH_MaxRdLat:\n");
459 for(Channel
= 0; Channel
<2; Channel
++) {
460 printk(BIOS_DEBUG
, "Channel: %02x: %02x\n", Channel
, pDCTstat
->CH_MaxRdLat
[Channel
]);
465 #if DQS_TRAIN_DEBUG > 0
468 u8 Channel
, Receiver
;
472 printk(BIOS_DEBUG
, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
473 for(Channel
= 0; Channel
< 2; Channel
++) {
474 printk(BIOS_DEBUG
, "Channel: %02x\n", Channel
);
475 for(Receiver
= 0; Receiver
<8; Receiver
+=2) {
476 printk(BIOS_DEBUG
, "\t\tReceiver: %02x: ", Receiver
);
477 p
= pDCTstat
->CH_D_B_RCVRDLY
[Channel
][Receiver
>>1];
480 printk(BIOS_DEBUG
, "%02x ", val
);
482 printk(BIOS_DEBUG
, "\n");
488 print_tx("TrainRcvrEn: Status ", pDCTstat
->Status
);
489 print_tx("TrainRcvrEn: ErrStatus ", pDCTstat
->ErrStatus
);
490 print_tx("TrainRcvrEn: ErrCode ", pDCTstat
->ErrCode
);
491 print_t("TrainRcvrEn: Done\n");
495 u8
mct_InitReceiver_D(struct DCTStatStruc
*pDCTstat
, u8 dct
)
497 if (pDCTstat
->DIMMValidDCT
[dct
] == 0 ) {
505 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc
*pDCTstat
, u8 RcvrEnDly
, u8 where
, u8 Channel
, u8 Receiver
, u32 dev
, u32 index_reg
, u8 Addl_Index
, u8 Pass
/*, u8 *p*/)
508 * Program final DqsRcvEnDly to additional index for DQS receiver
511 mct_SetRcvrEnDly_D(pDCTstat
, RcvrEnDly
, where
, Channel
, Receiver
, dev
, index_reg
, Addl_Index
, Pass
);
515 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc
*pDCTstat
)
522 dev
= pDCTstat
->dev_dct
;
523 if (pDCTstat
->GangedMode
) {
529 for (ch
=0; ch
<ch_end
; ch
++) {
530 reg
= 0x78 + 0x100 * ch
;
531 val
= Get_NB32(dev
, reg
);
532 val
&= ~(1 << DqsRcvEnTrain
);
533 Set_NB32(dev
, reg
, val
);
/*
 * Function only used once so it was inlined.
 */

/*
 * Function only used once so it was inlined.
 */
548 void mct_SetRcvrEnDly_D(struct DCTStatStruc
*pDCTstat
, u8 RcvrEnDly
,
549 u8 FinalValue
, u8 Channel
, u8 Receiver
, u32 dev
,
550 u32 index_reg
, u8 Addl_Index
, u8 Pass
)
557 if(RcvrEnDly
== 0xFE) {
558 /*set the boudary flag */
559 pDCTstat
->Status
|= 1 << SB_DQSRcvLimit
;
562 /* DimmOffset not needed for CH_D_B_RCVRDLY array */
565 for(i
=0; i
< 8; i
++) {
567 /*calculate dimm offset */
568 p
= pDCTstat
->CH_D_B_RCVRDLY
[Channel
][Receiver
>> 1];
572 /* if flag=0, set DqsRcvEn value to reg. */
573 /* get the register index from table */
574 index
= Table_DQSRcvEn_Offset
[i
>> 1];
575 index
+= Addl_Index
; /* DIMMx DqsRcvEn byte0 */
576 val
= Get_NB32_index_wait(dev
, index_reg
, index
);
579 val
&= ~(0xFF << 16);
580 val
|= (RcvrEnDly
<< 16);
586 Set_NB32_index_wait(dev
, index_reg
, index
, val
);
591 static void mct_SetMaxLatency_D(struct DCTStatStruc
*pDCTstat
, u8 Channel
, u8 DQSRcvEnDly
)
601 if(pDCTstat
->GangedMode
)
604 dev
= pDCTstat
->dev_dct
;
605 reg_off
= 0x100 * Channel
;
606 index_reg
= 0x98 + reg_off
;
608 /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
609 val
= Get_NB32(dev
, 0x88 + reg_off
);
610 SubTotal
= ((val
& 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */
612 /* If registered DIMMs are being used then
613 * add 1 MEMCLK to the sub-total.
615 val
= Get_NB32(dev
, 0x90 + reg_off
);
616 if(!(val
& (1 << UnBuffDimm
)))
619 /* If the address prelaunch is setup for 1/2 MEMCLKs then
620 * add 1, else add 2 to the sub-total.
621 * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
623 val
= Get_NB32_index_wait(dev
, index_reg
, 0x04);
624 if(!(val
& 0x00202020))
629 /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
630 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
631 val
= Get_NB32(dev
, 0x78 + reg_off
);
632 SubTotal
+= 8 - (val
& 0x0f);
634 /* Convert bits 7-5 (also referred to as the course delay) of
635 * the current (or worst case) DQS receiver enable delay to
636 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
638 SubTotal
+= DQSRcvEnDly
>> 5; /*BOZO-no rounding up */
640 /* Add 5.5 to the sub-total. 5.5 represents part of the
641 * processor specific constant delay value in the DRAM
644 SubTotal
<<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
645 SubTotal
+= 11; /*add 5.5 1/2MemClk */
647 /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
648 * clocks (NCLKs) as follows (assuming DDR400 and assuming
649 * that no P-state or link speed changes have occurred).
653 * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
654 val
= Get_NB32(dev
, 0x94 + reg_off
);
656 /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
659 val
++; /* adjust for DDR2-1066 */
661 valx
= (val
+ 3) << 2;
663 val
= Get_NB32(pDCTstat
->dev_nbmisc
, 0xD4);
664 SubTotal
*= ((val
& 0x1f) + 4 ) * 3;
667 if (SubTotal
% valx
) { /* round up */
671 /* Add 5 NCLKs to the sub-total. 5 represents part of the
672 * processor specific constant value in the northbridge
677 pDCTstat
->CH_MaxRdLat
[Channel
] = SubTotal
;
678 if(pDCTstat
->GangedMode
) {
679 pDCTstat
->CH_MaxRdLat
[1] = SubTotal
;
682 /* Program the F2x[1, 0]78[MaxRdLatency] register with
683 * the total delay value (in NCLKs).
686 reg
= 0x78 + reg_off
;
687 val
= Get_NB32(dev
, reg
);
688 val
&= ~(0x3ff << 22);
689 val
|= (SubTotal
& 0x3ff) << 22;
691 /* program MaxRdLatency to correspond with current delay */
692 Set_NB32(dev
, reg
, val
);
696 static u8
mct_SavePassRcvEnDly_D(struct DCTStatStruc
*pDCTstat
,
697 u8 rcvrEnDly
, u8 Channel
,
698 u8 receiver
, u8 Pass
)
701 u8 mask_Saved
, mask_Pass
;
704 /* calculate dimm offset
705 * not needed for CH_D_B_RCVRDLY array
708 /* cmp if there has new DqsRcvEnDly to be recorded */
709 mask_Pass
= pDCTstat
->DqsRcvEn_Pass
;
711 if(Pass
== SecondPass
) {
712 mask_Pass
= ~mask_Pass
;
715 mask_Saved
= pDCTstat
->DqsRcvEn_Saved
;
716 if(mask_Pass
!= mask_Saved
) {
718 /* find desired stack offset according to channel/dimm/byte */
719 if(Pass
== SecondPass
) {
720 // FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1];
721 p
= 0; // Keep the compiler happy.
723 mask_Saved
&= mask_Pass
;
724 p
= pDCTstat
->CH_D_B_RCVRDLY
[Channel
][receiver
>>1];
726 for(i
=0; i
< 8; i
++) {
727 /* cmp per byte lane */
728 if(mask_Pass
& (1 << i
)) {
729 if(!(mask_Saved
& (1 << i
))) {
730 /* save RcvEnDly to stack, according to
731 the related Dimm/byte lane */
732 p
[i
] = (u8
)rcvrEnDly
;
733 mask_Saved
|= 1 << i
;
737 pDCTstat
->DqsRcvEn_Saved
= mask_Saved
;
739 return mct_SaveRcvEnDly_D_1Pass(pDCTstat
, Pass
);
743 static u8
mct_CompareTestPatternQW0_D(struct MCTStatStruc
*pMCTstat
,
744 struct DCTStatStruc
*pDCTstat
,
745 u32 addr
, u8 channel
,
748 /* Compare only the first beat of data. Since target addrs are cache
749 * line aligned, the Channel parameter is used to determine which
750 * cache QW to compare.
759 if(Pass
== FirstPass
) {
761 test_buf
= (u8
*)TestPattern1_D
;
763 test_buf
= (u8
*)TestPattern0_D
;
765 } else { // Second Pass
766 test_buf
= (u8
*)TestPattern2_D
;
769 SetUpperFSbase(addr
);
772 if((pDCTstat
->Status
& (1<<SB_128bitmode
)) && channel
) {
773 addr
+= 8; /* second channel */
777 print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32
)test_buf
, " | addr_lo = ", addr
, 4);
778 for (i
=0; i
<8; i
++) {
779 value
= read32_fs(addr
);
780 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf
[i
], " | ", value
, 4);
782 if (value
== test_buf
[i
]) {
783 pDCTstat
->DqsRcvEn_Pass
|= (1<<i
);
785 pDCTstat
->DqsRcvEn_Pass
&= ~(1<<i
);
791 if (Pass
== FirstPass
) {
792 /* if first pass, at least one byte lane pass
793 * ,then DQS_PASS=1 and will set to related reg.
795 if(pDCTstat
->DqsRcvEn_Pass
!= 0) {
802 /* if second pass, at least one byte lane fail
803 * ,then DQS_FAIL=1 and will set to related reg.
805 if(pDCTstat
->DqsRcvEn_Pass
!= 0xFF) {
812 /* if second pass, we can't find the fail until FFh,
813 * then let it fail to save the final delay
815 if((Pass
== SecondPass
) && (pDCTstat
->Status
& (1 << SB_DQSRcvLimit
))) {
817 pDCTstat
->DqsRcvEn_Pass
= 0;
820 /* second pass needs to be inverted
821 * FIXME? this could be inverted in the above code to start with...
823 if(Pass
== SecondPass
) {
824 if (result
== DQS_PASS
) {
826 } else if (result
== DQS_FAIL
) { /* FIXME: doesn't need to be else if */
837 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc
*pMCTstat
,
838 struct DCTStatStruc
*pDCTstat
)
840 /* Initialize the DQS Positions in preparation for
841 * Receiver Enable Training.
842 * Write Position is 1/2 Memclock Delay
843 * Read Position is 1/2 Memclock Delay
847 InitDQSPos4RcvrEn_D(pMCTstat
, pDCTstat
, i
);
852 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc
*pMCTstat
,
853 struct DCTStatStruc
*pDCTstat
, u8 Channel
)
855 /* Initialize the DQS Positions in preparation for
856 * Receiver Enable Training.
857 * Write Position is no Delay
858 * Read Position is 1/2 Memclock Delay
863 u8 dn
= 2; // TODO: Rev C could be 4
864 u32 dev
= pDCTstat
->dev_dct
;
865 u32 index_reg
= 0x98 + 0x100 * Channel
;
868 // FIXME: add Cx support
870 for(i
=1; i
<=3; i
++) {
872 /* DIMM0 Write Data Timing Low */
873 /* DIMM0 Write ECC Timing */
874 Set_NB32_index_wait(dev
, index_reg
, i
+ 0x100 * j
, dword
);
879 for(i
=5; i
<=6; i
++) {
881 /* DIMM0 Read DQS Timing Control Low */
882 Set_NB32_index_wait(dev
, index_reg
, i
+ 0x100 * j
, dword
);
887 /* DIMM0 Read DQS ECC Timing Control */
888 Set_NB32_index_wait(dev
, index_reg
, 7 + 0x100 * j
, dword
);
892 void SetEccDQSRcvrEn_D(struct DCTStatStruc
*pDCTstat
, u8 Channel
)
901 dev
= pDCTstat
->dev_dct
;
902 index_reg
= 0x98 + Channel
* 0x100;
904 p
= pDCTstat
->CH_D_BC_RCVRDLY
[Channel
];
905 print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel
, 2);
906 for(ChipSel
= 0; ChipSel
< MAX_CS_SUPPORTED
; ChipSel
+= 2) {
908 Set_NB32_index_wait(dev
, index_reg
, index
, val
);
909 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
910 ChipSel
, " rcvr_delay ", val
, 2);
916 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc
*pMCTstat
,
917 struct DCTStatStruc
*pDCTstat
, u8 Channel
)
924 EccDQSLike
= pDCTstat
->CH_EccDQSLike
[Channel
];
925 EccDQSScale
= pDCTstat
->CH_EccDQSScale
[Channel
];
927 for (ChipSel
= 0; ChipSel
< MAX_CS_SUPPORTED
; ChipSel
+= 2) {
928 if(mct_RcvrRankEnabled_D(pMCTstat
, pDCTstat
, Channel
, ChipSel
)) {
930 p
= pDCTstat
->CH_D_B_RCVRDLY
[Channel
][ChipSel
>>1];
932 /* DQS Delay Value of Data Bytelane
933 * most like ECC byte lane */
934 val0
= p
[EccDQSLike
& 0x07];
935 /* DQS Delay Value of Data Bytelane
936 * 2nd most like ECC byte lane */
937 val1
= p
[(EccDQSLike
>>8) & 0x07];
954 pDCTstat
->CH_D_BC_RCVRDLY
[Channel
][ChipSel
>>1] = val
;
957 SetEccDQSRcvrEn_D(pDCTstat
, Channel
);
960 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc
*pMCTstat
,
961 struct DCTStatStruc
*pDCTstatA
)
966 for (Node
= 0; Node
< MAX_NODES_SUPPORTED
; Node
++) {
967 struct DCTStatStruc
*pDCTstat
;
968 pDCTstat
= pDCTstatA
+ Node
;
969 if (!pDCTstat
->NodePresent
)
971 if (pDCTstat
->DCTSysLimit
) {
973 CalcEccDQSRcvrEn_D(pMCTstat
, pDCTstat
, i
);
979 void phyAssistedMemFnceTraining(struct MCTStatStruc
*pMCTstat
,
980 struct DCTStatStruc
*pDCTstatA
)
984 struct DCTStatStruc
*pDCTstat
;
986 // FIXME: skip for Ax
987 while (Node
< MAX_NODES_SUPPORTED
) {
988 pDCTstat
= pDCTstatA
+ Node
;
990 if(pDCTstat
->DCTSysLimit
) {
991 fenceDynTraining_D(pMCTstat
, pDCTstat
, 0);
992 fenceDynTraining_D(pMCTstat
, pDCTstat
, 1);
999 static void fenceDynTraining_D(struct MCTStatStruc
*pMCTstat
,
1000 struct DCTStatStruc
*pDCTstat
, u8 dct
)
1005 u32 index_reg
= 0x98 + 0x100 * dct
;
1008 /* BIOS first programs a seed value to the phase recovery engine
1009 * (recommended 19) registers.
1010 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
1011 * F2x[1,0]9C_x52.) .
1014 dev
= pDCTstat
->dev_dct
;
1015 for (index
= 0x50; index
<= 0x52; index
++) {
1016 val
= (FenceTrnFinDlySeed
& 0x1F);
1017 if (index
!= 0x52) {
1018 val
|= val
<< 8 | val
<< 16 | val
<< 24;
1020 Set_NB32_index_wait(dev
, index_reg
, index
, val
);
1024 /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
1025 val
= Get_NB32_index_wait(dev
, index_reg
, 0x08);
1026 val
|= 1 << PhyFenceTrEn
;
1027 Set_NB32_index_wait(dev
, index_reg
, 0x08, val
);
1029 /* Wait 200 MEMCLKs. */
1030 mct_Wait(50000); /* wait 200us */
1032 /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
1033 val
= Get_NB32_index_wait(dev
, index_reg
, 0x08);
1034 val
&= ~(1 << PhyFenceTrEn
);
1035 Set_NB32_index_wait(dev
, index_reg
, 0x08, val
);
1037 /* BIOS reads the phase recovery engine registers
1038 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
1040 for (index
= 0x50; index
<= 0x52; index
++) {
1041 val
= Get_NB32_index_wait(dev
, index_reg
, index
);
1042 avRecValue
+= val
& 0x7F;
1043 if (index
!= 0x52) {
1044 avRecValue
+= (val
>> 8) & 0x7F;
1045 avRecValue
+= (val
>> 16) & 0x7F;
1046 avRecValue
+= (val
>> 24) & 0x7F;
1050 val
= avRecValue
/ 9;
1055 /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
1057 val
= Get_NB32_index_wait(dev
, index_reg
, 0x0C);
1058 val
&= ~(0x1F << 16);
1059 val
|= (avRecValue
& 0x1F) << 16;
1060 Set_NB32_index_wait(dev
, index_reg
, 0x0C, val
);
1062 /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
1063 * delays (both channels). */
1064 val
= Get_NB32_index_wait(dev
, index_reg
, 0x04);
1065 Set_NB32_index_wait(dev
, index_reg
, 0x04, val
);
1069 void mct_Wait(u32 cycles
)
1074 /* Wait # of 50ns cycles
1075 This seems like a hack to me... */
1077 cycles
<<= 3; /* x8 (number of 1.25ns ticks) */
1079 msr
= 0x10; /* TSC */
1080 _RDMSR(msr
, &lo
, &hi
);
1083 _RDMSR(msr
, &lo
, &hi
);
1084 } while (lo
- saved
< cycles
);