tree: drop last paragraph of GPL copyright header
[coreboot.git] / src / northbridge / amd / amdmct / mct / mctsrc.c
blob1881a37c546215931059ec9c054e2038e0765331
1 /*
2 * This file is part of the coreboot project.
4 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
17 /******************************************************************************
18 Description: Receiver En and DQS Timing Training feature for DDR 2 MCT
19 ******************************************************************************/
/* Forward declarations for the static receiver-enable / DQS training
   helpers defined later in this file. */
21 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
22 struct DCTStatStruc *pDCTstat, u8 Pass);
23 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
24 u8 rcvrEnDly, u8 Channel,
25 u8 receiver, u8 Pass);
26 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
27 struct DCTStatStruc *pDCTstat,
28 u32 addr, u8 channel,
29 u8 pattern, u8 Pass);
30 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
31 struct DCTStatStruc *pDCTstat);
32 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
33 struct DCTStatStruc *pDCTstat, u8 Channel);
34 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
35 struct DCTStatStruc *pDCTstat, u8 Channel);
36 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
37 u8 RcvrEnDly, u8 where,
38 u8 Channel, u8 Receiver,
39 u32 dev, u32 index_reg,
40 u8 Addl_Index, u8 Pass);
41 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
42 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
43 struct DCTStatStruc *pDCTstat, u8 dct);
44 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
47 /* Warning: These must be located so they do not cross a logical 16-bit
48 segment boundary! */
/* Alternating-bit pattern used during the first training pass (selected in
 * mct_CompareTestPatternQW0_D when pattern != 1). */
49 static const u32 TestPattern0_D[] = {
50 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
51 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
52 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
53 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
/* Complementary alternating-bit pattern for the first pass (pattern == 1). */
55 static const u32 TestPattern1_D[] = {
56 0x55555555, 0x55555555, 0x55555555, 0x55555555,
57 0x55555555, 0x55555555, 0x55555555, 0x55555555,
58 0x55555555, 0x55555555, 0x55555555, 0x55555555,
59 0x55555555, 0x55555555, 0x55555555, 0x55555555,
/* Pseudo-random pattern used for the second training pass (see
 * mct_CompareTestPatternQW0_D, SecondPass branch). */
61 static const u32 TestPattern2_D[] = {
62 0x12345678, 0x87654321, 0x23456789, 0x98765432,
63 0x59385824, 0x30496724, 0x24490795, 0x99938733,
64 0x40385642, 0x38465245, 0x29432163, 0x05067894,
65 0x12349045, 0x98723467, 0x12387634, 0x34587623,
68 static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
69 struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
72 * 1. Copy the alpha and Beta patterns from ROM to Cache,
73 * aligning on 16 byte boundary
74 * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha
75 * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta
78 u32 *buf_a;
79 u32 *buf_b;
80 u32 *p_A;
81 u32 *p_B;
82 u8 i;
84 buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
85 buf_b = buf_a + 32; //??
86 p_A = (u32 *)SetupDqsPattern_1PassB(pass);
87 p_B = (u32 *)SetupDqsPattern_1PassA(pass);
89 for(i=0;i<16;i++) {
90 buf_a[i] = p_A[i];
91 buf_b[i] = p_B[i];
94 pDCTstat->PtrPatternBufA = (u32)buf_a;
95 pDCTstat->PtrPatternBufB = (u32)buf_b;
99 void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
100 struct DCTStatStruc *pDCTstat, u8 Pass)
102 if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
103 dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
/*
 * dqsTrainRcvrEn_SW: software DQS receiver-enable training.
 * For each channel and each enabled receiver (chip-select pair), sweep the
 * DqsRcvEnDly value while writing test patterns to DRAM and reading them
 * back, until a passing delay is found; then program the final per-lane
 * delays and derive the controller MaxRdLatency from the worst-case delay.
 * CPU state (CR4.OSFXSR, HWCR.wrap32dis/SSEDIS) and DIMM ECC are modified
 * up front and restored at the end.
 * NOTE(review): this listing appears to be missing short lines (lone braces
 * and blanks) dropped by the scrape — compare against the original file.
 */
107 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
108 struct DCTStatStruc *pDCTstat, u8 Pass)
110 u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
111 u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
112 u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
113 u8 Addl_Index = 0;
114 u8 Receiver;
115 u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
116 u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
117 u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
118 u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
119 u32 Errors;
121 u32 val;
122 u32 reg;
123 u32 dev;
124 u32 index_reg;
125 u32 ch_start, ch_end, ch;
126 u32 msr;
127 u32 cr4;
128 u32 lo, hi;
130 u8 valid;
131 u32 tmp;
132 u8 LastTest;
134 print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
135 print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
/* Pre-load a safely large MaxRdLatency (0xc8) while training runs. */
138 dev = pDCTstat->dev_dct;
139 ch_start = 0;
140 if(!pDCTstat->GangedMode) {
141 ch_end = 2;
142 } else {
143 ch_end = 1;
146 for (ch = ch_start; ch < ch_end; ch++) {
147 reg = 0x78 + (0x100 * ch);
148 val = Get_NB32(dev, reg);
149 val &= ~(0x3ff << 22);
150 val |= (0x0c8 << 22); /* Max Rd Lat */
151 Set_NB32(dev, reg, val);
154 Final_Value = 1;
155 if (Pass == FirstPass) {
156 mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
157 } else {
158 pDCTstat->DimmTrainFail = 0;
159 pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
161 print_t("TrainRcvrEn: 1\n");
/* Enable SSE2 (CR4.OSFXSR) for the pattern read/compare helpers,
 * remembering whether it was already on so it can be restored. */
163 cr4 = read_cr4();
164 if(cr4 & ( 1 << 9)) { /* save the old value */
165 _SSE2 = 1;
167 cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
168 write_cr4(cr4);
169 print_t("TrainRcvrEn: 2\n");
171 msr = HWCR;
172 _RDMSR(msr, &lo, &hi);
173 //FIXME: Why use SSEDIS
174 if(lo & (1 << 17)) { /* save the old value */
175 _Wrap32Dis = 1;
177 lo |= (1 << 17); /* HWCR.wrap32dis */
178 lo &= ~(1 << 15); /* SSEDIS */
179 _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
180 print_t("TrainRcvrEn: 3\n");
182 _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
/* Translate the memory Speed code into T1000, the cycle time in ns*1000. */
185 if(pDCTstat->Speed == 1) {
186 pDCTstat->T1000 = 5000; /* get the T1000 figure (cycle time (ns)*1K */
187 } else if(pDCTstat->Speed == 2) {
188 pDCTstat->T1000 = 3759;
189 } else if(pDCTstat->Speed == 3) {
190 pDCTstat->T1000 = 3003;
191 } else if(pDCTstat->Speed == 4) {
192 pDCTstat->T1000 = 2500;
193 } else if(pDCTstat->Speed == 5) {
194 pDCTstat->T1000 = 1876;
195 } else {
196 pDCTstat->T1000 = 0;
199 SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
200 print_t("TrainRcvrEn: 4\n");
202 Errors = 0;
203 dev = pDCTstat->dev_dct;
204 CTLRMaxDelay = 0;
206 for (Channel = 0; Channel < 2; Channel++) {
207 print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
208 print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
209 pDCTstat->Channel = Channel;
211 MaxDelay_CH[Channel] = 0;
212 index_reg = 0x98 + 0x100 * Channel;
214 Receiver = mct_InitReceiver_D(pDCTstat, Channel);
215 /* There are four receiver pairs, loosely associated with chipselects. */
216 for (; Receiver < 8; Receiver += 2) {
217 Addl_Index = (Receiver >> 1) * 3 + 0x10;
218 LastTest = DQS_FAIL;
220 /* mct_ModifyIndex_D */
221 RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
223 print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
225 if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
226 print_t("\t\t\tRank not enabled_D\n");
227 continue;
230 TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
231 if(!valid) { /* Address not supported on current CS */
232 print_t("\t\t\tAddress not supported on current CS\n");
233 continue;
236 TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
/* If the odd rank of this receiver pair is also populated, it is
 * tested too ("_2Ranks"); both ranks must pass the same delay. */
238 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
239 TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
240 if(!valid) { /* Address not supported on current CS */
241 print_t("\t\t\tAddress not supported on current CS+1\n");
242 continue;
244 TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
245 _2Ranks = 1;
246 } else {
247 _2Ranks = TestAddr1 = TestAddr1B = 0;
250 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
251 print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
252 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
253 print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
256 * Get starting RcvrEnDly value
258 RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
260 /* mct_GetInitFlag_D*/
261 if (Pass == FirstPass) {
262 pDCTstat->DqsRcvEn_Pass = 0;
263 } else {
264 pDCTstat->DqsRcvEn_Pass=0xFF;
266 pDCTstat->DqsRcvEn_Saved = 0;
/* Sweep candidate delay values until a stable pass is found or
 * the limit is reached. */
269 while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */
270 print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
272 /* callback not required
273 if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
274 goto skipDly;
277 /* Odd steps get another pattern such that even
278 and odd steps alternate. The pointers to the
279 patterns will be swaped at the end of the loop
280 so that they correspond. */
281 if(RcvrEnDly & 1) {
282 PatternA = 1;
283 PatternB = 0;
284 } else {
285 /* Even step */
286 PatternA = 0;
287 PatternB = 1;
290 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
291 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
292 if(_2Ranks) {
293 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
294 mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
297 mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
299 CurrTest = DQS_FAIL;
300 CurrTestSide0 = DQS_FAIL;
301 CurrTestSide1 = DQS_FAIL;
303 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
304 Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
305 proc_IOCLFLUSH_D(TestAddr0);
306 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
308 print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
310 // != 0x00 mean pass
312 if(Test0 == DQS_PASS) {
313 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */
314 /* ROM vs cache compare */
315 Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
316 proc_IOCLFLUSH_D(TestAddr0B);
317 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
319 print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
321 if(Test1 == DQS_PASS) {
322 CurrTestSide0 = DQS_PASS;
325 if(_2Ranks) {
326 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
327 /* ROM vs cache compare */
328 Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
329 proc_IOCLFLUSH_D(TestAddr1);
330 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
332 print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
334 if(Test0 == DQS_PASS) {
335 mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */
336 /* ROM vs cache compare */
337 Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
338 proc_IOCLFLUSH_D(TestAddr1B);
339 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
341 print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
342 if(Test1 == DQS_PASS) {
343 CurrTestSide1 = DQS_PASS;
/* Combine the two per-side results into the overall verdict
 * for this delay step. */
348 if(_2Ranks) {
349 if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
350 CurrTest = DQS_PASS;
352 } else if (CurrTestSide0 == DQS_PASS) {
353 CurrTest = DQS_PASS;
357 /* record first pass DqsRcvEn to stack */
358 valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
360 /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */
361 if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
362 RcvrEnDlyRmin = RcvrEnDly;
363 break;
366 LastTest = CurrTest;
368 /* swap the rank 0 pointers */
369 tmp = TestAddr0;
370 TestAddr0 = TestAddr0B;
371 TestAddr0B = tmp;
373 /* swap the rank 1 pointers */
374 tmp = TestAddr1;
375 TestAddr1 = TestAddr1B;
376 TestAddr1B = tmp;
378 print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
380 RcvrEnDly++;
382 } /* while RcvrEnDly */
384 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
385 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
386 print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
387 if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
388 /* no passing window */
389 pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
390 Errors |= 1 << SB_NORCVREN;
391 pDCTstat->ErrCode = SC_FatalErr;
394 if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
395 /* passing window too narrow, too far delayed*/
396 pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
397 Errors |= 1 << SB_SmallRCVR;
398 pDCTstat->ErrCode = SC_FatalErr;
399 RcvrEnDly = RcvrEnDlyLimit - 1;
400 pDCTstat->CSTrainFail |= 1 << Receiver;
401 pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
404 // CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass
405 mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
407 mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
409 if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
410 Errors |= 1 << SB_SmallRCVR;
413 RcvrEnDly += Pass1MemClkDly;
414 if(RcvrEnDly > CTLRMaxDelay) {
415 CTLRMaxDelay = RcvrEnDly;
418 } /* while Receiver */
420 MaxDelay_CH[Channel] = CTLRMaxDelay;
421 } /* for Channel */
/* MaxRdLatency must cover the slower of the two channels. */
423 CTLRMaxDelay = MaxDelay_CH[0];
424 if (MaxDelay_CH[1] > CTLRMaxDelay)
425 CTLRMaxDelay = MaxDelay_CH[1];
427 for (Channel = 0; Channel < 2; Channel++) {
428 mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
431 ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
433 if(_DisableDramECC) {
434 mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
437 if (Pass == FirstPass) {
438 /*Disable DQSRcvrEn training mode */
439 print_t("TrainRcvrEn: mct_DisableDQSRcvEn_D\n");
440 mct_DisableDQSRcvEn_D(pDCTstat);
/* Restore the CPU state saved at entry (wrap32dis and OSFXSR). */
443 if(!_Wrap32Dis) {
444 msr = HWCR;
445 _RDMSR(msr, &lo, &hi);
446 lo &= ~(1<<17); /* restore HWCR.wrap32dis */
447 _WRMSR(msr, lo, hi);
449 if(!_SSE2){
450 cr4 = read_cr4();
451 cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
452 write_cr4(cr4);
455 #if DQS_TRAIN_DEBUG > 0
457 u8 Channel;
458 printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
459 for(Channel = 0; Channel<2; Channel++) {
460 printk(BIOS_DEBUG, "Channel: %02x: %02x\n", Channel, pDCTstat->CH_MaxRdLat[Channel]);
463 #endif
465 #if DQS_TRAIN_DEBUG > 0
467 u8 val;
468 u8 Channel, Receiver;
469 u8 i;
470 u8 *p;
472 printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
473 for(Channel = 0; Channel < 2; Channel++) {
474 printk(BIOS_DEBUG, "Channel: %02x\n", Channel);
475 for(Receiver = 0; Receiver<8; Receiver+=2) {
476 printk(BIOS_DEBUG, "\t\tReceiver: %02x: ", Receiver);
477 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
478 for (i=0;i<8; i++) {
479 val = p[i];
480 printk(BIOS_DEBUG, "%02x ", val);
482 printk(BIOS_DEBUG, "\n");
486 #endif
488 print_tx("TrainRcvrEn: Status ", pDCTstat->Status);
489 print_tx("TrainRcvrEn: ErrStatus ", pDCTstat->ErrStatus);
490 print_tx("TrainRcvrEn: ErrCode ", pDCTstat->ErrCode);
491 print_t("TrainRcvrEn: Done\n");
495 u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
497 if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
498 return 8;
499 } else {
500 return 0;
505 static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
508 * Program final DqsRcvEnDly to additional index for DQS receiver
509 * enabled delay
511 mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
515 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
517 u8 ch_end, ch;
518 u32 reg;
519 u32 dev;
520 u32 val;
522 dev = pDCTstat->dev_dct;
523 if (pDCTstat->GangedMode) {
524 ch_end = 1;
525 } else {
526 ch_end = 2;
529 for (ch=0; ch<ch_end; ch++) {
530 reg = 0x78 + 0x100 * ch;
531 val = Get_NB32(dev, reg);
532 val &= ~(1 << DqsRcvEnTrain);
533 Set_NB32(dev, reg, val);
538 /* mct_ModifyIndex_D
539 * Function only used once so it was inlined.
543 /* mct_GetInitFlag_D
544 * Function only used once so it was inlined.
548 void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
549 u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
550 u32 index_reg, u8 Addl_Index, u8 Pass)
552 u32 index;
553 u8 i;
554 u8 *p;
555 u32 val;
557 if(RcvrEnDly == 0xFE) {
558 /*set the boudary flag */
559 pDCTstat->Status |= 1 << SB_DQSRcvLimit;
562 /* DimmOffset not needed for CH_D_B_RCVRDLY array */
565 for(i=0; i < 8; i++) {
566 if(FinalValue) {
567 /*calculate dimm offset */
568 p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
569 RcvrEnDly = p[i];
572 /* if flag=0, set DqsRcvEn value to reg. */
573 /* get the register index from table */
574 index = Table_DQSRcvEn_Offset[i >> 1];
575 index += Addl_Index; /* DIMMx DqsRcvEn byte0 */
576 val = Get_NB32_index_wait(dev, index_reg, index);
577 if(i & 1) {
578 /* odd byte lane */
579 val &= ~(0xFF << 16);
580 val |= (RcvrEnDly << 16);
581 } else {
582 /* even byte lane */
583 val &= ~0xFF;
584 val |= RcvrEnDly;
586 Set_NB32_index_wait(dev, index_reg, index, val);
/*
 * mct_SetMaxLatency_D: compute and program F2x[1,0]78[MaxRdLatency] for one
 * channel (channel 0 stands in for both in ganged mode).  The sub-total is
 * built in 1/2-MEMCLK units from CAS latency, registered-DIMM and address
 * prelaunch adjustments, RdPtrInit slack and the worst-case DQS receiver
 * enable delay, has fixed processor delays added, and is then converted
 * from MEMCLKs to northbridge clocks (NCLKs) with round-up.
 */
591 static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
593 u32 dev;
594 u32 reg;
595 u16 SubTotal;
596 u32 index_reg;
597 u32 reg_off;
598 u32 val;
599 u32 valx;
601 if(pDCTstat->GangedMode)
602 Channel = 0;
604 dev = pDCTstat->dev_dct;
605 reg_off = 0x100 * Channel;
606 index_reg = 0x98 + reg_off;
608 /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
609 val = Get_NB32(dev, 0x88 + reg_off);
610 SubTotal = ((val & 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */
612 /* If registered DIMMs are being used then
613 * add 1 MEMCLK to the sub-total.
615 val = Get_NB32(dev, 0x90 + reg_off);
616 if(!(val & (1 << UnBuffDimm)))
617 SubTotal += 2;
619 /* If the address prelaunch is setup for 1/2 MEMCLKs then
620 * add 1, else add 2 to the sub-total.
621 * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
623 val = Get_NB32_index_wait(dev, index_reg, 0x04);
624 if(!(val & 0x00202020))
625 SubTotal += 1;
626 else
627 SubTotal += 2;
629 /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
630 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
631 val = Get_NB32(dev, 0x78 + reg_off);
632 SubTotal += 8 - (val & 0x0f);
634 /* Convert bits 7-5 (also referred to as the course delay) of
635 * the current (or worst case) DQS receiver enable delay to
636 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
638 SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */
640 /* Add 5.5 to the sub-total. 5.5 represents part of the
641 * processor specific constant delay value in the DRAM
642 * clock domain.
644 SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
645 SubTotal += 11; /*add 5.5 1/2MemClk */
647 /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
648 * clocks (NCLKs) as follows (assuming DDR400 and assuming
649 * that no P-state or link speed changes have occurred).
652 /* New formula:
653 * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
654 val = Get_NB32(dev, 0x94 + reg_off);
656 /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
657 val &= 7;
658 if (val == 4) {
659 val++; /* adjust for DDR2-1066 */
661 valx = (val + 3) << 2;
663 val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
664 SubTotal *= ((val & 0x1f) + 4 ) * 3;
/* Integer division with round-up to avoid undershooting the latency. */
666 SubTotal /= valx;
667 if (SubTotal % valx) { /* round up */
668 SubTotal++;
671 /* Add 5 NCLKs to the sub-total. 5 represents part of the
672 * processor specific constant value in the northbridge
673 * clock domain.
675 SubTotal += 5;
677 pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
678 if(pDCTstat->GangedMode) {
679 pDCTstat->CH_MaxRdLat[1] = SubTotal;
682 /* Program the F2x[1, 0]78[MaxRdLatency] register with
683 * the total delay value (in NCLKs).
686 reg = 0x78 + reg_off;
687 val = Get_NB32(dev, reg);
688 val &= ~(0x3ff << 22);
689 val |= (SubTotal & 0x3ff) << 22;
691 /* program MaxRdLatency to correspond with current delay */
692 Set_NB32(dev, reg, val);
/*
 * mct_SavePassRcvEnDly_D: record the byte lanes that newly passed (first
 * pass) or newly failed (second pass, mask inverted) at the current
 * rcvrEnDly into the per-lane delay array, update DqsRcvEn_Saved, and
 * return mct_SaveRcvEnDly_D_1Pass's verdict for the caller's sweep loop.
 */
696 static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
697 u8 rcvrEnDly, u8 Channel,
698 u8 receiver, u8 Pass)
700 u8 i;
701 u8 mask_Saved, mask_Pass;
702 u8 *p;
704 /* calculate dimm offset
705 * not needed for CH_D_B_RCVRDLY array
708 /* cmp if there has new DqsRcvEnDly to be recorded */
709 mask_Pass = pDCTstat->DqsRcvEn_Pass;
711 if(Pass == SecondPass) {
712 mask_Pass = ~mask_Pass;
715 mask_Saved = pDCTstat->DqsRcvEn_Saved;
716 if(mask_Pass != mask_Saved) {
718 /* find desired stack offset according to channel/dimm/byte */
719 if(Pass == SecondPass) {
720 // FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1];
721 p = 0; // Keep the compiler happy.
/* NOTE(review): if this SecondPass path were ever taken with a lane bit
 * newly set, the p[i] store below would write through a null pointer.
 * It relies on SecondPass never being used here (see FIXME above) —
 * confirm before reusing this code for a part that runs a second pass. */
722 } else {
723 mask_Saved &= mask_Pass;
724 p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
726 for(i=0; i < 8; i++) {
727 /* cmp per byte lane */
728 if(mask_Pass & (1 << i)) {
729 if(!(mask_Saved & (1 << i))) {
730 /* save RcvEnDly to stack, according to
731 the related Dimm/byte lane */
732 p[i] = (u8)rcvrEnDly;
733 mask_Saved |= 1 << i;
737 pDCTstat->DqsRcvEn_Saved = mask_Saved;
739 return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
/*
 * mct_CompareTestPatternQW0_D: compare the first beat (QW0) of the
 * just-read test pattern against the reference buffer, updating the
 * per-byte-lane pass mask DqsRcvEn_Pass, and return DQS_PASS/DQS_FAIL
 * under pass-specific rules (first pass: any lane passing counts; second
 * pass: all lanes must pass, and the final result is inverted).
 * NOTE(review): `value` is u8 while read32_fs() looks like a 32-bit read,
 * and `addr` is not advanced inside the loop in this listing — short lines
 * may have been dropped from this view; verify against the original file.
 */
743 static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
744 struct DCTStatStruc *pDCTstat,
745 u32 addr, u8 channel,
746 u8 pattern, u8 Pass)
748 /* Compare only the first beat of data. Since target addrs are cache
749 * line aligned, the Channel parameter is used to determine which
750 * cache QW to compare.
753 u8 *test_buf;
754 u8 i;
755 u8 result;
756 u8 value;
/* Select the reference pattern: 0xaa/0x55 tables on the first pass,
 * the pseudo-random table on the second. */
759 if(Pass == FirstPass) {
760 if(pattern==1) {
761 test_buf = (u8 *)TestPattern1_D;
762 } else {
763 test_buf = (u8 *)TestPattern0_D;
765 } else { // Second Pass
766 test_buf = (u8 *)TestPattern2_D;
769 SetUpperFSbase(addr);
770 addr <<= 8;
772 if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
773 addr += 8; /* second channel */
774 test_buf += 8;
777 print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32)test_buf, " | addr_lo = ", addr, 4);
778 for (i=0; i<8; i++) {
779 value = read32_fs(addr);
780 print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], " | ", value, 4);
782 if (value == test_buf[i]) {
783 pDCTstat->DqsRcvEn_Pass |= (1<<i);
784 } else {
785 pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
789 result = DQS_FAIL;
791 if (Pass == FirstPass) {
792 /* if first pass, at least one byte lane pass
793 * ,then DQS_PASS=1 and will set to related reg.
795 if(pDCTstat->DqsRcvEn_Pass != 0) {
796 result = DQS_PASS;
797 } else {
798 result = DQS_FAIL;
801 } else {
802 /* if second pass, at least one byte lane fail
803 * ,then DQS_FAIL=1 and will set to related reg.
805 if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
806 result = DQS_FAIL;
807 } else {
808 result = DQS_PASS;
812 /* if second pass, we can't find the fail until FFh,
813 * then let it fail to save the final delay
815 if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
816 result = DQS_FAIL;
817 pDCTstat->DqsRcvEn_Pass = 0;
820 /* second pass needs to be inverted
821 * FIXME? this could be inverted in the above code to start with...
823 if(Pass == SecondPass) {
824 if (result == DQS_PASS) {
825 result = DQS_FAIL;
826 } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
827 result = DQS_PASS;
832 return result;
837 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
838 struct DCTStatStruc *pDCTstat)
840 /* Initialize the DQS Positions in preparation for
841 * Receiver Enable Training.
842 * Write Position is 1/2 Memclock Delay
843 * Read Position is 1/2 Memclock Delay
845 u8 i;
846 for(i=0;i<2; i++){
847 InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i);
852 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
853 struct DCTStatStruc *pDCTstat, u8 Channel)
855 /* Initialize the DQS Positions in preparation for
856 * Receiver Enable Training.
857 * Write Position is no Delay
858 * Read Position is 1/2 Memclock Delay
861 u8 i, j;
862 u32 dword;
863 u8 dn = 2; // TODO: Rev C could be 4
864 u32 dev = pDCTstat->dev_dct;
865 u32 index_reg = 0x98 + 0x100 * Channel;
868 // FIXME: add Cx support
869 dword = 0x00000000;
870 for(i=1; i<=3; i++) {
871 for(j=0; j<dn; j++)
872 /* DIMM0 Write Data Timing Low */
873 /* DIMM0 Write ECC Timing */
874 Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
877 /* errata #180 */
878 dword = 0x2f2f2f2f;
879 for(i=5; i<=6; i++) {
880 for(j=0; j<dn; j++)
881 /* DIMM0 Read DQS Timing Control Low */
882 Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
885 dword = 0x0000002f;
886 for(j=0; j<dn; j++)
887 /* DIMM0 Read DQS ECC Timing Control */
888 Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
892 void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
894 u32 dev;
895 u32 index_reg;
896 u32 index;
897 u8 ChipSel;
898 u8 *p;
899 u32 val;
901 dev = pDCTstat->dev_dct;
902 index_reg = 0x98 + Channel * 0x100;
903 index = 0x12;
904 p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
905 print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);
906 for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
907 val = p[ChipSel>>1];
908 Set_NB32_index_wait(dev, index_reg, index, val);
909 print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
910 ChipSel, " rcvr_delay ", val, 2);
911 index += 3;
/*
 * CalcEccDQSRcvrEn_D: derive the ECC byte lane's receiver-enable delay for
 * each enabled chip-select pair by interpolating between the two data
 * lanes most similar to the ECC lane (lane indices packed in
 * CH_EccDQSLike, weight in CH_EccDQSScale), then program the results via
 * SetEccDQSRcvrEn_D.
 */
916 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
917 struct DCTStatStruc *pDCTstat, u8 Channel)
919 u8 ChipSel;
920 u16 EccDQSLike;
921 u8 EccDQSScale;
922 u32 val, val0, val1;
924 EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
925 EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
927 for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
928 if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
929 u8 *p;
930 p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
932 /* DQS Delay Value of Data Bytelane
933 * most like ECC byte lane */
934 val0 = p[EccDQSLike & 0x07];
935 /* DQS Delay Value of Data Bytelane
936 * 2nd most like ECC byte lane */
937 val1 = p[(EccDQSLike>>8) & 0x07];
/* val = |val0 - val1|, then weighted by the scale factor below. */
939 if(val0 > val1) {
940 val = val0 - val1;
941 } else {
942 val = val1 - val0;
/* NOTE(review): '~EccDQSScale' is a bitwise NOT, so with the >>8 this acts
 * roughly as a (0xFF - scale)/256 weight, and the 'val -= val1' branch
 * below looks asymmetric with 'val += val0' — verify the interpolation
 * against the vendor reference (BKDG) before modifying. */
945 val *= ~EccDQSScale;
946 val >>= 8; // /256
948 if(val0 > val1) {
949 val -= val1;
950 } else {
951 val += val0;
954 pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
957 SetEccDQSRcvrEn_D(pDCTstat, Channel);
960 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
961 struct DCTStatStruc *pDCTstatA)
963 u8 Node;
964 u8 i;
966 for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
967 struct DCTStatStruc *pDCTstat;
968 pDCTstat = pDCTstatA + Node;
969 if (!pDCTstat->NodePresent)
970 break;
971 if (pDCTstat->DCTSysLimit) {
972 for(i=0; i<2; i++)
973 CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
979 void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
980 struct DCTStatStruc *pDCTstatA)
983 u8 Node = 0;
984 struct DCTStatStruc *pDCTstat;
986 // FIXME: skip for Ax
987 while (Node < MAX_NODES_SUPPORTED) {
988 pDCTstat = pDCTstatA + Node;
990 if(pDCTstat->DCTSysLimit) {
991 fenceDynTraining_D(pMCTstat, pDCTstat, 0);
992 fenceDynTraining_D(pMCTstat, pDCTstat, 1);
994 Node++;
/*
 * fenceDynTraining_D: phy fence training for one DCT.  Seed the phase
 * recovery engine registers (F2x[1,0]9C_x50..x52), pulse PhyFenceTrEn in
 * F2x[1,0]9C_x08 around a 200us wait, average the nine recovered 7-bit
 * values (4 + 4 + 1 across the three registers, rounding up), write
 * (average - 8) into F2x[1,0]9C_x0C[PhyFence], and finally rewrite the
 * address/command timing register so the new fence value takes effect.
 */
999 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
1000 struct DCTStatStruc *pDCTstat, u8 dct)
1002 u16 avRecValue;
1003 u32 val;
1004 u32 dev;
1005 u32 index_reg = 0x98 + 0x100 * dct;
1006 u32 index;
1008 /* BIOS first programs a seed value to the phase recovery engine
1009 * (recommended 19) registers.
1010 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
1011 * F2x[1,0]9C_x52.) .
1014 dev = pDCTstat->dev_dct;
1015 for (index = 0x50; index <= 0x52; index ++) {
1016 val = (FenceTrnFinDlySeed & 0x1F);
/* x50/x51 hold four byte-lane fields; replicate the seed to all four. */
1017 if (index != 0x52) {
1018 val |= val << 8 | val << 16 | val << 24;
1020 Set_NB32_index_wait(dev, index_reg, index, val);
1024 /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
1025 val = Get_NB32_index_wait(dev, index_reg, 0x08);
1026 val |= 1 << PhyFenceTrEn;
1027 Set_NB32_index_wait(dev, index_reg, 0x08, val);
1029 /* Wait 200 MEMCLKs. */
1030 mct_Wait(50000); /* wait 200us */
1032 /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
1033 val = Get_NB32_index_wait(dev, index_reg, 0x08);
1034 val &= ~(1 << PhyFenceTrEn);
1035 Set_NB32_index_wait(dev, index_reg, 0x08, val);
1037 /* BIOS reads the phase recovery engine registers
1038 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
1039 avRecValue = 0;
1040 for (index = 0x50; index <= 0x52; index ++) {
1041 val = Get_NB32_index_wait(dev, index_reg, index);
1042 avRecValue += val & 0x7F;
1043 if (index != 0x52) {
1044 avRecValue += (val >> 8) & 0x7F;
1045 avRecValue += (val >> 16) & 0x7F;
1046 avRecValue += (val >> 24) & 0x7F;
/* Average of the nine 7-bit recovered values, rounded up. */
1050 val = avRecValue / 9;
1051 if (avRecValue % 9)
1052 val++;
1053 avRecValue = val;
1055 /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
1056 avRecValue -= 8;
1057 val = Get_NB32_index_wait(dev, index_reg, 0x0C);
1058 val &= ~(0x1F << 16);
1059 val |= (avRecValue & 0x1F) << 16;
1060 Set_NB32_index_wait(dev, index_reg, 0x0C, val);
1062 /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
1063 * delays (both channels). */
1064 val = Get_NB32_index_wait(dev, index_reg, 0x04);
1065 Set_NB32_index_wait(dev, index_reg, 0x04, val);
1069 void mct_Wait(u32 cycles)
1071 u32 saved;
1072 u32 hi, lo, msr;
1074 /* Wait # of 50ns cycles
1075 This seems like a hack to me... */
1077 cycles <<= 3; /* x8 (number of 1.25ns ticks) */
1079 msr = 0x10; /* TSC */
1080 _RDMSR(msr, &lo, &hi);
1081 saved = lo;
1082 do {
1083 _RDMSR(msr, &lo, &hi);
1084 } while (lo - saved < cycles );