/* SPDX-License-Identifier: GPL-2.0-or-later */

#include <console/console.h>
#include <commonlib/helpers.h>
#include <string.h>
#include <arch/io.h>
#include <device/mmio.h>
#include <device/pci_ops.h>
#include <device/smbus_host.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/cache.h>
#include <cbmem.h>
#include <cf9_reset.h>
#include <option.h>
#include <device/pci_def.h>
#include <device/device.h>
#include <halt.h>
#include <spd.h>
#include <timestamp.h>
#include <cpu/x86/mtrr.h>
#include <cpu/intel/speedstep.h>
#include <cpu/intel/turbo.h>
#include <mrc_cache.h>
#include <southbridge/intel/ibexpeak/me.h>
#include <southbridge/intel/common/pmbase.h>
#include <delay.h>
#include <types.h>

#include "chip.h"
#include "ironlake.h"
#include "raminit.h"
#include "raminit_tables.h"

#define NORTHBRIDGE PCI_DEV(0, 0, 0)
#define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
#define GMA PCI_DEV(0, 0x2, 0x0)

#define FOR_ALL_RANKS \
	for (channel = 0; channel < NUM_CHANNELS; channel++) \
		for (slot = 0; slot < NUM_SLOTS; slot++) \
			for (rank = 0; rank < NUM_RANKS; rank++)

#define FOR_POPULATED_RANKS \
	for (channel = 0; channel < NUM_CHANNELS; channel++) \
		for (slot = 0; slot < NUM_SLOTS; slot++) \
			for (rank = 0; rank < NUM_RANKS; rank++) \
				if (info->populated_ranks[channel][slot][rank])

#define FOR_POPULATED_RANKS_BACKWARDS \
	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \
		for (slot = 0; slot < NUM_SLOTS; slot++) \
			for (rank = 0; rank < NUM_RANKS; rank++) \
				if (info->populated_ranks[channel][slot][rank])

#include <lib.h>	/* Prototypes */

typedef struct _u128 {
	u64 lo;
	u64 hi;
} u128;

static void read128(u32 addr, u64 *out)
{
	u128 ret;
	u128 stor;
	asm volatile ("movdqu %%xmm0, %0\n"
		      "movdqa (%2), %%xmm0\n"
		      "movdqu %%xmm0, %1\n"
		      "movdqu %0, %%xmm0" : "+m" (stor), "=m" (ret) : "r" (addr));
	out[0] = ret.lo;
	out[1] = ret.hi;
}

/*
 * Ironlake memory I/O timings are located in scan chains, accessible
 * through MCHBAR register groups. Each channel has a scan chain, and
 * there is a global scan chain too. Each chain is broken into smaller
 * sections of N bits, where N <= 32. Each section allows reading and
 * writing a certain parameter. Each section contains N - 2 data bits
 * and two additional bits: a Mask bit and a Halt bit.
 */

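/*
 * Illustrative sketch (no behavioral change): a parameter in the global
 * chain is read with read_1d0(addr, bits) and written back with
 * write_1d0(val, addr, bits, flag). The payload written to MCHBAR 0x1d4 is
 *
 *	(val & ((1 << bits) - 1)) | 2 << bits | flag << bits
 *
 * i.e. the N - 2 data bits, a fixed bit at position bits + 1 and the flag
 * at position bits -- presumably the Halt and Mask bits mentioned above.
 * The per-channel chains use the same layout via read_500()/write_500().
 */
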
/* OK */
static void write_1d0(u32 val, u16 addr, int bits, int flag)
{
	mchbar_write32(0x1d0, 0);
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
	mchbar_write32(0x1d4, (val & ((1 << bits) - 1)) | 2 << bits | flag << bits);
	mchbar_write32(0x1d0, 1 << 30 | addr);
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
}

/* OK */
static u16 read_1d0(u16 addr, int split)
{
	u32 val;
	mchbar_write32(0x1d0, 0);
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
	mchbar_write32(0x1d0, 1 << 31 | (((mchbar_read8(0x246) >> 2) & 3) + 0x361 - addr));
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
	val = mchbar_read32(0x1d8);
	write_1d0(0, 0x33d, 0, 0);
	write_1d0(0, 0x33d, 0, 0);
	val &= ((1 << split) - 1);
	// printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val);
	return val;
}

static void sfence(void)
{
	asm volatile ("sfence");
}

static inline u16 get_lane_offset(int slot, int rank, int lane)
{
	return 0x124 * lane + ((lane & 4) ? 0x23e : 0) + 11 * rank + 22 * slot -
	    0x452 * (lane == 8);
}

static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
{
	const u16 offs[] = { 0x1d, 0xa8, 0xe6, 0x5c };
	return get_lane_offset(slot, rank, lane) + offs[(tm + 3) % 4];
}

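/*
 * For example, timing class tm = 0 maps to offs[3] = 0x5c, so lane 0 /
 * slot 0 / rank 0 / tm 0 lives at scan-chain address 0x5c; each further
 * lane adds 0x124, with extra fix-ups for lanes 4..7 and the ECC lane 8.
 */
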
static u32 gav_real(int line, u32 in)
{
	// printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
	return in;
}

#define gav(x) gav_real(__LINE__, (x))

/* Global allocation of timings_car */
timing_bounds_t timings_car[64];

/* OK */
static u16 read_500(struct raminfo *info, int channel, u16 addr, int split)
{
	u32 val;
	info->last_500_command[channel] = 1 << 31;
	mchbar_write32(0x500 + (channel << 10), 0);
	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
		;
	mchbar_write32(0x500 + (channel << 10),
		       1 << 31 | (((mchbar_read8(0x246 + (channel << 10)) >> 2) & 3)
				  + 0xb88 - addr));
	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
		;
	val = mchbar_read32(0x508 + (channel << 10));
	return val & ((1 << split) - 1);
}

/* OK */
static void write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
		      int flag)
{
	if (info->last_500_command[channel] == 1 << 31) {
		info->last_500_command[channel] = 1 << 30;
		write_500(info, channel, 0, 0xb61, 0, 0);
	}
	mchbar_write32(0x500 + (channel << 10), 0);
	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
		;
	mchbar_write32(0x504 + (channel << 10),
		       (val & ((1 << bits) - 1)) | 2 << bits | flag << bits);
	mchbar_write32(0x500 + (channel << 10), 1 << 30 | addr);
	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
		;
}

static void rmw_500(struct raminfo *info, int channel, u16 addr, int bits, u32 and, u32 or)
{
	const u32 val = read_500(info, channel, addr, bits) & and;
	write_500(info, channel, val | or, addr, bits, 1);
}

static int rw_test(int rank)
{
	const u32 mask = 0xf00fc33c;
	int ok = 0xff;
	int i;
	for (i = 0; i < 64; i++)
		write32p((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32p((rank << 28) | (i << 2)));
	sfence();
	for (i = 0; i < 32; i++) {
		u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0);
		write32p((rank << 28) | (i << 3), pat);
		write32p((rank << 28) | (i << 3) | 4, pat);
	}
	sfence();
	for (i = 0; i < 32; i++) {
		u8 pat = (((mask >> i) & 1) ? 0xff : 0);
		int j;
		u32 val;
		gav(val = read32p((rank << 28) | (i << 3)));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(1 << j);
		gav(val = read32p((rank << 28) | (i << 3) | 4));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(16 << j);
	}
	sfence();
	for (i = 0; i < 64; i++)
		write32p((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32p((rank << 28) | (i << 2)));

	return ok;
}

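/*
 * rw_test() returns a bitmask with one bit per byte lane of the 64-bit
 * bus: bits 0..3 cover the low dword, bits 4..7 the high dword, and a
 * cleared bit marks a lane that failed to read back the test pattern.
 */
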
static void
program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
{
	int lane;
	for (lane = 0; lane < 8; lane++) {
		write_500(info, channel,
			  base + info->training.lane_timings[2][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 2, slot, rank), 9, 0);
		write_500(info, channel,
			  base + info->training.lane_timings[3][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 3, slot, rank), 9, 0);
	}
}

static void write_26c(int channel, u16 si)
{
	mchbar_write32(0x26c + (channel << 10), 0x03243f35);
	mchbar_write32(0x268 + (channel << 10), 0xcfc00000 | si << 9);
	mchbar_write16(0x2b9 + (channel << 10), si);
}

static void toggle_1d0_142_5ff(void)
{
	u32 reg32 = gav(read_1d0(0x142, 3));
	if (reg32 & (1 << 1))
		write_1d0(0, 0x142, 3, 1);

	mchbar_write8(0x5ff, 0);
	mchbar_write8(0x5ff, 1 << 7);
	if (reg32 & (1 << 1))
		write_1d0(0x2, 0x142, 3, 1);
}

static u32 get_580(int channel, u8 addr)
{
	u32 ret;
	toggle_1d0_142_5ff();
	mchbar_write32(0x580 + (channel << 10), 0x8493c012 | addr);
	mchbar_setbits8(0x580 + (channel << 10), 1 << 0);
	while (!((ret = mchbar_read32(0x580 + (channel << 10))) & (1 << 16)))
		;
	mchbar_clrbits8(0x580 + (channel << 10), 1 << 0);
	return ret;
}

#define RANK_SHIFT 28
#define CHANNEL_SHIFT 10

static void seq9(struct raminfo *info, int channel, int slot, int rank)
{
	int i, lane;

	for (i = 0; i < 2; i++)
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i + 1][channel][slot][rank][lane],
				  get_timing_register_addr(lane, i + 1, slot, rank), 9, 0);

	write_1d0(1, 0x103, 6, 1);
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 0, slot, rank), 9, 0);

	for (i = 0; i < 2; i++) {
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i + 1][channel][slot][rank][lane],
				  get_timing_register_addr(lane, i + 1, slot, rank), 9, 0);
		gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
	}

	toggle_1d0_142_5ff();
	write_1d0(0x2, 0x142, 3, 1);

	for (lane = 0; lane < 8; lane++) {
		// printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[2][channel][slot][rank][lane] =
		    read_500(info, channel, get_timing_register_addr(lane, 2, slot, rank), 9);
		// printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[3][channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane] + 0x20;
	}
}

static int count_ranks_in_channel(struct raminfo *info, int channel)
{
	int slot, rank;
	int res = 0;
	for (slot = 0; slot < NUM_SLOTS; slot++)
		for (rank = 0; rank < NUM_RANKS; rank++)
			res += info->populated_ranks[channel][slot][rank];
	return res;
}

static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	int add;

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	if (channel == 0)
		add = count_ranks_in_channel(info, 1);
	else
		add = 0;
	if (!s3resume)
		gav(rw_test(rank + add));
	program_timings(info, 0x00, channel, slot, rank);
	if (!s3resume)
		gav(rw_test(rank + add));
	if (!s3resume)
		gav(rw_test(rank + add));
	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	mchbar_write8(0x5ff, 0);
	mchbar_write8(0x5ff, 1 << 7);
}

static void set_4cf(struct raminfo *info, int channel, u8 bit, u8 val)
{
	const u16 regtable[] = { 0x4cf, 0x659, 0x697 };

	val &= 1;
	for (int i = 0; i < ARRAY_SIZE(regtable); i++)
		rmw_500(info, channel, regtable[i], 4, ~(1 << bit), val << bit);
}

static void set_334(int zero)
{
	int j, k, channel;
	const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
	u32 vd8[2][16];

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (j = 0; j < 4; j++) {
			u32 a = (j == 1) ? 0x29292929 : 0x31313131;
			u32 lmask = (j == 3) ? 0xffff : 0xffffffff;
			u16 c;
			if ((j == 0 || j == 3) && zero)
				c = 0;
			else if (j == 3)
				c = 0x5f;
			else
				c = 0x5f5f;

			for (k = 0; k < 2; k++) {
				mchbar_write32(0x138 + 8 * k, channel << 26 | j << 24);
				gav(vd8[1][(channel << 3) | (j << 1) | k] =
				    mchbar_read32(0x138 + 8 * k));
				gav(vd8[0][(channel << 3) | (j << 1) | k] =
				    mchbar_read32(0x13c + 8 * k));
			}

			mchbar_write32(0x334 + (channel << 10) + j * 0x44, zero ? 0 : val3[j]);
			mchbar_write32(0x32c + (channel << 10) + j * 0x44,
				       zero ? 0 : 0x18191819 & lmask);
			mchbar_write16(0x34a + (channel << 10) + j * 0x44, c);
			mchbar_write32(0x33c + (channel << 10) + j * 0x44,
				       zero ? 0 : a & lmask);
			mchbar_write32(0x344 + (channel << 10) + j * 0x44,
				       zero ? 0 : a & lmask);
		}
	}

	mchbar_setbits32(0x130, 1 << 0);
	while (mchbar_read8(0x130) & 1)
		;
}

static void rmw_1d0(u16 addr, u32 and, u32 or, int split)
{
	u32 v;
	v = read_1d0(addr, split);
	write_1d0((v & and) | or, addr, split, 1);
}

static int find_highest_bit_set(u16 val)
{
	int i;
	for (i = 15; i >= 0; i--)
		if (val & (1 << i))
			return i;
	return -1;
}

static int find_lowest_bit_set32(u32 val)
{
	int i;
	for (i = 0; i < 32; i++)
		if (val & (1 << i))
			return i;
	return -1;
}

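/*
 * SPD byte offsets used below. They match the JEDEC DDR3 SPD layout
 * (e.g. byte 12 is tCKmin, bytes 14/15 are the supported-CL bitmask and
 * byte 16 is tAAmin, all in medium-timebase units).
 */
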
enum {
	DEVICE_TYPE = 2,
	MODULE_TYPE = 3,
	DENSITY = 4,
	RANKS_AND_DQ = 7,
	MEMORY_BUS_WIDTH = 8,
	TIMEBASE_DIVIDEND = 10,
	TIMEBASE_DIVISOR = 11,
	CYCLETIME = 12,

	CAS_LATENCIES_LSB = 14,
	CAS_LATENCIES_MSB = 15,
	CAS_LATENCY_TIME = 16,
	THERMAL_AND_REFRESH = 31,
	REFERENCE_RAW_CARD_USED = 62,
	RANK1_ADDRESS_MAPPING = 63
};

static void calculate_timings(struct raminfo *info)
{
	unsigned int cycletime;
	unsigned int cas_latency_time;
	unsigned int supported_cas_latencies;
	unsigned int channel, slot;
	unsigned int clock_speed_index;
	unsigned int min_cas_latency;
	unsigned int cas_latency;
	unsigned int max_clock_index;

	/* Find common CAS latency */
	supported_cas_latencies = 0x3fe;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0])
				supported_cas_latencies &= 2 *
				    (info->spd[channel][slot][CAS_LATENCIES_LSB] |
				     (info->spd[channel][slot][CAS_LATENCIES_MSB] << 8));

	max_clock_index = MIN(3, info->max_supported_clock_speed_index);

	cycletime = min_cycletime[max_clock_index];
	cas_latency_time = min_cas_latency_time[max_clock_index];

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0]) {
				unsigned int timebase;
				timebase = 1000 * info->spd[channel][slot][TIMEBASE_DIVIDEND] /
				    info->spd[channel][slot][TIMEBASE_DIVISOR];
				cycletime = MAX(cycletime,
						timebase * info->spd[channel][slot][CYCLETIME]);
				cas_latency_time = MAX(cas_latency_time,
						       timebase * info->spd[channel][slot]
						       [CAS_LATENCY_TIME]);
			}
	if (cycletime > min_cycletime[0])
		die("RAM init: Decoded SPD DRAM freq is slower than the controller minimum!");
	for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
		if (cycletime == min_cycletime[clock_speed_index])
			break;
		if (cycletime > min_cycletime[clock_speed_index]) {
			clock_speed_index--;
			cycletime = min_cycletime[clock_speed_index];
			break;
		}
	}
	min_cas_latency = DIV_ROUND_UP(cas_latency_time, cycletime);
	cas_latency = 0;
	while (supported_cas_latencies) {
		cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
		if (cas_latency <= min_cas_latency)
			break;
		supported_cas_latencies &=
		    ~(1 << find_highest_bit_set(supported_cas_latencies));
	}

	if (cas_latency != min_cas_latency && clock_speed_index)
		clock_speed_index--;

	if (cas_latency * min_cycletime[clock_speed_index] > 20000)
		die("Couldn't configure DRAM");
	info->clock_speed_index = clock_speed_index;
	info->cas_latency = cas_latency;
}

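/*
 * Worked example (values assumed for illustration): a DDR3-1333 SPD with
 * timebase dividend/divisor 1/8 gives timebase = 1000 * 1 / 8 = 125 ps.
 * With tCKmin byte 12 = 12 the cycle time is 12 * 125 = 1500 ps, and with
 * tAAmin byte 16 = 108 the CAS latency time is 13500 ps, so
 * min_cas_latency = DIV_ROUND_UP(13500, 1500) = 9 (CL9).
 */
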
static void program_base_timings(struct raminfo *info)
{
	unsigned int channel;
	unsigned int slot, rank, lane;
	unsigned int extended_silicon_revision;
	int i;

	extended_silicon_revision = info->silicon_revision;
	if (info->silicon_revision == 0)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->spd[channel][slot][MODULE_TYPE] & 0xF) == 3)
					extended_silicon_revision = 4;

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++) {
				int card_timing_2;
				if (!info->populated_ranks[channel][slot][rank])
					continue;

				for (lane = 0; lane < 9; lane++) {
					int tm_reg;
					int card_timing;

					card_timing = 0;
					if ((info->spd[channel][slot][MODULE_TYPE] & 0xF) == 3) {
						int reference_card;
						reference_card =
						    info->spd[channel][slot]
						    [REFERENCE_RAW_CARD_USED] & 0x1f;
						if (reference_card == 3)
							card_timing = u16_ffd1188[0][lane]
							    [info->clock_speed_index];
						if (reference_card == 5)
							card_timing = u16_ffd1188[1][lane]
							    [info->clock_speed_index];
					}

					info->training.lane_timings[0][channel][slot][rank]
					    [lane] = u8_FFFD1218[info->clock_speed_index];
					info->training.lane_timings[1][channel][slot][rank]
					    [lane] = 256;

					for (tm_reg = 2; tm_reg < 4; tm_reg++)
						info->training.lane_timings[tm_reg]
						    [channel][slot][rank][lane] =
						    u8_FFFD1240[channel]
						    [extended_silicon_revision][lane]
						    [2 * slot + rank][info->clock_speed_index]
						    + info->max4048[channel]
						    + u8_FFFD0C78[channel]
						    [extended_silicon_revision]
						    [info->mode4030[channel]][slot][rank]
						    [info->clock_speed_index]
						    + card_timing;
					for (tm_reg = 0; tm_reg < 4; tm_reg++)
						write_500(info, channel,
							  info->training.lane_timings[tm_reg]
							  [channel][slot][rank][lane],
							  get_timing_register_addr(lane, tm_reg,
										   slot, rank),
							  9, 0);
				}

				card_timing_2 = 0;
				if (!(extended_silicon_revision != 4
				      || (info->populated_ranks_mask[channel] & 5) == 5)) {
					if ((info->spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F) == 3)
						card_timing_2 =
						    u16_FFFE0EB8[0][info->clock_speed_index];
					if ((info->spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F) == 5)
						card_timing_2 =
						    u16_FFFE0EB8[1][info->clock_speed_index];
				}

				for (i = 0; i < 3; i++)
					write_500(info, channel,
						  (card_timing_2 + info->max4048[channel]
						   + u8_FFFD0EF8[channel]
						   [extended_silicon_revision]
						   [info->mode4030[channel]]
						   [info->clock_speed_index]),
						  u16_fffd0c50[i][slot][rank], 8, 1);
				write_500(info, channel,
					  (info->max4048[channel] + u8_FFFD0C78[channel]
					   [extended_silicon_revision]
					   [info->mode4030[channel]][slot][rank]
					   [info->clock_speed_index]),
					  u16_fffd0c70[slot][rank], 7, 1);
			}
		if (!info->populated_ranks_mask[channel])
			continue;
		for (i = 0; i < 3; i++)
			write_500(info, channel,
				  (info->max4048[channel] + info->avg4044[channel]
				   + u8_FFFD17E0[channel]
				   [extended_silicon_revision]
				   [info->mode4030[channel]][info->clock_speed_index]),
				  u16_fffd0c68[i], 8, 1);
	}
}

/* The time of clock cycle in ps. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return 2 * halfcycle_ps(info);
}

/* Frequency in 0.1 MHz units. */
static unsigned int frequency_01(struct raminfo *info)
{
	return 100 * frequency_11(info) / 9;
}

static unsigned int ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return (frequency_11(info) * 2) * ps / 900000;
}

static unsigned int ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return (frequency_11(info)) * ns / 900;
}

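/*
 * The /9 and /900 factors above suggest frequency_11() reports the DRAM
 * clock in units of 0.9 MHz: then frequency_01() yields 0.1 MHz units and
 * ns_to_cycles() reduces to f[MHz] * ns / 1000, e.g. roughly
 * 667 MHz * 15 ns / 1000 = 10 cycles.
 */
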
static void compute_derived_timings(struct raminfo *info)
{
	unsigned int channel, slot, rank;
	int extended_silicon_revision;
	int some_delay_1_ps;
	int some_delay_2_ps;
	int some_delay_2_halfcycles_ceil;
	int some_delay_2_halfcycles_floor;
	int some_delay_3_ps;
	int some_delay_3_ps_rounded;
	int some_delay_1_cycle_ceil;
	int some_delay_1_cycle_floor;

	some_delay_3_ps_rounded = 0;
	extended_silicon_revision = info->silicon_revision;
	if (!info->silicon_revision)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->spd[channel][slot][MODULE_TYPE] & 0xF) == 3)
					extended_silicon_revision = 4;
	if (info->board_lane_delay[7] < 5)
		info->board_lane_delay[7] = 5;
	info->revision_flag_1 = 2;
	if (info->silicon_revision == 2 || info->silicon_revision == 3)
		info->revision_flag_1 = 0;
	if (info->revision < 16)
		info->revision_flag_1 = 0;

	if (info->revision < 8)
		info->revision_flag_1 = 0;
	if (info->revision >= 8 && (info->silicon_revision == 0
				    || info->silicon_revision == 1))
		some_delay_2_ps = 735;
	else
		some_delay_2_ps = 750;

	if (info->revision >= 0x10 && (info->silicon_revision == 0
				       || info->silicon_revision == 1))
		some_delay_1_ps = 3929;
	else
		some_delay_1_ps = 3490;

	some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
	some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
	if (some_delay_1_ps % cycle_ps(info))
		some_delay_1_cycle_ceil++;
	else
		some_delay_1_cycle_floor--;
	info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
	if (info->revision_flag_1)
		some_delay_2_ps = halfcycle_ps(info) >> 6;
	some_delay_2_ps +=
	    MAX(some_delay_1_ps - 30,
		2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) + 375;
	some_delay_3_ps = halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
	if (info->revision_flag_1) {
		if (some_delay_3_ps >= 150) {
			const int some_delay_3_halfcycles =
			    (some_delay_3_ps << 6) / halfcycle_ps(info);
			some_delay_3_ps_rounded =
			    halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
		}
	}
	some_delay_2_halfcycles_ceil =
	    (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
	    2 * (some_delay_1_cycle_ceil - 1);
	if (info->revision_flag_1 && some_delay_3_ps < 150)
		some_delay_2_halfcycles_ceil++;
	some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
	if (info->revision < 0x10)
		some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil - 1;
	if (!info->revision_flag_1)
		some_delay_2_halfcycles_floor++;
	/* FIXME: this variable is unused. Should it be used? */
	(void)some_delay_2_halfcycles_floor;
	info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
	info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
	if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
	    || (info->populated_ranks[1][0][0] && info->populated_ranks[1][1][0]))
		info->max_slots_used_in_channel = 2;
	else
		info->max_slots_used_in_channel = 1;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		mchbar_write32(0x244 + (channel << 10),
			       ((info->revision < 8) ? 1 : 0x200) |
			       ((2 - info->max_slots_used_in_channel) << 17) |
			       (channel << 21) |
			       (info->some_delay_1_cycle_floor << 18) | 0x9510);
	if (info->max_slots_used_in_channel == 1) {
		info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
		info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
	} else {
		/* 2 if 1 or 2 ranks */
		info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1)
				     || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3;
		info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
				     || (count_ranks_in_channel(info, 1) == 2)) ? 2 : 3;
	}
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		int max_of_unk;
		int min_of_unk_2;

		int i, count;
		int sum;

		if (!info->populated_ranks_mask[channel])
			continue;

		max_of_unk = 0;
		min_of_unk_2 = 32767;

		sum = 0;
		count = 0;
		for (i = 0; i < 3; i++) {
			int unk1;
			if (info->revision < 8)
				unk1 = u8_FFFD1891[0][channel][info->clock_speed_index][i];
			else if (!(info->revision >= 0x10 || info->revision_flag_1))
				unk1 = u8_FFFD1891[1][channel][info->clock_speed_index][i];
			else
				unk1 = 0;
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					int a = 0;
					int b = 0;

					if (!info->populated_ranks[channel][slot][rank])
						continue;
					if (extended_silicon_revision == 4
					    && (info->populated_ranks_mask[channel] & 5)
					       != 5) {
						if ((info->spd[channel][slot]
						     [REFERENCE_RAW_CARD_USED] & 0x1F) == 3) {
							a = u16_ffd1178[0]
							    [info->clock_speed_index];
							b = u16_fe0eb8[0]
							    [info->clock_speed_index];
						} else if ((info->spd[channel][slot]
							    [REFERENCE_RAW_CARD_USED] & 0x1F)
							   == 5) {
							a = u16_ffd1178[1]
							    [info->clock_speed_index];
							b = u16_fe0eb8[1]
							    [info->clock_speed_index];
						}
					}
					min_of_unk_2 = MIN(min_of_unk_2, a);
					min_of_unk_2 = MIN(min_of_unk_2, b);
					if (rank == 0) {
						sum += a;
						count++;
					}
					{
						int t;
						t = b + u8_FFFD0EF8[channel]
						    [extended_silicon_revision]
						    [info->mode4030[channel]]
						    [info->clock_speed_index];
						if (unk1 >= t)
							max_of_unk =
							    MAX(max_of_unk, unk1 - t);
					}
				}
			{
				int t = u8_FFFD17E0[channel]
				    [extended_silicon_revision]
				    [info->mode4030[channel]]
				    [info->clock_speed_index] + min_of_unk_2;
				if (unk1 >= t)
					max_of_unk = MAX(max_of_unk, unk1 - t);
			}
		}

		if (count == 0)
			die("No memory ranks found for channel %u\n", channel);

		info->avg4044[channel] = sum / count;
		info->max4048[channel] = max_of_unk;
	}
}

static void jedec_read(struct raminfo *info,
		       int channel, int slot, int rank,
		       int total_rank, u8 addr3, unsigned int value)
{
	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1)) {
		addr3 = (addr3 & 0xCF) | ((addr3 & 0x10) << 1) | ((addr3 >> 1) & 0x10);
		value = (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8) << 1);
	}

	mchbar_clrsetbits8(0x271, 0x1f << 1, addr3);
	mchbar_clrsetbits8(0x671, 0x1f << 1, addr3);

	read32p((value << 3) | (total_rank << 28));

	mchbar_clrsetbits8(0x271, 0x1f << 1, 1 << 1);
	mchbar_clrsetbits8(0x671, 0x1f << 1, 1 << 1);

	read32p(total_rank << 28);
}

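/*
 * The two swizzles above implement DDR3 rank-1 address mirroring: the
 * value remap exchanges address bit pairs A3/A4, A5/A6 and A7/A8 (mask
 * 0xa8 selects bits 3, 5 and 7), and the addr3 remap appears to exchange
 * the two bank-address bits carried in bits 4 and 5 (BA0/BA1). The mode
 * register itself is issued as a side effect of the read32p() cycle.
 */
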
enum {
	MR1_RZQ12 = 512,
	MR1_RZQ2 = 64,
	MR1_RZQ4 = 4,
	MR1_ODS34OHM = 2
};

enum {
	MR0_BT_INTERLEAVED = 8,
	MR0_DLL_RESET_ON = 256
};

enum {
	MR2_RTT_WR_DISABLED = 0,
	MR2_RZQ2 = 1 << 10
};

static void jedec_init(struct raminfo *info)
{
	int write_recovery;
	int channel, slot, rank;
	int total_rank;
	int dll_on;
	int self_refresh_temperature;
	int auto_self_refresh;

	auto_self_refresh = 1;
	self_refresh_temperature = 1;
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			write_recovery = info->board_lane_delay[3] - 4;
		else
			write_recovery = 5;
	} else {
		write_recovery = 6;
	}
	FOR_POPULATED_RANKS {
		auto_self_refresh &=
		    (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
		self_refresh_temperature &=
		    info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
	}
	if (auto_self_refresh == 1)
		self_refresh_temperature = 0;

	dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
		  || (info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
		  || (info->populated_ranks[1][0][0] && info->populated_ranks[1][1][0]));

	total_rank = 0;

	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
		int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
		int rzq_reg58e;

		if (info->silicon_revision == 2 || info->silicon_revision == 3) {
			rzq_reg58e = 64;
			rtt = MR1_RZQ2;
			if (info->clock_speed_index != 0) {
				rzq_reg58e = 4;
				if (info->populated_ranks_mask[channel] == 3)
					rtt = MR1_RZQ4;
			}
		} else {
			if ((info->populated_ranks_mask[channel] & 5) == 5) {
				rtt = MR1_RZQ12;
				rzq_reg58e = 64;
				rtt_wr = MR2_RZQ2;
			} else {
				rzq_reg58e = 4;
				rtt = MR1_RZQ4;
			}
		}

		mchbar_write16(0x588 + (channel << 10), 0);
		mchbar_write16(0x58a + (channel << 10), 4);
		mchbar_write16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
		mchbar_write16(0x58e + (channel << 10), rzq_reg58e | 0x82);
		mchbar_write16(0x590 + (channel << 10), 0x1282);

		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info->populated_ranks[channel][slot][rank]) {
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x28,
						   rtt_wr | (info->clock_speed_index << 3)
						   | (auto_self_refresh << 6)
						   | (self_refresh_temperature << 7));
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x38, 0);
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x18,
						   rtt | MR1_ODS34OHM);
					jedec_read(info, channel, slot, rank,
						   total_rank, 6,
						   (dll_on << 12) | (write_recovery << 9)
						   | ((info->cas_latency - 4) << 4)
						   | MR0_BT_INTERLEAVED
						   | MR0_DLL_RESET_ON);
					total_rank++;
				}
	}
}

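/*
 * The addr3 values 0x28, 0x38, 0x18 and 6 appear to select the bank
 * address bits for MR2, MR3, MR1 and MR0 respectively, i.e. the mode
 * registers are programmed in the order the DDR3 JEDEC init sequence
 * requires (MR2, MR3, MR1, then MR0 with DLL reset).
 */
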
static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
{
	unsigned int channel, slot, rank;
	unsigned int total_mb[2] = { 0, 0 };	/* total memory per channel in MB */
	unsigned int channel_0_non_interleaved;

	FOR_ALL_RANKS {
		if (info->populated_ranks[channel][slot][rank]) {
			total_mb[channel] +=
			    pre_jedec ? 256 : (256 << info->density[channel][slot]
					       >> info->is_x16_module[channel][slot]);
			mchbar_write8(0x208 + rank + 2 * slot + (channel << 10),
				      (pre_jedec ? (1 | ((1 + 1) << 1)) :
				       (info->is_x16_module[channel][slot] |
					((info->density[channel][slot] + 1) << 1))) | 0x80);
		}
		mchbar_write16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
			       total_mb[channel] >> 6);
	}

	info->total_memory_mb = total_mb[0] + total_mb[1];

	info->interleaved_part_mb = pre_jedec ? 0 : 2 * MIN(total_mb[0], total_mb[1]);
	info->non_interleaved_part_mb =
	    total_mb[0] + total_mb[1] - info->interleaved_part_mb;
	channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
	mchbar_write32(0x100, channel_0_non_interleaved | info->non_interleaved_part_mb << 16);
	if (!pre_jedec)
		mchbar_write16(0x104, info->interleaved_part_mb);
}

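/*
 * Before JEDEC init (pre_jedec != 0) every populated rank is mapped as a
 * generic 256 MiB region, presumably so that the mode-register reads in
 * jedec_init() can target rank n at n * 256 MiB; the real sizes from SPD
 * density/width are only programmed on the second pass.
 */
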
static void program_board_delay(struct raminfo *info)
{
	int cas_latency_shift;
	int some_delay_ns;
	int some_delay_3_half_cycles;

	unsigned int channel, i;
	int high_multiplier;
	int lane_3_delay;
	int cas_latency_derived;

	high_multiplier = 0;
	some_delay_ns = 200;
	some_delay_3_half_cycles = 4;
	cas_latency_shift = info->silicon_revision == 0
	    || info->silicon_revision == 1 ? 1 : 0;
	if (info->revision < 8) {
		some_delay_ns = 600;
		cas_latency_shift = 0;
	}
	{
		int speed_bit;
		speed_bit =
		    ((info->clock_speed_index > 1
		      || (info->silicon_revision != 2
			  && info->silicon_revision != 3))) ^ (info->revision >= 0x10);
		write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e, 3, 1);
		write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e, 3, 1);
		if (info->revision >= 0x10 && info->clock_speed_index <= 1
		    && (info->silicon_revision == 2 || info->silicon_revision == 3))
			rmw_1d0(0x116, 5, 2, 4);
	}
	mchbar_write32(0x120, 1 << (info->max_slots_used_in_channel + 28) | 0x188e7f9f);

	mchbar_write8(0x124, info->board_lane_delay[4] + (frequency_01(info) + 999) / 1000);
	mchbar_write16(0x125, 0x1360);
	mchbar_write8(0x127, 0x40);
	if (info->fsb_frequency < frequency_11(info) / 2) {
		unsigned int some_delay_2_half_cycles;
		high_multiplier = 1;
		some_delay_2_half_cycles = ps_to_halfcycles(info,
			((3 * fsbcycle_ps(info)) >> 1) +
			(halfcycle_ps(info) * reg178_min[info->clock_speed_index] >> 6) +
			4 * halfcycle_ps(info) + 2230);
		some_delay_3_half_cycles =
		    MIN((some_delay_2_half_cycles +
			 (frequency_11(info) * 2) * (28 - some_delay_2_half_cycles) /
			 (frequency_11(info) * 2 - 4 * (info->fsb_frequency))) >> 3, 7);
	}
	if (mchbar_read8(0x2ca9) & 1)
		some_delay_3_half_cycles = 3;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_setbits32(0x220 + (channel << 10), 0x18001117);
		mchbar_write32(0x224 + (channel << 10),
			       (info->max_slots_used_in_channel - 1) |
			       (info->cas_latency - 5 - info->clock_speed_index) << 21 |
			       (info->max_slots_used_in_channel +
				info->cas_latency - cas_latency_shift - 4) << 16 |
			       (info->cas_latency - cas_latency_shift - 4) << 26 |
			       (info->cas_latency - info->clock_speed_index +
				info->max_slots_used_in_channel - 6) << 8);
		mchbar_write32(0x228 + (channel << 10), info->max_slots_used_in_channel);
		mchbar_write8(0x239 + (channel << 10), 32);
		mchbar_write32(0x248 + (channel << 10), high_multiplier << 24 |
			       some_delay_3_half_cycles << 25 | 0x840000);
		mchbar_write32(0x278 + (channel << 10), 0xc362042);
		mchbar_write32(0x27c + (channel << 10), 0x8b000062);
		mchbar_write32(0x24c + (channel << 10),
			       (!!info->clock_speed_index) << 17 |
			       ((2 + info->clock_speed_index -
				 (!!info->clock_speed_index))) << 12 | 0x10200);

		mchbar_write8(0x267 + (channel << 10), 4);
		mchbar_write16(0x272 + (channel << 10), 0x155);
		mchbar_clrsetbits32(0x2bc + (channel << 10), 0xffffff, 0x707070);

		write_500(info, channel,
			  ((!info->populated_ranks[channel][1][1])
			   | (!info->populated_ranks[channel][1][0] << 1)
			   | (!info->populated_ranks[channel][0][1] << 2)
			   | (!info->populated_ranks[channel][0][0] << 3)),
			  0x4c9, 4, 1);
	}

	mchbar_write8(0x2c4, (1 + (info->clock_speed_index != 0)) << 6 | 0xc);
	{
		u8 freq_divisor = 2;
		if (info->fsb_frequency == frequency_11(info))
			freq_divisor = 3;
		else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
			freq_divisor = 1;
		else
			freq_divisor = 2;
		mchbar_write32(0x2c0, freq_divisor << 11 | 0x6009c400);
	}

	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			lane_3_delay = info->board_lane_delay[3];
		else
			lane_3_delay = 10;
	} else {
		lane_3_delay = 12;
	}
	cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
	if (info->clock_speed_index > 1)
		cas_latency_derived++;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_write32(0x240 + (channel << 10),
			       ((info->clock_speed_index == 0) * 0x11000) |
			       0x1002100 | (2 + info->clock_speed_index) << 4 |
			       (info->cas_latency - 3));
		write_500(info, channel, (info->clock_speed_index << 1) | 1,
			  0x609, 6, 1);
		write_500(info, channel,
			  info->clock_speed_index + 2 * info->cas_latency - 7,
			  0x601, 6, 1);

		mchbar_write32(0x250 + (channel << 10),
			       (lane_3_delay + info->clock_speed_index + 9) << 6 |
			       info->board_lane_delay[7] << 2 |
			       info->board_lane_delay[4] << 16 |
			       info->board_lane_delay[1] << 25 |
			       info->board_lane_delay[1] << 29 | 1);
		mchbar_write32(0x254 + (channel << 10),
			       info->board_lane_delay[1] >> 3 |
			       (info->board_lane_delay[8] + 4 * info->use_ecc) << 6 |
			       0x80 | info->board_lane_delay[6] << 1 |
			       info->board_lane_delay[2] << 28 |
			       cas_latency_derived << 16 | 0x4700000);
		mchbar_write32(0x258 + (channel << 10),
			       (info->board_lane_delay[5] + info->clock_speed_index + 9) << 12 |
			       (info->clock_speed_index - info->cas_latency + 12) << 8 |
			       info->board_lane_delay[2] << 17 |
			       info->board_lane_delay[4] << 24 | 0x47);
		mchbar_write32(0x25c + (channel << 10),
			       info->board_lane_delay[1] << 1 |
			       info->board_lane_delay[0] << 8 | 0x1da50000);
		mchbar_write8(0x264 + (channel << 10), 0xff);
		mchbar_write8(0x5f8 + (channel << 10), cas_latency_shift << 3 | info->use_ecc);
	}

	program_modules_memory_map(info, 1);

	mchbar_clrsetbits16(0x610, 0xfe3c,
			    MIN(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9 | 0x3c);
	mchbar_setbits16(0x612, 1 << 8);
	mchbar_setbits16(0x214, 0x3e00);
	for (i = 0; i < 8; i++) {
		pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i),
				   (info->total_memory_mb - 64) | !i | 2);
		pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0);
	}
}

#define DEFAULT_PCI_MMIO_SIZE 2048

static void program_total_memory_map(struct raminfo *info)
{
	unsigned int tom, tolud, touud;
	unsigned int quickpath_reserved;
	unsigned int remap_base;
	unsigned int uma_base_igd;
	unsigned int uma_base_gtt;
	unsigned int mmio_size;
	int memory_remap;
	unsigned int memory_map[8];
	int i;
	unsigned int current_limit;
	unsigned int tseg_base;
	int uma_size_igd = 0, uma_size_gtt = 0;

	memset(memory_map, 0, sizeof(memory_map));

	if (info->uma_enabled) {
		u16 t = pci_read_config16(NORTHBRIDGE, GGC);
		gav(t);
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}

	mmio_size = DEFAULT_PCI_MMIO_SIZE;

	tom = info->total_memory_mb;
	if (tom == 4096)
		tom = 4032;
	touud = ALIGN_DOWN(tom - info->memory_reserved_for_heci_mb, 64);
	tolud = ALIGN_DOWN(MIN(4096 - mmio_size + ALIGN_UP(uma_size_igd + uma_size_gtt, 64),
			       touud), 64);
	memory_remap = 0;
	if (touud - tolud > 64) {
		memory_remap = 1;
		remap_base = MAX(4096, touud);
		touud = touud - tolud + 4096;
	}
	if (touud > 4096)
		memory_map[2] = touud | 1;
	quickpath_reserved = 0;

	{
		u32 t = pci_read_config32(QPI_SAD, 0x68);

		gav(t);

		if (t & 0x800) {
			u32 shift = t >> 20;
			if (shift == 0)
				die("Quickpath value is 0\n");
			quickpath_reserved = (u32)1 << find_lowest_bit_set32(shift);
		}
	}
	if (memory_remap)
		touud -= quickpath_reserved;

	uma_base_igd = tolud - uma_size_igd;
	uma_base_gtt = uma_base_igd - uma_size_gtt;
	tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
	if (!memory_remap)
		tseg_base -= quickpath_reserved;
	tseg_base = ALIGN_DOWN(tseg_base, 8);

	pci_write_config16(NORTHBRIDGE, TOLUD, tolud << 4);
	pci_write_config16(NORTHBRIDGE, TOM, tom >> 6);
	if (memory_remap) {
		pci_write_config16(NORTHBRIDGE, REMAPBASE, remap_base >> 6);
		pci_write_config16(NORTHBRIDGE, REMAPLIMIT, (touud - 64) >> 6);
	}
	pci_write_config16(NORTHBRIDGE, TOUUD, touud);

	if (info->uma_enabled) {
		pci_write_config32(NORTHBRIDGE, IGD_BASE, uma_base_igd << 20);
		pci_write_config32(NORTHBRIDGE, GTT_BASE, uma_base_gtt << 20);
	}
	pci_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);

	current_limit = 0;
	memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
	memory_map[1] = 4096;
	for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
		current_limit = MAX(current_limit, memory_map[i] & ~1);
		pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i),
				   (memory_map[i] & 1) | ALIGN_DOWN(current_limit - 1, 64)
				   | 2);
		pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0);
	}
}

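/*
 * Worked example (illustrative numbers): with 8192 MiB installed, no HECI
 * reservation and a 32 MiB IGD + 2 MiB GTT allocation, tom = touud = 8192,
 * tolud = ALIGN_DOWN(MIN(4096 - 2048 + 64, 8192), 64) = 2112 MiB, and
 * since touud - tolud > 64 the remap window starts at remap_base = 8192
 * with touud raised to 8192 - 2112 + 4096 = 10176 MiB.
 */
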
static void collect_system_info(struct raminfo *info)
{
	u32 capid0[3];
	int i;
	unsigned int channel;

	for (i = 0; i < 3; i++) {
		capid0[i] = pci_read_config32(NORTHBRIDGE, CAPID0 | (i << 2));
		printk(BIOS_DEBUG, "CAPID0[%d] = 0x%08x\n", i, capid0[i]);
	}
	info->revision = pci_read_config8(NORTHBRIDGE, PCI_REVISION_ID);
	printk(BIOS_DEBUG, "Revision ID: 0x%x\n", info->revision);
	printk(BIOS_DEBUG, "Device ID: 0x%x\n", pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID));

	info->max_supported_clock_speed_index = (~capid0[1] & 7);

	if ((capid0[1] >> 11) & 1)
		info->uma_enabled = 0;
	else
		gav(info->uma_enabled = pci_read_config8(NORTHBRIDGE, DEVEN) & 8);
	/* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
	info->silicon_revision = 0;

	if (capid0[2] & 2) {
		info->silicon_revision = 0;
		info->max_supported_clock_speed_index = 2;
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			if (info->populated_ranks[channel][0][0]
			    && (info->spd[channel][0][MODULE_TYPE] & 0xf) == 3) {
				info->silicon_revision = 2;
				info->max_supported_clock_speed_index = 1;
			}
	} else {
		switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) {
		case 1:
		case 2:
			info->silicon_revision = 3;
			break;
		case 3:
			info->silicon_revision = 0;
			break;
		case 0:
			info->silicon_revision = 2;
			break;
		}
		switch (pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
		case 0x40:
			info->silicon_revision = 0;
			break;
		case 0x48:
			info->silicon_revision = 1;
			break;
		}
	}
}

static void write_training_data(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;
	if (info->revision < 8)
		return;

	for (tm = 0; tm < 4; tm++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++)
						write_500(info, channel,
							  info->cached_training->lane_timings
							  [tm][channel][slot][rank][lane],
							  get_timing_register_addr(lane, tm,
										   slot, rank),
							  9, 0);
	write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
	write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
}

static void dump_timings(struct raminfo *info)
{
	int channel, slot, rank, lane, i;
	printk(RAM_SPEW, "Timings:\n");
	FOR_POPULATED_RANKS {
		printk(RAM_SPEW, "channel %d, slot %d, rank %d\n", channel,
		       slot, rank);
		for (lane = 0; lane < 9; lane++) {
			printk(RAM_SPEW, "lane %d: ", lane);
			for (i = 0; i < 4; i++) {
				printk(RAM_SPEW, "%x (%x) ",
				       read_500(info, channel,
						get_timing_register_addr(lane, i, slot, rank),
						9),
				       info->training.lane_timings[i][channel][slot][rank]
				       [lane]);
			}
			printk(RAM_SPEW, "\n");
		}
	}

	printk(RAM_SPEW, "[178] = %x (%x)\n", read_1d0(0x178, 7),
	       info->training.reg_178);
	printk(RAM_SPEW, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
	       info->training.reg_10b);
}

/*
 * Read timings and other registers that need to be restored verbatim and
 * put them to CBMEM.
 */
static void save_timings(struct raminfo *info)
{
	struct ram_training train;
	int channel, slot, rank, lane, i;

	train = info->training;
	FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
		for (i = 0; i < 4; i++)
			train.lane_timings[i][channel][slot][rank][lane] =
			    read_500(info, channel,
				     get_timing_register_addr(lane, i, slot, rank), 9);
	train.reg_178 = read_1d0(0x178, 7);
	train.reg_10b = read_1d0(0x10b, 6);

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		u32 reg32;
		reg32 = mchbar_read32((channel << 10) + 0x274);
		train.reg274265[channel][0] = reg32 >> 16;
		train.reg274265[channel][1] = reg32 & 0xffff;
		train.reg274265[channel][2] = mchbar_read16((channel << 10) + 0x265) >> 8;
	}
	train.reg2ca9_bit0 = mchbar_read8(0x2ca9) & 1;
	train.reg_6dc = mchbar_read32(0x6dc);
	train.reg_6e8 = mchbar_read32(0x6e8);

	printk(RAM_SPEW, "[6dc] = %x\n", train.reg_6dc);
	printk(RAM_SPEW, "[6e8] = %x\n", train.reg_6e8);

	/* Save the MRC S3 restore data to cbmem */
	mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION,
			     &train, sizeof(train));
}

static const struct ram_training *get_cached_training(void)
{
	return mrc_cache_current_mmap_leak(MRC_TRAINING_DATA,
					   MRC_CACHE_VERSION,
					   NULL);
}

static int have_match_ranks(struct raminfo *info, int channel, int ranks)
{
	int ranks_in_channel;
	ranks_in_channel = info->populated_ranks[channel][0][0]
	    + info->populated_ranks[channel][0][1]
	    + info->populated_ranks[channel][1][0]
	    + info->populated_ranks[channel][1][1];

	/* empty channel */
	if (ranks_in_channel == 0)
		return 1;

	if (ranks_in_channel != ranks)
		return 0;
	/* single slot */
	if (info->populated_ranks[channel][0][0] !=
	    info->populated_ranks[channel][1][0])
		return 1;
	if (info->populated_ranks[channel][0][1] !=
	    info->populated_ranks[channel][1][1])
		return 1;
	if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1])
		return 0;
	if (info->density[channel][0] != info->density[channel][1])
		return 0;
	return 1;
}

static void read_4090(struct raminfo *info)
{
	int i, channel, slot, rank, lane;
	for (i = 0; i < 2; i++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				for (lane = 0; lane < 9; lane++)
					info->training.lane_timings[0][i][slot][rank][lane]
					    = 32;

	for (i = 1; i < 4; i++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++) {
						info->training.lane_timings[i][channel]
						    [slot][rank][lane] =
						    read_500(info, channel,
							     get_timing_register_addr
							     (lane, i, slot, rank), 9)
						    + (i == 1) * 11;	// !!!!
					}
}

static u32 get_etalon2(int flip, u32 addr)
{
	const u16 invmask[] = {
		0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
		0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
	};
	u32 ret;
	u32 comp4 = addr / 480;
	addr %= 480;
	u32 comp1 = addr & 0xf;
	u32 comp2 = (addr >> 4) & 1;
	u32 comp3 = addr >> 5;

	if (comp4)
		ret = 0x1010101 << (comp4 - 1);
	else
		ret = 0;
	if (flip ^ (((invmask[comp3] >> comp1) ^ comp2) & 1))
		ret = ~ret;

	return ret;
}

static void disable_cache_region(void)
{
	msr_t msr = {.lo = 0, .hi = 0 };

	wrmsr(MTRR_PHYS_BASE(3), msr);
	wrmsr(MTRR_PHYS_MASK(3), msr);
}

static void enable_cache_region(unsigned int base, unsigned int size)
{
	msr_t msr;
	msr.lo = base | MTRR_TYPE_WRPROT;
	msr.hi = 0;
	wrmsr(MTRR_PHYS_BASE(3), msr);
	msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRR_DEF_TYPE_EN)
		  & 0xffffffff);
	msr.hi = 0x0000000f;
	wrmsr(MTRR_PHYS_MASK(3), msr);
}

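/*
 * MTRR 3 is pointed at the region under test as write-protect: writes go
 * straight to DRAM while reads may be cached, which is presumably why the
 * checkers below flush the region again before re-reading it uncached.
 * MTRR_DEF_TYPE_EN (bit 11) doubles as the valid bit of the PHYS_MASK MSR.
 */
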
static void flush_cache(u32 start, u32 size)
{
	u32 end;
	u32 addr;

	end = start + (ALIGN_DOWN(size + 4096, 4096));
	for (addr = start; addr < end; addr += 64)
		clflush((void *)(uintptr_t)addr);
}

static void clear_errors(void)
{
	pci_write_config8(NORTHBRIDGE, 0xc0, 0x01);
}

static void write_testing(struct raminfo *info, int totalrank, int flip)
{
	int nwrites = 0;
	/* in 8-byte units. */
	u32 offset;
	u8 *base;

	base = (u8 *)(uintptr_t)(totalrank << 28);
	for (offset = 0; offset < 9 * 480; offset += 2) {
		write32(base + offset * 8, get_etalon2(flip, offset));
		write32(base + offset * 8 + 4, get_etalon2(flip, offset));
		write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
		write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
		nwrites += 4;
		if (nwrites >= 320) {
			clear_errors();
			nwrites = 0;
		}
	}
}

static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
{
	u8 failmask = 0;
	int i;
	int comp1, comp2, comp3;
	u32 failxor[2] = { 0, 0 };

	enable_cache_region((total_rank << 28), 1728 * 5 * 4);

	for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 4; comp1++)
			for (comp2 = 0; comp2 < 60; comp2++) {
				u32 re[4];
				u32 curroffset = comp3 * 8 * 60 + 2 * comp1 + 8 * comp2;
				read128((total_rank << 28) | (curroffset << 3), (u64 *)re);
				failxor[0] |= get_etalon2(flip, curroffset) ^ re[0];
				failxor[1] |= get_etalon2(flip, curroffset) ^ re[1];
				failxor[0] |= get_etalon2(flip, curroffset | 1) ^ re[2];
				failxor[1] |= get_etalon2(flip, curroffset | 1) ^ re[3];
			}
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache_region();
	flush_cache((total_rank << 28), 1728 * 5 * 4);
	return failmask;
}

const u32 seed1[0x18] = {
	0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
	0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
	0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
	0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
	0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
	0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
};

static u32 get_seed2(int a, int b)
{
	const u32 seed2[5] = {
		0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
		0x5b6db6db,
	};
	u32 r;
	r = seed2[(a + (a >= 10)) / 5];
	return b ? ~r : r;
}

static int make_shift(int comp2, int comp5, int x)
{
	const u8 seed3[32] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
		0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	};

	return (comp2 - ((seed3[comp5] >> (x & 7)) & 1)) & 0x1f;
}

static u32 get_etalon(int flip, u32 addr)
{
	u32 mask_byte = 0;
	int comp1 = (addr >> 1) & 1;
	int comp2 = (addr >> 3) & 0x1f;
	int comp3 = (addr >> 8) & 0xf;
	int comp4 = (addr >> 12) & 0xf;
	int comp5 = (addr >> 16) & 0x1f;
	u32 mask_bit = ~(0x10001 << comp3);
	u32 part1;
	u32 part2;
	int byte;

	part2 = ((seed1[comp5] >>
		  make_shift(comp2, comp5, (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip;
	part1 = ((seed1[comp5] >>
		  make_shift(comp2, comp5, (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip;

	for (byte = 0; byte < 4; byte++)
		if ((get_seed2(comp5, comp4) >>
		     make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1)
			mask_byte |= 0xff << (8 * byte);

	return (mask_bit & mask_byte) | (part1 << comp3) | (part2 << (comp3 + 16));
}

static void
write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	int i;
	for (i = 0; i < 2048; i++)
		write32p((totalrank << 28) | (region << 25) | (block << 16) |
			 (i << 2), get_etalon(flip, (block << 16) | (i << 2)));
}

static u8
check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	u8 failmask = 0;
	u32 failxor[2];
	int i;
	int comp1, comp2, comp3;

	failxor[0] = 0;
	failxor[1] = 0;

	enable_cache_region(totalrank << 28, 134217728);
	for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 16; comp1++)
			for (comp2 = 0; comp2 < 64; comp2++) {
				u32 addr =
				    (totalrank << 28) | (region << 25) | (block << 16)
				    | (comp3 << 12) | (comp2 << 6) | (comp1 << 2);
				failxor[comp1 & 1] |=
				    read32p(addr) ^ get_etalon(flip, addr);
			}
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache_region();
	flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384);
	return failmask;
}

static int check_bounded(unsigned short *vals, u16 bound)
{
	int i;

	for (i = 0; i < 8; i++)
		if (vals[i] < bound)
			return 0;
	return 1;
}

enum state {
	BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
};

static int validate_state(enum state *in)
{
	int i;
	for (i = 0; i < 8; i++)
		if (in[i] != COMPLETE)
			return 0;
	return 1;
}

static void
do_fsm(enum state *state, u16 *counter,
       u8 fail_mask, int margin, int uplimit,
       u8 *res_low, u8 *res_high, u8 val)
{
	int lane;

	for (lane = 0; lane < 8; lane++) {
		int is_fail = (fail_mask >> lane) & 1;
		switch (state[lane]) {
		case BEFORE_USABLE:
			if (!is_fail) {
				counter[lane] = 1;
				state[lane] = AT_USABLE;
				break;
			}
			counter[lane] = 0;
			state[lane] = BEFORE_USABLE;
			break;
		case AT_USABLE:
			if (!is_fail) {
				++counter[lane];
				if (counter[lane] >= margin) {
					state[lane] = AT_MARGIN;
					res_low[lane] = val - margin + 1;
					break;
				}
				state[lane] = AT_USABLE;
				break;
			}
			counter[lane] = 0;
			state[lane] = BEFORE_USABLE;
			break;
		case AT_MARGIN:
			if (is_fail) {
				state[lane] = COMPLETE;
				res_high[lane] = val - 1;
			} else {
				counter[lane]++;
				state[lane] = AT_MARGIN;
				if (val == uplimit) {
					state[lane] = COMPLETE;
					res_high[lane] = uplimit;
				}
			}
			break;
		case COMPLETE:
			break;
		}
	}
}

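/*
 * do_fsm() is stepped once per tested setting with monotonically
 * increasing val: a lane leaves BEFORE_USABLE on its first pass, needs
 * `margin` consecutive passes to record res_low = val - margin + 1, and
 * then records res_high at the first failure (or at uplimit), yielding
 * the usable [res_low, res_high] window per lane.
 */
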
1852 static void
1853 train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
1854 u8 total_rank, u8 reg_178, int first_run, int niter,
1855 timing_bounds_t * timings)
1857 int lane;
1858 enum state state[8];
1859 u16 count[8];
1860 u8 lower_usable[8];
1861 u8 upper_usable[8];
1862 unsigned short num_successfully_checked[8];
1863 u8 reg1b3;
1864 int i;
1866 for (i = 0; i < 8; i++)
1867 state[i] = BEFORE_USABLE;
1869 if (!first_run) {
1870 int is_all_ok = 1;
1871 for (lane = 0; lane < 8; lane++)
1872 if (timings[reg_178][channel][slot][rank][lane].
1873 smallest ==
1874 timings[reg_178][channel][slot][rank][lane].
1875 largest) {
1876 timings[reg_178][channel][slot][rank][lane].
1877 smallest = 0;
1878 timings[reg_178][channel][slot][rank][lane].
1879 largest = 0;
1880 is_all_ok = 0;
1882 if (is_all_ok) {
1883 for (i = 0; i < 8; i++)
1884 state[i] = COMPLETE;
1888 for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
1889 u8 failmask = 0;
1890 write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
1891 write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
1892 failmask = check_testing(info, total_rank, 0);
1893 mchbar_setbits32(0xfb0, 3 << 16);
1894 do_fsm(state, count, failmask, 5, 47, lower_usable,
1895 upper_usable, reg1b3);
1898 if (reg1b3) {
1899 write_1d0(0, 0x1b3, 6, 1);
1900 write_1d0(0, 0x1a3, 6, 1);
1901 for (lane = 0; lane < 8; lane++) {
1902 if (state[lane] == COMPLETE) {
1903 timings[reg_178][channel][slot][rank][lane].
1904 smallest =
1905 lower_usable[lane] +
1906 (info->training.
1907 lane_timings[0][channel][slot][rank][lane]
1908 & 0x3F) - 32;
1909 timings[reg_178][channel][slot][rank][lane].
1910 largest =
1911 upper_usable[lane] +
1912 (info->training.
1913 lane_timings[0][channel][slot][rank][lane]
1914 & 0x3F) - 32;
1919 if (!first_run) {
1920 for (lane = 0; lane < 8; lane++)
1921 if (state[lane] == COMPLETE) {
1922 write_500(info, channel,
1923 timings[reg_178][channel][slot][rank]
1924 [lane].smallest,
1925 get_timing_register_addr(lane, 0,
1926 slot, rank),
1927 9, 1);
1928 write_500(info, channel,
1929 timings[reg_178][channel][slot][rank]
1930 [lane].smallest +
1931 info->training.
1932 lane_timings[1][channel][slot][rank]
1933 [lane]
1935 info->training.
1936 lane_timings[0][channel][slot][rank]
1937 [lane], get_timing_register_addr(lane,
1939 slot,
1940 rank),
1941 9, 1);
1942 num_successfully_checked[lane] = 0;
1943 } else
1944 num_successfully_checked[lane] = -1;
1946 do {
1947 u8 failmask = 0;
1948 for (i = 0; i < niter; i++) {
1949 if (failmask == 0xFF)
1950 break;
1951 failmask |=
1952 check_testing_type2(info, total_rank, 2, i,
1954 failmask |=
1955 check_testing_type2(info, total_rank, 3, i,
1958 mchbar_setbits32(0xfb0, 3 << 16);
1959 for (lane = 0; lane < 8; lane++)
1960 if (num_successfully_checked[lane] != 0xffff) {
1961 if ((1 << lane) & failmask) {
1962 if (timings[reg_178][channel]
1963 [slot][rank][lane].
1964 largest <=
1965 timings[reg_178][channel]
1966 [slot][rank][lane].smallest)
1967 num_successfully_checked
1968 [lane] = -1;
1969 else {
1970 num_successfully_checked
1971 [lane] = 0;
1972 timings[reg_178]
1973 [channel][slot]
1974 [rank][lane].
1975 smallest++;
1976 write_500(info, channel,
1977 timings
1978 [reg_178]
1979 [channel]
1980 [slot][rank]
1981 [lane].
1982 smallest,
1983 get_timing_register_addr
1984 (lane, 0,
1985 slot, rank),
1986 9, 1);
1987 write_500(info, channel,
1988 timings
1989 [reg_178]
1990 [channel]
1991 [slot][rank]
1992 [lane].
1993 smallest +
1994 info->
1995 training.
1996 lane_timings
1997 [1][channel]
1998 [slot][rank]
1999 [lane]
2001 info->
2002 training.
2003 lane_timings
2004 [0][channel]
2005 [slot][rank]
2006 [lane],
2007 get_timing_register_addr
2008 (lane, 1,
2009 slot, rank),
2010 9, 1);
2012 } else
2013 num_successfully_checked[lane]
2017 while (!check_bounded(num_successfully_checked, 2))
2020 for (lane = 0; lane < 8; lane++)
2021 if (state[lane] == COMPLETE) {
2022 write_500(info, channel,
2023 timings[reg_178][channel][slot][rank]
2024 [lane].largest,
2025 get_timing_register_addr(lane, 0,
2026 slot, rank),
2027 9, 1);
2028 write_500(info, channel,
2029 timings[reg_178][channel][slot][rank]
2030 [lane].largest +
2031 info->training.
2032 lane_timings[1][channel][slot][rank]
2033 [lane]
2035 info->training.
2036 lane_timings[0][channel][slot][rank]
2037 [lane], get_timing_register_addr(lane,
2039 slot,
2040 rank),
2041 9, 1);
2042 num_successfully_checked[lane] = 0;
2043 } else
2044 num_successfully_checked[lane] = -1;
		do {
			int failmask = 0;
			for (i = 0; i < niter; i++) {
				if (failmask == 0xFF)
					break;
				failmask |= check_testing_type2(info, total_rank, 2, i, 0);
				failmask |= check_testing_type2(info, total_rank, 3, i, 1);
			}
			mchbar_setbits32(0xfb0, 3 << 16);
			for (lane = 0; lane < 8; lane++) {
				if (num_successfully_checked[lane] != 0xffff) {
					if ((1 << lane) & failmask) {
						if (timings[reg_178][channel][slot][rank][lane].largest <=
						    timings[reg_178][channel][slot][rank][lane].smallest) {
							num_successfully_checked[lane] = -1;
						} else {
							num_successfully_checked[lane] = 0;
							timings[reg_178][channel][slot][rank][lane].largest--;
							write_500(info, channel,
								  timings[reg_178][channel][slot][rank][lane].largest,
								  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
							write_500(info, channel,
								  timings[reg_178][channel][slot][rank][lane].largest +
								  info->training.lane_timings[1][channel][slot][rank][lane] -
								  info->training.lane_timings[0][channel][slot][rank][lane],
								  get_timing_register_addr(lane, 1, slot, rank), 9, 1);
						}
					} else
						num_successfully_checked[lane]++;
				}
			}
		} while (!check_bounded(num_successfully_checked, 3));

		for (lane = 0; lane < 8; lane++) {
			write_500(info, channel,
				  info->training.lane_timings[0][channel][slot][rank][lane],
				  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
			write_500(info, channel,
				  info->training.lane_timings[1][channel][slot][rank][lane],
				  get_timing_register_addr(lane, 1, slot, rank), 9, 1);
			if (timings[reg_178][channel][slot][rank][lane].largest <=
			    timings[reg_178][channel][slot][rank][lane].smallest) {
				timings[reg_178][channel][slot][rank][lane].largest = 0;
				timings[reg_178][channel][slot][rank][lane].smallest = 0;
			}
		}
	}
}

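/*
 * The helpers below poke undocumented scan-chain fields behind MCHBAR
 * 0x1d0; their meanings are inferred from vendor BIOS behavior, so the
 * comments are best-effort guesses. set_10b() toggles field 0x10b and
 * rescales every populated lane's type-0 timing by the clock-dependent
 * lut16[] step, presumably so the effective delay survives the switch.
 */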
static void set_10b(struct raminfo *info, u8 val)
{
	int channel;
	int slot, rank;
	int lane;

	if (read_1d0(0x10b, 6) == val)
		return;

	write_1d0(val, 0x10b, 6, 1);

	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 9; lane++) {
		u16 reg_500;
		reg_500 = read_500(info, channel,
				   get_timing_register_addr(lane, 0, slot, rank), 9);
		if (val == 1) {
			if (lut16[info->clock_speed_index] <= reg_500)
				reg_500 -= lut16[info->clock_speed_index];
			else
				reg_500 = 0;
		} else {
			reg_500 += lut16[info->clock_speed_index];
		}
		write_500(info, channel, reg_500,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
	}
}

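/* Bit 0 of MCHBAR 0x5f8 (per channel) appears to gate the ECC lane. */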
static void set_ecc(int onoff)
{
	int channel;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		u8 t;
		t = mchbar_read8((channel << 10) + 0x5f8);
		if (onoff)
			t |= 1;
		else
			t &= ~1;
		mchbar_write8((channel << 10) + 0x5f8, t);
	}
}

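/*
 * Field 0x178 seems to use a folded encoding: sweep positions 0..63 are
 * mapped to 2 * (val - 31) in the upper half and 2 * (63 - val) in the
 * lower half before being written through the scan chain.
 */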
static void set_178(u8 val)
{
	if (val >= 31)
		val = val - 31;
	else
		val = 63 - val;

	write_1d0(2 * val, 0x178, 7, 1);
}

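/* Program one timing register type for all eight data lanes of the given
   rank from the stored training results. */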
static void
write_500_timings_type(struct raminfo *info, int channel, int slot, int rank,
		       int type)
{
	int lane;

	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.lane_timings[type][channel][slot][rank][lane],
			  get_timing_register_addr(lane, type, slot, rank), 9, 0);
}

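/*
 * Sweep the type-3 (test offset) timing over 0..63 for one rank, run the
 * per-lane pass/fail state machine, then park each lane in the middle of
 * its usable window (minus a small stepping-dependent bias) and record the
 * window bounds so try_cached_training() can re-verify them on resume.
 */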
static void
try_timing_offsets(struct raminfo *info, int channel,
		   int slot, int rank, int totalrank)
{
	u16 count[8];
	enum state state[8];
	u8 lower_usable[8], upper_usable[8];
	int lane;
	int i;
	int flip = 1;
	int timing_offset;

	for (i = 0; i < 8; i++)
		state[i] = BEFORE_USABLE;

	memset(count, 0, sizeof(count));

	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.lane_timings[2][channel][slot][rank][lane] + 32,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);

	for (timing_offset = 0; !validate_state(state) && timing_offset < 64;
	     timing_offset++) {
		u8 failmask;
		write_1d0(timing_offset ^ 32, 0x1bb, 6, 1);
		failmask = 0;
		for (i = 0; i < 2 && failmask != 0xff; i++) {
			flip = !flip;
			write_testing(info, totalrank, flip);
			failmask |= check_testing(info, totalrank, flip);
		}
		do_fsm(state, count, failmask, 10, 63, lower_usable,
		       upper_usable, timing_offset);
	}
	write_1d0(0, 0x1bb, 6, 1);
	dump_timings(info);
	if (!validate_state(state))
		die("Couldn't discover DRAM timings (1)\n");

	for (lane = 0; lane < 8; lane++) {
		u8 bias = 0;

		if (info->silicon_revision) {
			int usable_length;

			usable_length = upper_usable[lane] - lower_usable[lane];
			if (usable_length >= 20) {
				bias = usable_length / 2 - 10;
				if (bias >= 2)
					bias = 2;
			}
		}
		write_500(info, channel,
			  info->training.lane_timings[2][channel][slot][rank][lane] +
			  (upper_usable[lane] + lower_usable[lane]) / 2 - bias,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);
		info->training.timing2_bounds[channel][slot][rank][lane][0] =
			info->training.lane_timings[2][channel][slot][rank][lane] +
			lower_usable[lane];
		info->training.timing2_bounds[channel][slot][rank][lane][1] =
			info->training.lane_timings[2][channel][slot][rank][lane] +
			upper_usable[lane];
		info->training.timing2_offset[channel][slot][rank][lane] =
			info->training.lane_timings[2][channel][slot][rank][lane];
	}
}

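/*
 * Pick the final type-0 timing for a single lane: a weighted average of the
 * window centers measured at center_178 and at center_178 +/- span. The
 * weights and span carry stepping/channel/lane-specific quirks, presumably
 * lifted from the vendor BIOS, and the result is nudged when the margins
 * around it are lopsided.
 */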
static u8
choose_training(struct raminfo *info, int channel, int slot, int rank,
		int lane, timing_bounds_t *timings, u8 center_178)
{
	u16 central_weight;
	u16 side_weight;
	unsigned int sum = 0, count = 0;
	u8 span;
	u8 lower_margin, upper_margin;
	u8 reg_178;
	u8 result;

	span = 12;
	central_weight = 20;
	side_weight = 20;
	if (info->silicon_revision == 1 && channel == 1) {
		central_weight = 5;
		side_weight = 20;
		if ((info->populated_ranks_mask[1] ^
		     (info->populated_ranks_mask[1] >> 2)) & 1)
			span = 18;
	}
	if ((info->populated_ranks_mask[0] & 5) == 5) {
		central_weight = 20;
		side_weight = 20;
	}
	if (info->clock_speed_index >= 2
	    && (info->populated_ranks_mask[0] & 5) == 5 && slot == 1) {
		if (info->silicon_revision == 1) {
			switch (channel) {
			case 0:
				if (lane == 1) {
					central_weight = 10;
					side_weight = 20;
				}
				break;
			case 1:
				if (lane == 6) {
					side_weight = 5;
					central_weight = 20;
				}
				break;
			}
		}
		if (info->silicon_revision == 0 && channel == 0 && lane == 0) {
			side_weight = 5;
			central_weight = 20;
		}
	}
	for (reg_178 = center_178 - span; reg_178 <= center_178 + span;
	     reg_178 += span) {
		u8 smallest;
		u8 largest;
		largest = timings[reg_178][channel][slot][rank][lane].largest;
		smallest = timings[reg_178][channel][slot][rank][lane].smallest;
		if (largest - smallest + 1 >= 5) {
			unsigned int weight;
			if (reg_178 == center_178)
				weight = central_weight;
			else
				weight = side_weight;
			sum += weight * (largest + smallest);
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (2)\n");
	result = sum / (2 * count);
	lower_margin =
		result - timings[center_178][channel][slot][rank][lane].smallest;
	upper_margin =
		timings[center_178][channel][slot][rank][lane].largest - result;
	if (upper_margin < 10 && lower_margin > 10)
		result -= MIN(lower_margin - 10, 10 - upper_margin);
	if (upper_margin > 10 && lower_margin < 10)
		result += MIN(upper_margin - 10, 10 - lower_margin);
	return result;
}

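/*
 * Choose the reg178 operating point: each candidate setting is weighted by
 * how much its worst-case lane margin exceeds STANDARD_MIN_MARGIN, and the
 * weighted mean is returned. The threshold scan afterwards records the
 * smallest and largest still-usable settings for the S3-resume fast path.
 */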
#define STANDARD_MIN_MARGIN 5

static u8 choose_reg178(struct raminfo *info, timing_bounds_t *timings)
{
	u16 margin[64];
	int lane, rank, slot, channel;
	u8 reg178;
	int count = 0, sum = 0;

	for (reg178 = reg178_min[info->clock_speed_index];
	     reg178 < reg178_max[info->clock_speed_index];
	     reg178 += reg178_step[info->clock_speed_index]) {
		margin[reg178] = -1;
		FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
			int curmargin =
				timings[reg178][channel][slot][rank][lane].largest -
				timings[reg178][channel][slot][rank][lane].smallest + 1;
			if (curmargin < margin[reg178])
				margin[reg178] = curmargin;
		}
		if (margin[reg178] >= STANDARD_MIN_MARGIN) {
			u16 weight;
			weight = margin[reg178] - STANDARD_MIN_MARGIN;
			sum += weight * reg178;
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (3)\n");

	u8 threshold;

	for (threshold = 30; threshold >= 5; threshold--) {
		int usable_length = 0;
		int smallest_fount = 0;
		for (reg178 = reg178_min[info->clock_speed_index];
		     reg178 < reg178_max[info->clock_speed_index];
		     reg178 += reg178_step[info->clock_speed_index])
			if (margin[reg178] >= threshold) {
				usable_length +=
					reg178_step[info->clock_speed_index];
				info->training.reg178_largest =
					reg178 -
					2 * reg178_step[info->clock_speed_index];

				if (!smallest_fount) {
					smallest_fount = 1;
					info->training.reg178_smallest =
						reg178 +
						reg178_step[info->clock_speed_index];
				}
			}
		if (usable_length >= 0x21)
			break;
	}

	return sum / count;
}

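/*
 * Plausibility check of cached training data (from the MRC cache) against
 * the freshly computed estimates: reject the cache if any lane's type-1
 * timing is out of range or deviates from the estimate by more than 24
 * steps in either direction.
 */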
static int check_cached_sanity(struct raminfo *info)
{
	int lane;
	int slot, rank;
	int channel;

	if (!info->cached_training)
		return 0;

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				for (lane = 0; lane < 8 + info->use_ecc; lane++) {
					u16 cached_value, estimation_value;
					cached_value =
						info->cached_training->
						lane_timings[1][channel][slot][rank][lane];
					if (cached_value >= 0x18 && cached_value <= 0x1e7) {
						estimation_value =
							info->training.
							lane_timings[1][channel][slot][rank][lane];
						if (estimation_value < cached_value - 24)
							return 0;
						if (estimation_value > cached_value + 24)
							return 0;
					}
				}
	return 1;
}

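/*
 * Fast path: replay cached training data instead of retraining. The cached
 * per-lane windows are re-verified at both reg178 extremes; each bound must
 * still pass and a point just outside it must fail, otherwise we fall back
 * to full training. Returns 1 on success, 0 on any mismatch.
 */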
static int try_cached_training(struct raminfo *info)
{
	u8 saved_243[2];
	u8 tm;

	int channel, slot, rank, lane;
	int flip = 1;
	int i, j;

	if (!check_cached_sanity(info))
		return 0;

	info->training.reg178_center = info->cached_training->reg178_center;
	info->training.reg178_smallest = info->cached_training->reg178_smallest;
	info->training.reg178_largest = info->cached_training->reg178_largest;
	memcpy(&info->training.timing_bounds,
	       &info->cached_training->timing_bounds,
	       sizeof(info->training.timing_bounds));
	memcpy(&info->training.timing_offset,
	       &info->cached_training->timing_offset,
	       sizeof(info->training.timing_offset));

	write_1d0(2, 0x142, 3, 1);
	saved_243[0] = mchbar_read8(0x243);
	saved_243[1] = mchbar_read8(0x643);
	mchbar_write8(0x243, saved_243[0] | 2);
	mchbar_write8(0x643, saved_243[1] | 2);
	set_ecc(0);
	pci_write_config16(NORTHBRIDGE, 0xc8, 3);
	if (read_1d0(0x10b, 6) & 1)
		set_10b(info, 0);
	for (tm = 0; tm < 2; tm++) {
		int totalrank;

		set_178(tm ? info->cached_training->reg178_largest
			   : info->cached_training->reg178_smallest);

		totalrank = 0;
		/* Check timing ranges. With i == 0 we check the smallest and
		   with i == 1 the largest bound. With j == 0 we check that on
		   the bound it still works, whereas with j == 1 we check that
		   just outside of the bound we fail.  */
		FOR_POPULATED_RANKS_BACKWARDS {
			for (i = 0; i < 2; i++) {
				for (lane = 0; lane < 8; lane++) {
					write_500(info, channel,
						  info->cached_training->
						  timing2_bounds[channel][slot][rank][lane][i],
						  get_timing_register_addr(lane, 3, slot, rank),
						  9, 1);

					if (!i)
						write_500(info, channel,
							  info->cached_training->
							  timing2_offset[channel][slot][rank][lane],
							  get_timing_register_addr(lane, 2, slot, rank),
							  9, 1);
					write_500(info, channel,
						  i ? info->cached_training->
						      timing_bounds[tm][channel][slot][rank][lane].largest
						    : info->cached_training->
						      timing_bounds[tm][channel][slot][rank][lane].smallest,
						  get_timing_register_addr(lane, 0, slot, rank),
						  9, 1);
					write_500(info, channel,
						  info->cached_training->
						  timing_offset[channel][slot][rank][lane] +
						  (i ? info->cached_training->
						       timing_bounds[tm][channel][slot][rank][lane].largest
						     : info->cached_training->
						       timing_bounds[tm][channel][slot][rank][lane].smallest) - 64,
						  get_timing_register_addr(lane, 1, slot, rank),
						  9, 1);
				}
				for (j = 0; j < 2; j++) {
					u8 failmask;
					u8 expected_failmask;
					char reg1b3;

					reg1b3 = (j == 1) + 4;
					reg1b3 = j == i ? reg1b3 : (-reg1b3) & 0x3f;
					write_1d0(reg1b3, 0x1bb, 6, 1);
					write_1d0(reg1b3, 0x1b3, 6, 1);
					write_1d0(reg1b3, 0x1a3, 6, 1);

					flip = !flip;
					write_testing(info, totalrank, flip);
					failmask = check_testing(info, totalrank, flip);
					expected_failmask = j == 0 ? 0x00 : 0xff;
					if (failmask != expected_failmask)
						goto fail;
				}
			}
			totalrank++;
		}
	}

	set_178(info->cached_training->reg178_center);
	if (info->use_ecc)
		set_ecc(1);
	write_training_data(info);
	write_1d0(0, 322, 3, 1);
	info->training = *info->cached_training;

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);

	return 1;

fail:
	FOR_POPULATED_RANKS {
		write_500_timings_type(info, channel, slot, rank, 1);
		write_500_timings_type(info, channel, slot, rank, 2);
		write_500_timings_type(info, channel, slot, rank, 3);
	}

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);

	return 0;
}

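/*
 * Full training sequence: write test patterns for every populated rank,
 * scan the whole reg178 range once, pick a center via choose_reg178(),
 * rescan around that center (channel 1 apparently needs a wider +/- 18
 * scan on stepping 1 with asymmetric rank population), then commit the
 * per-lane timings and finally the type-3 test offsets.
 */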
static void do_ram_training(struct raminfo *info)
{
	u8 saved_243[2];
	int totalrank = 0;
	u8 reg_178;
	int niter;

	timing_bounds_t *timings = timings_car;
	int lane, rank, slot, channel;
	u8 reg178_center;

	write_1d0(2, 0x142, 3, 1);
	saved_243[0] = mchbar_read8(0x243);
	saved_243[1] = mchbar_read8(0x643);
	mchbar_write8(0x243, saved_243[0] | 2);
	mchbar_write8(0x643, saved_243[1] | 2);
	switch (info->clock_speed_index) {
	case 0:
		niter = 5;
		break;
	case 1:
		niter = 10;
		break;
	default:
		niter = 19;
		break;
	}
	set_ecc(0);

	FOR_POPULATED_RANKS_BACKWARDS {
		int i;

		write_500_timings_type(info, channel, slot, rank, 0);

		write_testing(info, totalrank, 0);
		for (i = 0; i < niter; i++) {
			write_testing_type2(info, totalrank, 2, i, 0);
			write_testing_type2(info, totalrank, 3, i, 1);
		}
		pci_write_config8(NORTHBRIDGE, 0xc0, 0x01);
		totalrank++;
	}

	if (reg178_min[info->clock_speed_index] <
	    reg178_max[info->clock_speed_index])
		memset(timings[reg178_min[info->clock_speed_index]], 0,
		       sizeof(timings[0]) *
		       (reg178_max[info->clock_speed_index] -
			reg178_min[info->clock_speed_index]));

	for (reg_178 = reg178_min[info->clock_speed_index];
	     reg_178 < reg178_max[info->clock_speed_index];
	     reg_178 += reg178_step[info->clock_speed_index]) {
		totalrank = 0;
		set_178(reg_178);
		for (channel = NUM_CHANNELS - 1; channel >= 0; channel--)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					memset(&timings[reg_178][channel][slot][rank][0].smallest,
					       0, 16);
					if (info->populated_ranks[channel][slot][rank]) {
						train_ram_at_178(info, channel, slot, rank,
								 totalrank, reg_178, 1,
								 niter, timings);
						totalrank++;
					}
				}
	}

	reg178_center = choose_reg178(info, timings);

	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		info->training.timing_bounds[0][channel][slot][rank][lane].smallest =
			timings[info->training.reg178_smallest][channel][slot][rank][lane].smallest;
		info->training.timing_bounds[0][channel][slot][rank][lane].largest =
			timings[info->training.reg178_smallest][channel][slot][rank][lane].largest;
		info->training.timing_bounds[1][channel][slot][rank][lane].smallest =
			timings[info->training.reg178_largest][channel][slot][rank][lane].smallest;
		info->training.timing_bounds[1][channel][slot][rank][lane].largest =
			timings[info->training.reg178_largest][channel][slot][rank][lane].largest;
		info->training.timing_offset[channel][slot][rank][lane] =
			info->training.lane_timings[1][channel][slot][rank][lane] -
			info->training.lane_timings[0][channel][slot][rank][lane] + 64;
	}

	if (info->silicon_revision == 1
	    && (info->populated_ranks_mask[1] ^
		(info->populated_ranks_mask[1] >> 2)) & 1) {
		int ranks_after_channel1;

		totalrank = 0;
		for (reg_178 = reg178_center - 18;
		     reg_178 <= reg178_center + 18; reg_178 += 18) {
			totalrank = 0;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					if (info->populated_ranks[1][slot][rank]) {
						train_ram_at_178(info, 1, slot, rank,
								 totalrank, reg_178, 0,
								 niter, timings);
						totalrank++;
					}
				}
		}
		ranks_after_channel1 = totalrank;

		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = ranks_after_channel1;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					if (info->populated_ranks[0][slot][rank]) {
						train_ram_at_178(info, 0, slot, rank,
								 totalrank, reg_178, 0,
								 niter, timings);
						totalrank++;
					}
		}
	} else {
		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = 0;
			set_178(reg_178);
			FOR_POPULATED_RANKS_BACKWARDS {
				train_ram_at_178(info, channel, slot, rank,
						 totalrank, reg_178, 0, niter,
						 timings);
				totalrank++;
			}
		}
	}

	set_178(reg178_center);
	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		u16 tm0;

		tm0 = choose_training(info, channel, slot, rank, lane, timings,
				      reg178_center);
		write_500(info, channel, tm0,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
		write_500(info, channel,
			  tm0 +
			  info->training.lane_timings[1][channel][slot][rank][lane] -
			  info->training.lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 1, slot, rank), 9, 1);
	}

	totalrank = 0;
	FOR_POPULATED_RANKS_BACKWARDS {
		try_timing_offsets(info, channel, slot, rank, totalrank);
		totalrank++;
	}
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);
	write_1d0(0, 0x142, 3, 1);
	info->training.reg178_center = reg178_center;
}

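/* Top level of DRAM training: try the cached fast path first and fall back
   to full training; MCHBAR 0xfc4 is masked to 0xffff while training runs. */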
static void ram_training(struct raminfo *info)
{
	u16 saved_fc4;

	saved_fc4 = mchbar_read16(0xfc4);
	mchbar_write16(0xfc4, 0xffff);

	if (info->revision >= 8)
		read_4090(info);

	if (!try_cached_training(info))
		do_ram_training(info);
	if ((info->silicon_revision == 2 || info->silicon_revision == 3)
	    && info->clock_speed_index < 2)
		set_10b(info, 1);
	mchbar_write16(0xfc4, saved_fc4);
}

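/* Largest type-0 lane timing on a channel, with fixed fallbacks when the
   scratchpad or chipset revision indicates the registers are not usable. */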
u16 get_max_timing(struct raminfo *info, int channel)
{
	int slot, rank, lane;
	u16 ret = 0;

	if ((mchbar_read8(0x2ca8) >> 2) < 1)
		return 384;

	if (info->revision < 8)
		return 256;

	for (slot = 0; slot < NUM_SLOTS; slot++)
		for (rank = 0; rank < NUM_RANKS; rank++)
			if (info->populated_ranks[channel][slot][rank])
				for (lane = 0; lane < 8 + info->use_ecc; lane++)
					ret = MAX(ret, read_500(info, channel,
								get_timing_register_addr(lane, 0, slot, rank),
								9));
	return ret;
}

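/* Minimal DMI and GPIO setup; the magic values appear to match what the
   vendor BIOS leaves behind. */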
static void dmi_setup(void)
{
	gav(dmibar_read8(0x254));
	dmibar_write8(0x254, 1 << 0);
	dmibar_write16(0x1b8, 0x18f2);
	mchbar_clrsetbits16(0x48, ~0, 1 << 1);

	dmibar_setbits32(0xd68, 1 << 27);

	outl((gav(inl(DEFAULT_GPIOBASE | 0x38)) & ~0x140000) | 0x400000,
	     DEFAULT_GPIOBASE | 0x38);
	gav(inb(DEFAULT_GPIOBASE | 0xe));	// = 0xfdcaff6e
}

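/*
 * Early chipset init: detect the soft-reset loop via scratchpad 0x2ca8,
 * set up DMI, program the graphics stolen-memory size into GGC (option
 * "gfx_uma_size", encoded as (size_index + 5) << 4) and poke the IGD and
 * RCBA registers the way the reference BIOS does.
 */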
void chipset_init(const int s3resume)
{
	u8 x2ca8;
	u16 ggc;
	u8 gfxsize;

	x2ca8 = mchbar_read8(0x2ca8);
	if ((x2ca8 & 1) || (x2ca8 == 8 && !s3resume)) {
		printk(BIOS_DEBUG, "soft reset detected, rebooting properly\n");
		mchbar_write8(0x2ca8, 0);
		system_reset();
	}

	dmi_setup();

	mchbar_write16(0x1170, 0xa880);
	mchbar_write8(0x11c1, 1 << 0);
	mchbar_write16(0x1170, 0xb880);
	mchbar_clrsetbits8(0x1210, ~0, 0x84);

	gfxsize = get_uint_option("gfx_uma_size", 0);	/* 0 for 32MB */

	ggc = 0xb00 | ((gfxsize + 5) << 4);

	pci_write_config16(NORTHBRIDGE, GGC, ggc | 2);

	u16 deven;
	deven = pci_read_config16(NORTHBRIDGE, DEVEN);	// = 0x3

	if (deven & 8) {
		mchbar_write8(0x2c30, 1 << 5);
		pci_read_config8(NORTHBRIDGE, 0x8);	// = 0x18
		mchbar_setbits16(0x2c30, 1 << 9);
		mchbar_write16(0x2c32, 0x434);
		mchbar_clrsetbits32(0x2c44, ~0, 0x1053687);
		pci_read_config8(GMA, MSAC);	// = 0x2
		pci_write_config8(GMA, MSAC, 0x2);
		RCBA8(0x2318);
		RCBA8(0x2318) = 0x47;
		RCBA8(0x2320);
		RCBA8(0x2320) = 0xfc;
	}

	mchbar_clrsetbits32(0x30, ~0, 0x40);

	pci_write_config16(NORTHBRIDGE, GGC, ggc);
	gav(RCBA32(0x3428));
	RCBA32(0x3428) = 0x1d;
}

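/* Collect bits 4, 3 and 2 of reg32 into a 3-bit value, most significant
   first (bit 4 -> bit 0, bit 3 -> bit 1, bit 2 -> bit 2). */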
static u8 get_bits_420(const u32 reg32)
{
	u8 val = 0;
	val |= (reg32 >> 4) & (1 << 0);
	val |= (reg32 >> 2) & (1 << 1);
	val |= (reg32 >> 0) & (1 << 2);
	return val;
}

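/*
 * Main raminit entry point, called from romstage: read SPDs, compute and
 * program timings, run the QuickPath init (which issues one CPU-only reset
 * through scratchpad 0x2ca8), replay cached training on S3 resume, train,
 * set up the memory map and HECI UMA, and finally recover or save CBMEM
 * and timing data.
 */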
void raminit(const int s3resume, const u8 *spd_addrmap)
{
	unsigned int channel, slot, lane, rank;
	struct raminfo info;
	u8 x2ca8;
	int cbmem_wasnot_inited;

	x2ca8 = mchbar_read8(0x2ca8);

	printk(RAM_DEBUG, "Scratchpad MCHBAR8(0x2ca8): 0x%04x\n", x2ca8);

	memset(&info, 0x5a, sizeof(info));

	info.last_500_command[0] = 0;
	info.last_500_command[1] = 0;

	info.board_lane_delay[0] = 0x14;
	info.board_lane_delay[1] = 0x07;
	info.board_lane_delay[2] = 0x07;
	info.board_lane_delay[3] = 0x08;
	info.board_lane_delay[4] = 0x56;
	info.board_lane_delay[5] = 0x04;
	info.board_lane_delay[6] = 0x04;
	info.board_lane_delay[7] = 0x05;
	info.board_lane_delay[8] = 0x10;

	info.training.reg_178 = 0;
	info.training.reg_10b = 0;

	/* Wait for some bit, maybe TXT clear. */
	while (!(read8((u8 *)0xfed40000) & (1 << 7)))
		;

	/* Wait for ME to be ready */
	intel_early_me_init();
	info.memory_reserved_for_heci_mb = intel_early_me_uma_size();

	/* before SPD */
	timestamp_add_now(101);

	if (!s3resume || 1) {	// possible error
		memset(&info.populated_ranks, 0, sizeof(info.populated_ranks));

		info.use_ecc = 1;
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++) {
				int v;
				int try;
				int addr;
				const u8 useful_addresses[] = {
					DEVICE_TYPE,
					MODULE_TYPE,
					DENSITY,
					RANKS_AND_DQ,
					MEMORY_BUS_WIDTH,
					TIMEBASE_DIVIDEND,
					TIMEBASE_DIVISOR,
					CYCLETIME,
					CAS_LATENCIES_LSB,
					CAS_LATENCIES_MSB,
					CAS_LATENCY_TIME,
					0x11, 0x12, 0x13, 0x14, 0x15,
					0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b,
					0x1c, 0x1d,
					THERMAL_AND_REFRESH,
					0x20,
					REFERENCE_RAW_CARD_USED,
					RANK1_ADDRESS_MAPPING,
					0x75, 0x76, 0x77, 0x78,
					0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
					0x7f, 0x80, 0x81, 0x82, 0x83, 0x84,
					0x85, 0x86, 0x87, 0x88,
					0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
					0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
					0x95
				};
				if (!spd_addrmap[2 * channel + slot])
					continue;
				for (try = 0; try < 5; try++) {
					v = smbus_read_byte(spd_addrmap[2 * channel + slot],
							    DEVICE_TYPE);
					if (v >= 0)
						break;
				}
				if (v < 0)
					continue;
				for (addr = 0; addr < ARRAY_SIZE(useful_addresses); addr++)
					gav(info.spd[channel][0][useful_addresses[addr]] =
					    smbus_read_byte(spd_addrmap[2 * channel + slot],
							    useful_addresses[addr]));
				if (info.spd[channel][0][DEVICE_TYPE] != 11)
					die("Only DDR3 is supported");

				v = info.spd[channel][0][RANKS_AND_DQ];
				info.populated_ranks[channel][0][0] = 1;
				info.populated_ranks[channel][0][1] = ((v >> 3) & 7);
				if (((v >> 3) & 7) > 1)
					die("At most 2 ranks are supported");
				if ((v & 7) == 0 || (v & 7) > 2)
					die("Only x8 and x16 modules are supported");
				if ((info.spd[channel][slot][MODULE_TYPE] & 0xF) != 2
				    && (info.spd[channel][slot][MODULE_TYPE] & 0xF) != 3)
					die("Registered memory is not supported");
				info.is_x16_module[channel][0] = (v & 7) - 1;
				info.density[channel][slot] =
					info.spd[channel][slot][DENSITY] & 0xF;
				if (!(info.spd[channel][slot][MEMORY_BUS_WIDTH] & 0x18))
					info.use_ecc = 0;
			}

		gav(0x55);

		for (channel = 0; channel < NUM_CHANNELS; channel++) {
			int v = 0;
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					v |= info.populated_ranks[channel][slot][rank]
						<< (2 * slot + rank);
			info.populated_ranks_mask[channel] = v;
		}

		gav(0x55);

		gav(pci_read_config32(NORTHBRIDGE, CAPID0 + 4));
	}

	/* after SPD */
	timestamp_add_now(102);

	mchbar_clrbits8(0x2ca8, 1 << 1 | 1 << 0);

	collect_system_info(&info);
	calculate_timings(&info);

	if (!s3resume) {
		u8 reg8 = pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2);
		if (x2ca8 == 0 && (reg8 & 0x80)) {
			/* Don't enable S4-assertion stretch. Makes trouble on roda/rk9.
			   reg8 = pci_read_config8(PCI_DEV(0, 0x1f, 0), 0xa4);
			   pci_write_config8(PCI_DEV(0, 0x1f, 0), 0xa4, reg8 | 0x08);
			 */

			/* Clear bit7. */

			pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
					  (reg8 & ~(1 << 7)));

			printk(BIOS_INFO,
			       "Interrupted RAM init, reset required.\n");
			system_reset();
		}
	}

	if (!s3resume && x2ca8 == 0)
		pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
				  pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) | 0x80);

	compute_derived_timings(&info);

	early_quickpath_init(&info, x2ca8);

	info.cached_training = get_cached_training();

	if (x2ca8 == 0)
		late_quickpath_init(&info, s3resume);

	mchbar_setbits32(0x2c80, 1 << 24);
	mchbar_write32(0x1804, mchbar_read32(0x1c04) & ~(1 << 27));

	mchbar_read8(0x2ca8);	// !!!!

	if (x2ca8 == 0) {
		mchbar_clrbits8(0x2ca8, 3);
		mchbar_write8(0x2ca8, mchbar_read8(0x2ca8) + 4);	// "+" or "|"?
		/* This issues a CPU reset without resetting the platform */
		printk(BIOS_DEBUG, "Issuing a CPU reset\n");
		/* Write back the S3 state to PM1_CNT to let the reset CPU
		   know it also needs to take the s3 path. */
		if (s3resume)
			write_pmbase32(PM1_CNT, read_pmbase32(PM1_CNT)
				       | (SLP_TYP_S3 << 10));
		mchbar_setbits32(0x1af0, 1 << 4);
		halt();
	}

	mchbar_clrbits8(0x2ca8, 0);	// !!!!

	mchbar_clrbits32(0x2c80, 1 << 24);

	pci_write_config32(QPI_NON_CORE, MAX_RTIDS, 0x20220);

	{
		u8 x2c20 = (mchbar_read16(0x2c20) >> 8) & 3;
		u16 x2c10 = mchbar_read16(0x2c10);
		u16 value = mchbar_read16(0x2c00);
		if (x2c20 == 0 && (x2c10 & 0x300) == 0)
			value |= (1 << 7);
		else
			value &= ~(1 << 0);

		mchbar_write16(0x2c00, value);
	}

	udelay(1000);	// !!!!

	write_1d0(0, 0x33d, 0, 0);
	write_500(&info, 0, 0, 0xb61, 0, 0);
	write_500(&info, 1, 0, 0xb61, 0, 0);
	mchbar_write32(0x1a30, 0);
	mchbar_write32(0x1a34, 0);
	mchbar_write16(0x614, 0xb5b | (info.populated_ranks[1][0][0] * 0x404) |
		       (info.populated_ranks[0][0][0] * 0xa0));
	mchbar_write16(0x616, 0x26a);
	mchbar_write32(0x134, 0x856000);
	mchbar_write32(0x160, 0x5ffffff);
	mchbar_clrsetbits32(0x114, ~0, 0xc2024440);	// !!!!
	mchbar_clrsetbits32(0x118, ~0, 0x4);	// !!!!
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		mchbar_write32(0x260 + (channel << 10), 0x30809ff |
			       (info.populated_ranks_mask[channel] & 3) << 20);
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_write16(0x31c + (channel << 10), 0x101);
		mchbar_write16(0x360 + (channel << 10), 0x909);
		mchbar_write16(0x3a4 + (channel << 10), 0x101);
		mchbar_write16(0x3e8 + (channel << 10), 0x101);
		mchbar_write32(0x320 + (channel << 10), 0x29002900);
		mchbar_write32(0x324 + (channel << 10), 0);
		mchbar_write32(0x368 + (channel << 10), 0x32003200);
		mchbar_write16(0x352 + (channel << 10), 0x505);
		mchbar_write16(0x354 + (channel << 10), 0x3c3c);
		mchbar_write16(0x356 + (channel << 10), 0x1040);
		mchbar_write16(0x39a + (channel << 10), 0x73e4);
		mchbar_write16(0x3de + (channel << 10), 0x77ed);
		mchbar_write16(0x422 + (channel << 10), 0x1040);
	}

	write_1d0(0x4, 0x151, 4, 1);
	write_1d0(0, 0x142, 3, 1);
	rdmsr(0x1ac);	// !!!!
	write_500(&info, 1, 1, 0x6b3, 4, 1);
	write_500(&info, 1, 1, 0x6cf, 4, 1);

	rmw_1d0(0x21c, 0x38, 0, 6);

	write_1d0(((!info.populated_ranks[1][0][0]) << 1) |
		  ((!info.populated_ranks[0][0][0]) << 0), 0x1d1, 3, 1);
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_write16(0x38e + (channel << 10), 0x5f5f);
		mchbar_write16(0x3d2 + (channel << 10), 0x5f5f);
	}

	set_334(0);

	program_base_timings(&info);

	mchbar_setbits8(0x5ff, 1 << 7);

	write_1d0(0x2, 0x1d5, 2, 1);
	write_1d0(0x20, 0x166, 7, 1);
	write_1d0(0x0, 0xeb, 3, 1);
	write_1d0(0x0, 0xf3, 6, 1);

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		u8 a = 0;
		if (info.populated_ranks[channel][0][1] && info.clock_speed_index > 1)
			a = 3;
		if (info.silicon_revision == 0 || info.silicon_revision == 1)
			a = 3;

		for (lane = 0; lane < 9; lane++) {
			const u16 addr = 0x125 + get_lane_offset(0, 0, lane);
			rmw_500(&info, channel, addr, 6, 0xf, a);
		}
	}

	if (s3resume) {
		if (!info.cached_training) {
			u32 reg32;
			printk(BIOS_ERR,
			       "Couldn't find training data. Rebooting\n");
			reg32 = inl(DEFAULT_PMBASE + 0x04);
			outl(reg32 & ~(7 << 10), DEFAULT_PMBASE + 0x04);
			full_reset();
		}
		int tm;
		info.training = *info.cached_training;
		for (tm = 0; tm < 4; tm++)
			for (channel = 0; channel < NUM_CHANNELS; channel++)
				for (slot = 0; slot < NUM_SLOTS; slot++)
					for (rank = 0; rank < NUM_RANKS; rank++)
						for (lane = 0; lane < 9; lane++)
							write_500(&info, channel,
								  info.training.
								  lane_timings[tm][channel][slot][rank][lane],
								  get_timing_register_addr(lane, tm, slot, rank),
								  9, 0);
		write_1d0(info.cached_training->reg_178, 0x178, 7, 1);
		write_1d0(info.cached_training->reg_10b, 0x10b, 6, 1);
	}

	mchbar_clrsetbits32(0x1f4, ~0, 1 << 17);	// !!!!
	mchbar_write32(0x1f0, 0x1d000200);
	mchbar_setbits8(0x1f0, 1 << 0);
	while (mchbar_read8(0x1f0) & 1)
		;

	program_board_delay(&info);

	mchbar_write8(0x5ff, 0);
	mchbar_write8(0x5ff, 1 << 7);
	mchbar_write8(0x5f4, 1 << 0);

	mchbar_clrbits32(0x130, 1 << 1);	// | 2 when ?
	while (mchbar_read32(0x130) & 1)
		;

	rmw_1d0(0x14b, 0x47, 0x30, 7);
	rmw_1d0(0xd6, 0x38, 7, 6);
	rmw_1d0(0x328, 0x38, 7, 6);

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		set_4cf(&info, channel, 1, 0);

	rmw_1d0(0x116, 0xe, 0, 4);
	rmw_1d0(0xae, 0x3e, 0, 6);
	rmw_1d0(0x300, 0x3e, 0, 6);
	mchbar_clrbits16(0x356, 1 << 15);
	mchbar_clrbits16(0x756, 1 << 15);
	mchbar_clrbits32(0x140, 7 << 24);
	mchbar_clrbits32(0x138, 7 << 24);
	mchbar_write32(0x130, 0x31111301);
	/* Wait until REG130b0 is 1. */
	while (mchbar_read32(0x130) & 1)
		;

	u8 value_a1;
	{
		const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6));	// = 0x1cf4040 // !!!!
		const u8 val_2f3 = get_bits_420(read_1d0(0x2f3, 6));	// = 0x10a4040 // !!!!
		value_a1 = val_xa1;
		rmw_1d0(0x320, 0x38, val_2f3, 6);
		rmw_1d0(0x14b, 0x78, val_xa1, 7);
		rmw_1d0(0xce, 0x38, val_xa1, 6);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		set_4cf(&info, channel, 1, 1);

	rmw_1d0(0x116, 0xe, 1, 4);	// = 0x4040432 // !!!!

	{
		if ((mchbar_read32(0x144) & 0x1f) < 0x13)
			value_a1 += 2;
		else
			value_a1 += 1;

		if (value_a1 > 7)
			value_a1 = 7;

		write_1d0(2, 0xae, 6, 1);
		write_1d0(2, 0x300, 6, 1);
		write_1d0(value_a1, 0x121, 3, 1);
		rmw_1d0(0xd6, 0x38, 4, 6);
		rmw_1d0(0x328, 0x38, 4, 6);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		set_4cf(&info, channel, 2, 0);

	mchbar_write32(0x130, 0x11111301 | info.populated_ranks[1][0][0] << 30 |
		       info.populated_ranks[0][0][0] << 29);
	while (mchbar_read8(0x130) & 1)
		;

	{
		const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6));
		read_1d0(0x2f3, 6);	// = 0x10a4054 // !!!!
		rmw_1d0(0x21c, 0x38, 0, 6);
		rmw_1d0(0x14b, 0x78, val_xa1, 7);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		set_4cf(&info, channel, 2, 1);

	set_334(1);

	mchbar_write8(0x1e8, 1 << 2);

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_500(&info, channel,
			  0x3 & ~(info.populated_ranks_mask[channel]), 0x6b7, 2, 1);
		write_500(&info, channel, 0x3, 0x69b, 2, 1);
	}
	mchbar_clrsetbits32(0x2d0, ~0xff0c01ff, 0x200000);
	mchbar_write16(0x6c0, 0x14a0);
	mchbar_clrsetbits32(0x6d0, ~0xff0000ff, 0x8000);
	mchbar_write16(0x232, 1 << 3);
	/* 0x40004 or 0 depending on ? */
	mchbar_clrsetbits32(0x234, 0x40004, 0x40004);
	mchbar_clrsetbits32(0x34, 0x7, 5);
	mchbar_write32(0x128, 0x2150d05);
	mchbar_write8(0x12c, 0x1f);
	mchbar_write8(0x12d, 0x56);
	mchbar_write8(0x12e, 0x31);
	mchbar_write8(0x12f, 0);
	mchbar_write8(0x271, 1 << 1);
	mchbar_write8(0x671, 1 << 1);
	mchbar_write8(0x1e8, 1 << 2);
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		mchbar_write32(0x294 + (channel << 10),
			       (info.populated_ranks_mask[channel] & 3) << 16);
	mchbar_clrsetbits32(0x134, ~0xfc01ffff, 0x10000);
	mchbar_clrsetbits32(0x134, ~0xfc85ffff, 0x850000);
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		mchbar_clrsetbits32(0x260 + (channel << 10), 0xf << 20, 1 << 27 |
				    (info.populated_ranks_mask[channel] & 3) << 20);

	if (!s3resume)
		jedec_init(&info);

	int totalrank = 0;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info.populated_ranks[channel][slot][rank]) {
					jedec_read(&info, channel, slot, rank,
						   totalrank, 0xa, 0x400);
					totalrank++;
				}

	mchbar_write8(0x12c, 0x9f);

	mchbar_clrsetbits8(0x271, 0x3e, 0x0e);
	mchbar_clrsetbits8(0x671, 0x3e, 0x0e);

	if (!s3resume) {
		for (channel = 0; channel < NUM_CHANNELS; channel++) {
			mchbar_write32(0x294 + (channel << 10),
				       (info.populated_ranks_mask[channel] & 3) << 16);
			mchbar_write16(0x298 + (channel << 10),
				       info.populated_ranks[channel][0][0] |
				       info.populated_ranks[channel][0][1] << 5);
			mchbar_write32(0x29c + (channel << 10), 0x77a);
		}
		mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00);	// !!!!

		{
			u8 a, b;
			a = mchbar_read8(0x243);
			b = mchbar_read8(0x643);
			mchbar_write8(0x243, a | 2);
			mchbar_write8(0x643, b | 2);
		}

		write_1d0(7, 0x19b, 3, 1);
		write_1d0(7, 0x1c0, 3, 1);
		write_1d0(4, 0x1c6, 4, 1);
		write_1d0(4, 0x1cc, 4, 1);
		rmw_1d0(0x151, 0xf, 0x4, 4);
		mchbar_write32(0x584, 0xfffff);
		mchbar_write32(0x984, 0xfffff);

		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					if (info.populated_ranks[channel][slot][rank])
						config_rank(&info, s3resume,
							    channel, slot, rank);

		mchbar_write8(0x243, 1);
		mchbar_write8(0x643, 1);
	}

	/* was == 1 but is common */
	pci_write_config16(NORTHBRIDGE, 0xc8, 3);
	write_26c(0, 0x820);
	write_26c(1, 0x820);
	mchbar_setbits32(0x130, 1 << 1);
	/* end */

	if (s3resume) {
		for (channel = 0; channel < NUM_CHANNELS; channel++) {
			mchbar_write32(0x294 + (channel << 10),
				       (info.populated_ranks_mask[channel] & 3) << 16);
			mchbar_write16(0x298 + (channel << 10),
				       info.populated_ranks[channel][0][0] |
				       info.populated_ranks[channel][0][1] << 5);
			mchbar_write32(0x29c + (channel << 10), 0x77a);
		}
		mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00);	// !!!!
	}

	mchbar_clrbits32(0xfa4, 1 << 24 | 1 << 1);
	mchbar_write32(0xfb0, 0x2000e019);

	/* Before training. */
	timestamp_add_now(103);

	if (!s3resume)
		ram_training(&info);

	/* After training. */
	timestamp_add_now(104);

	dump_timings(&info);

	program_modules_memory_map(&info, 0);
	program_total_memory_map(&info);

	if (info.non_interleaved_part_mb != 0 && info.interleaved_part_mb != 0)
		mchbar_write8(0x111, 0 << 2 | 1 << 5 | 1 << 6 | 0 << 7);
	else if (have_match_ranks(&info, 0, 4) && have_match_ranks(&info, 1, 4))
		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 1 << 7);
	else if (have_match_ranks(&info, 0, 2) && have_match_ranks(&info, 1, 2))
		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 0 << 7);
	else
		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 1 << 6 | 0 << 7);

	mchbar_clrbits32(0xfac, 1 << 31);
	mchbar_write32(0xfb4, 0x4800);
	mchbar_write32(0xfb8, (info.revision < 8) ? 0x20 : 0x0);
	mchbar_write32(0xe94, 0x7ffff);
	mchbar_write32(0xfc0, 0x80002040);
	mchbar_write32(0xfc4, 0x701246);
	mchbar_clrbits8(0xfc8, 0x70);
	mchbar_setbits32(0xe5c, 1 << 24);
	mchbar_clrsetbits32(0x1a70, 3 << 20, 2 << 20);
	mchbar_write32(0x50, 0x700b0);
	mchbar_write32(0x3c, 0x10);
	mchbar_clrsetbits8(0x1aa8, 0x3f, 0xa);
	mchbar_setbits8(0xff4, 1 << 1);
	mchbar_clrsetbits32(0xff8, 0xe008, 0x1020);

	mchbar_write32(0xd00, IOMMU_BASE2 | 1);
	mchbar_write32(0xd40, IOMMU_BASE1 | 1);
	mchbar_write32(0xdc0, IOMMU_BASE4 | 1);

	write32p(IOMMU_BASE1 | 0xffc, 0x80000000);
	write32p(IOMMU_BASE2 | 0xffc, 0xc0000000);
	write32p(IOMMU_BASE4 | 0xffc, 0x80000000);

	{
		u32 eax;

		eax = info.fsb_frequency / 9;
		mchbar_clrsetbits32(0xfcc, 0x3ffff,
				    (eax * 0x280) | (eax * 0x5000) | eax | 0x40000);
		mchbar_write32(0x20, 0x33001);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_clrbits32(0x220 + (channel << 10), 0x7770);
		if (info.max_slots_used_in_channel == 1)
			mchbar_setbits16(0x237 + (channel << 10), 0x0201);
		else
			mchbar_clrbits16(0x237 + (channel << 10), 0x0201);

		mchbar_setbits8(0x241 + (channel << 10), 1 << 0);

		if (info.clock_speed_index <= 1 && (info.silicon_revision == 2
						    || info.silicon_revision == 3))
			mchbar_setbits32(0x248 + (channel << 10), 0x00102000);
		else
			mchbar_clrbits32(0x248 + (channel << 10), 0x00102000);
	}

	mchbar_setbits32(0x115, 1 << 24);

	{
		u8 al;
		al = 0xd;
		if (!(info.silicon_revision == 0 || info.silicon_revision == 1))
			al += 2;
		al |= ((1 << (info.max_slots_used_in_channel - 1)) - 1) << 4;
		mchbar_write32(0x210, al << 16 | 0x20);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_write32(0x288 + (channel << 10), 0x70605040);
		mchbar_write32(0x28c + (channel << 10), 0xfffec080);
		mchbar_write32(0x290 + (channel << 10), 0x282091c |
			       (info.max_slots_used_in_channel - 1) << 0x16);
	}
	u32 reg1c;
	pci_read_config32(NORTHBRIDGE, 0x40);	// = DEFAULT_EPBAR | 0x001 // OK
	reg1c = epbar_read32(EPVC1RCAP);	// = 0x8001 // OK
	pci_read_config32(NORTHBRIDGE, 0x40);	// = DEFAULT_EPBAR | 0x001 // OK
	epbar_write32(EPVC1RCAP, reg1c);	// OK
	mchbar_read8(0xe08);	// = 0x0
	pci_read_config32(NORTHBRIDGE, 0xe4);	// = 0x316126
	mchbar_setbits8(0x1210, 1 << 1);
	mchbar_write32(0x1200, 0x8800440);
	mchbar_write32(0x1204, 0x53ff0453);
	mchbar_write32(0x1208, 0x19002043);
	mchbar_write16(0x1214, 0x320);

	if (info.revision == 0x10 || info.revision == 0x11) {
		mchbar_write16(0x1214, 0x220);
		mchbar_setbits8(0x1210, 1 << 6);
	}

	mchbar_setbits8(0x1214, 1 << 2);
	mchbar_write8(0x120c, 1);
	mchbar_write8(0x1218, 3);
	mchbar_write8(0x121a, 3);
	mchbar_write8(0x121c, 3);
	mchbar_write16(0xc14, 0);
	mchbar_write16(0xc20, 0);
	mchbar_write32(0x1c, 0);

	/* revision dependent here. */

	mchbar_setbits16(0x1230, 0x1f07);

	if (info.uma_enabled)
		mchbar_setbits32(0x11f4, 1 << 28);

	mchbar_setbits16(0x1230, 1 << 15);
	mchbar_setbits8(0x1214, 1 << 0);

	u8 bl, ebpb;
	u16 reg_1020;

	reg_1020 = mchbar_read32(0x1020);	// = 0x6c733c // OK
	mchbar_write8(0x1070, 1);

	mchbar_write32(0x1000, 0x100);
	mchbar_write8(0x1007, 0);

	if (reg_1020 != 0) {
		mchbar_write16(0x1018, 0);
		bl = reg_1020 >> 8;
		ebpb = reg_1020 & 0xff;
	} else {
		ebpb = 0;
		bl = 8;
	}

	rdmsr(0x1a2);

	mchbar_write32(0x1014, 0xffffffff);

	mchbar_write32(0x1010, ((((ebpb + 0x7d) << 7) / bl) & 0xff) * !!reg_1020);

	mchbar_write8(0x101c, 0xb8);

	mchbar_clrsetbits8(0x123e, 0xf0, 0x60);
	if (reg_1020 != 0) {
		mchbar_clrsetbits32(0x123c, 0xf << 20, 0x6 << 20);
		mchbar_write8(0x101c, 0xb8);
	}

	const u64 heci_uma_addr =
		((u64)((((u64)pci_read_config16(NORTHBRIDGE, TOM)) << 6) -
		       info.memory_reserved_for_heci_mb)) << 20;

	setup_heci_uma(heci_uma_addr, info.memory_reserved_for_heci_mb);

	if (info.uma_enabled) {
		u16 ax;
		mchbar_setbits32(0x11b0, 1 << 14);
		mchbar_setbits32(0x11b4, 1 << 14);
		mchbar_setbits16(0x1190, 1 << 14);

		ax = mchbar_read16(0x1190) & 0xf00;	// = 0x480a // OK
		mchbar_write16(0x1170, ax | (mchbar_read16(0x1170) & 0x107f) | 0x4080);
		mchbar_setbits16(0x1170, 1 << 12);

		udelay(1000);

		u16 ecx;
		for (ecx = 0xffff; ecx && (mchbar_read16(0x1170) & (1 << 12)); ecx--)
			;
		mchbar_clrbits16(0x1190, 1 << 14);
	}

	pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
			  pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) & ~0x80);
	udelay(10000);
	mchbar_write16(0x2ca8, 1 << 3);

	udelay(1000);
	dump_timings(&info);
	cbmem_wasnot_inited = cbmem_recovery(s3resume);

	if (!s3resume)
		save_timings(&info);
	if (s3resume && cbmem_wasnot_inited) {
		printk(BIOS_ERR, "Failed S3 resume.\n");
		ram_check_nodie(1 * MiB);

		/* Failed S3 resume, reset to come up cleanly */
		full_reset();
	}
}