nb/intel/ironlake: Fix some quickpath init magic
[coreboot.git] / src / northbridge / intel / ironlake / raminit.c
blobfe67f0d0a3e93c6ca340da35a57a5e8a727a8dc4
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 #include <console/console.h>
4 #include <commonlib/helpers.h>
5 #include <string.h>
6 #include <arch/io.h>
7 #include <device/mmio.h>
8 #include <device/pci_ops.h>
9 #include <device/smbus_host.h>
10 #include <cpu/x86/msr.h>
11 #include <cpu/x86/cache.h>
12 #include <cbmem.h>
13 #include <cf9_reset.h>
14 #include <ip_checksum.h>
15 #include <option.h>
16 #include <device/pci_def.h>
17 #include <device/device.h>
18 #include <halt.h>
19 #include <spd.h>
20 #include <timestamp.h>
21 #include <cpu/x86/mtrr.h>
22 #include <cpu/intel/speedstep.h>
23 #include <cpu/intel/turbo.h>
24 #include <mrc_cache.h>
25 #include <southbridge/intel/ibexpeak/me.h>
26 #include <southbridge/intel/common/pmbase.h>
27 #include <delay.h>
28 #include <types.h>
30 #include "chip.h"
31 #include "ironlake.h"
32 #include "raminit.h"
33 #include "raminit_tables.h"
/* PCI devices touched during raminit: host bridge (D0:F0), LPC bridge and IGD. */
35 #define NORTHBRIDGE PCI_DEV(0, 0, 0)
36 #define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
37 #define GMA PCI_DEV (0, 0x2, 0x0)
/* Visit every (channel, slot, rank) triple, whether populated or not. */
39 #define FOR_ALL_RANKS \
40   for (channel = 0; channel < NUM_CHANNELS; channel++) \
41     for (slot = 0; slot < NUM_SLOTS; slot++) \
42       for (rank = 0; rank < NUM_RANKS; rank++)
/* Visit only ranks marked present in info->populated_ranks. */
44 #define FOR_POPULATED_RANKS \
45   for (channel = 0; channel < NUM_CHANNELS; channel++) \
46     for (slot = 0; slot < NUM_SLOTS; slot++) \
47       for (rank = 0; rank < NUM_RANKS; rank++) \
48 	if (info->populated_ranks[channel][slot][rank])
/* Same as above, but iterating channels from highest to lowest. */
50 #define FOR_POPULATED_RANKS_BACKWARDS \
51   for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \
52     for (slot = 0; slot < NUM_SLOTS; slot++) \
53       for (rank = 0; rank < NUM_RANKS; rank++) \
54 	if (info->populated_ranks[channel][slot][rank])
56 #include <lib.h> /* Prototypes */
58 typedef struct _u128 {
59 u64 lo;
60 u64 hi;
61 } u128;
63 static void read128(u32 addr, u64 * out)
65 u128 ret;
66 u128 stor;
67 asm volatile ("movdqu %%xmm0, %0\n"
68 "movdqa (%2), %%xmm0\n"
69 "movdqu %%xmm0, %1\n"
70 "movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr));
71 out[0] = ret.lo;
72 out[1] = ret.hi;
/*
 * Ironlake memory I/O timings are located in scan chains, accessible
 * through MCHBAR register groups. Each channel has a scan chain, and
 * there's a global scan chain too. Each chain is broken into smaller
 * sections of N bits, where N <= 32. Each section allows reading and
 * writing a certain parameter. Each section contains N - 2 data bits
 * and two additional bits: a Mask bit, and a Halt bit.
 */
84 /* OK */
85 static void write_1d0(u32 val, u16 addr, int bits, int flag)
87 mchbar_write32(0x1d0, 0);
88 while (mchbar_read32(0x1d0) & (1 << 23))
90 mchbar_write32(0x1d4, (val & ((1 << bits) - 1)) | 2 << bits | flag << bits);
91 mchbar_write32(0x1d0, 1 << 30 | addr);
92 while (mchbar_read32(0x1d0) & (1 << 23))
96 /* OK */
97 static u16 read_1d0(u16 addr, int split)
99 u32 val;
100 mchbar_write32(0x1d0, 0);
101 while (mchbar_read32(0x1d0) & (1 << 23))
103 mchbar_write32(0x1d0, 1 << 31 | (((mchbar_read8(0x246) >> 2) & 3) + 0x361 - addr));
104 while (mchbar_read32(0x1d0) & (1 << 23))
106 val = mchbar_read32(0x1d8);
107 write_1d0(0, 0x33d, 0, 0);
108 write_1d0(0, 0x33d, 0, 0);
109 val &= ((1 << split) - 1);
110 // printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val);
111 return val;
114 static void sfence(void)
116 asm volatile ("sfence");
119 static inline u16 get_lane_offset(int slot, int rank, int lane)
121 return 0x124 * lane + ((lane & 4) ? 0x23e : 0) + 11 * rank + 22 * slot -
122 0x452 * (lane == 8);
125 static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
127 const u16 offs[] = { 0x1d, 0xa8, 0xe6, 0x5c };
128 return get_lane_offset(slot, rank, lane) + offs[(tm + 3) % 4];
131 static u32 gav_real(int line, u32 in)
133 // printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
134 return in;
137 #define gav(x) gav_real (__LINE__, (x))
139 /* Global allocation of timings_car */
/* NOTE(review): timing_bounds_t is declared in raminit.h; 64 slots —
   presumably one per lane/rank combination used during training.
   Confirm the indexing convention against the training code. */
140 timing_bounds_t timings_car[64];
142 /* OK */
143 static u16
144 read_500(struct raminfo *info, int channel, u16 addr, int split)
146 u32 val;
147 info->last_500_command[channel] = 1 << 31;
148 mchbar_write32(0x500 + (channel << 10), 0);
149 while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
151 mchbar_write32(0x500 + (channel << 10),
152 1 << 31 | (((mchbar_read8(0x246 + (channel << 10)) >> 2) & 3) + 0xb88 - addr));
153 while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
155 val = mchbar_read32(0x508 + (channel << 10));
156 return val & ((1 << split) - 1);
159 /* OK */
160 static void
161 write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
162 int flag)
164 if (info->last_500_command[channel] == 1 << 31) {
165 info->last_500_command[channel] = 1 << 30;
166 write_500(info, channel, 0, 0xb61, 0, 0);
168 mchbar_write32(0x500 + (channel << 10), 0);
169 while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
171 mchbar_write32(0x504 + (channel << 10),
172 (val & ((1 << bits) - 1)) | 2 << bits | flag << bits);
173 mchbar_write32(0x500 + (channel << 10), 1 << 30 | addr);
174 while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
178 static void rmw_500(struct raminfo *info, int channel, u16 addr, int bits, u32 and, u32 or)
180 const u32 val = read_500(info, channel, addr, bits) & and;
181 write_500(info, channel, val | or, addr, bits, 1);
/*
 * Byte-lane read/write test on the 256 MiB region starting at
 * rank << 28.  Writes an all-ones/all-zeros pattern chosen per qword by
 * the bits of `mask`, reads it back, and clears a bit in `ok` for each
 * byte lane that mismatched (bits 0-3: low dword's lanes, bits 4-7:
 * high dword's lanes).  Returns the 8-bit lane-OK mask (0xff = all
 * lanes good).  Memory is zeroed and read back before and after.
 */
184 static int rw_test(int rank)
186 const u32 mask = 0xf00fc33c;
187 int ok = 0xff;
188 int i;
/* Scrub: zero the first 64 dwords, then read them back (gav traces). */
189 for (i = 0; i < 64; i++)
190 write32p((rank << 28) | (i << 2), 0);
191 sfence();
192 for (i = 0; i < 64; i++)
193 gav(read32p((rank << 28) | (i << 2)));
194 sfence();
/* Write one pattern qword (two identical dwords) per mask bit. */
195 for (i = 0; i < 32; i++) {
196 u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0);
197 write32p((rank << 28) | (i << 3), pat);
198 write32p((rank << 28) | (i << 3) | 4, pat);
200 sfence();
/* Verify each byte lane; drop its bit from `ok` on mismatch. */
201 for (i = 0; i < 32; i++) {
202 u8 pat = (((mask >> i) & 1) ? 0xff : 0);
203 int j;
204 u32 val;
205 gav(val = read32p((rank << 28) | (i << 3)));
206 for (j = 0; j < 4; j++)
207 if (((val >> (j * 8)) & 0xff) != pat)
208 ok &= ~(1 << j);
209 gav(val = read32p((rank << 28) | (i << 3) | 4));
210 for (j = 0; j < 4; j++)
211 if (((val >> (j * 8)) & 0xff) != pat)
212 ok &= ~(16 << j);
214 sfence();
/* Scrub again so no pattern data is left behind. */
215 for (i = 0; i < 64; i++)
216 write32p((rank << 28) | (i << 2), 0);
217 sfence();
218 for (i = 0; i < 64; i++)
219 gav(read32p((rank << 28) | (i << 2)));
221 return ok;
/*
 * Program timing registers 2 and 3 for all eight data lanes of one
 * rank: each lane gets its trained value plus `base` (callers pass
 * 0x80 or 0), written as a 9-bit scan-chain value.
 */
224 static void
225 program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
227 int lane;
228 for (lane = 0; lane < 8; lane++) {
/* timing register 2 */
229 write_500(info, channel,
230 base +
231 info->training.
232 lane_timings[2][channel][slot][rank][lane],
233 get_timing_register_addr(lane, 2, slot, rank), 9, 0);
/* timing register 3 */
234 write_500(info, channel,
235 base +
236 info->training.
237 lane_timings[3][channel][slot][rank][lane],
238 get_timing_register_addr(lane, 3, slot, rank), 9, 0);
242 static void write_26c(int channel, u16 si)
244 mchbar_write32(0x26c + (channel << 10), 0x03243f35);
245 mchbar_write32(0x268 + (channel << 10), 0xcfc00000 | si << 9);
246 mchbar_write16(0x2b9 + (channel << 10), si);
249 static void toggle_1d0_142_5ff(void)
251 u32 reg32 = gav(read_1d0(0x142, 3));
252 if (reg32 & (1 << 1))
253 write_1d0(0, 0x142, 3, 1);
255 mchbar_write8(0x5ff, 0);
256 mchbar_write8(0x5ff, 1 << 7);
257 if (reg32 & (1 << 1))
258 write_1d0(0x2, 0x142, 3, 1);
261 static u32 get_580(int channel, u8 addr)
263 u32 ret;
264 toggle_1d0_142_5ff();
265 mchbar_write32(0x580 + (channel << 10), 0x8493c012 | addr);
266 mchbar_setbits8(0x580 + (channel << 10), 1 << 0);
267 while (!((ret = mchbar_read32(0x580 + (channel << 10))) & (1 << 16)))
269 mchbar_clrbits8(0x580 + (channel << 10), 1 << 0);
270 return ret;
/* Address bit positions: bits 28+ select the rank, each channel's MCHBAR
   block is 1 << 10 bytes apart. */
273 #define RANK_SHIFT 28
274 #define CHANNEL_SHIFT 10
/*
 * Training sequence step 9 for one rank: replay trained lane timings
 * 1 and 2 through the scan chain, interleaved with 0x103 writes and
 * get_580() commands, then read back timing register 2 and derive
 * timing register 3 as reg2 + 0x20.  Exact hardware semantics are
 * undocumented magic; statement order matters.
 */
276 static void seq9(struct raminfo *info, int channel, int slot, int rank)
278 int i, lane;
/* Write timing registers 1 and 2 for all lanes. */
280 for (i = 0; i < 2; i++)
281 for (lane = 0; lane < 8; lane++)
282 write_500(info, channel,
283 info->training.lane_timings[i +
284 1][channel][slot]
285 [rank][lane], get_timing_register_addr(lane,
286 i + 1,
287 slot,
288 rank),
289 9, 0);
291 write_1d0(1, 0x103, 6, 1);
/* Timing register 0 for all lanes. */
292 for (lane = 0; lane < 8; lane++)
293 write_500(info, channel,
294 info->training.
295 lane_timings[0][channel][slot][rank][lane],
296 get_timing_register_addr(lane, 0, slot, rank), 9, 0);
/* Repeat timings 1 and 2, each followed by a get_580() command. */
298 for (i = 0; i < 2; i++) {
299 for (lane = 0; lane < 8; lane++)
300 write_500(info, channel,
301 info->training.lane_timings[i +
302 1][channel][slot]
303 [rank][lane], get_timing_register_addr(lane,
304 i + 1,
305 slot,
306 rank),
307 9, 0);
308 gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
311 toggle_1d0_142_5ff();
312 write_1d0(0x2, 0x142, 3, 1);
/* Read back the trained timing 2 and derive timing 3 from it. */
314 for (lane = 0; lane < 8; lane++) {
315 // printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
316 info->training.lane_timings[2][channel][slot][rank][lane] =
317 read_500(info, channel,
318 get_timing_register_addr(lane, 2, slot, rank), 9);
319 //printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
320 info->training.lane_timings[3][channel][slot][rank][lane] =
321 info->training.lane_timings[2][channel][slot][rank][lane] +
322 0x20;
326 static int count_ranks_in_channel(struct raminfo *info, int channel)
328 int slot, rank;
329 int res = 0;
330 for (slot = 0; slot < NUM_SLOTS; slot++)
331 for (rank = 0; rank < NUM_SLOTS; rank++)
332 res += info->populated_ranks[channel][slot][rank];
333 return res;
/*
 * Configure one populated rank: run training step seq9, program its
 * timings (first with a +0x80 offset, then the final values), and —
 * unless resuming from S3, when DRAM contents must be preserved —
 * run the destructive rw_test() against it.  `add` skips over channel
 * 1's ranks in the physical address map when testing channel 0.
 */
336 static void
337 config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
339 int add;
341 write_1d0(0, 0x178, 7, 1);
342 seq9(info, channel, slot, rank);
343 program_timings(info, 0x80, channel, slot, rank);
/* NOTE(review): channel 0's test window sits above channel 1's ranks —
   hence the rank offset; confirm against the memory map setup. */
345 if (channel == 0)
346 add = count_ranks_in_channel(info, 1);
347 else
348 add = 0;
349 if (!s3resume)
350 gav(rw_test(rank + add));
351 program_timings(info, 0x00, channel, slot, rank);
352 if (!s3resume)
353 gav(rw_test(rank + add));
354 if (!s3resume)
355 gav(rw_test(rank + add));
356 write_1d0(0, 0x142, 3, 1);
357 write_1d0(0, 0x103, 6, 1);
359 gav(get_580(channel, 0xc | (rank << 5)));
360 gav(read_1d0(0x142, 3));
/* Pulse 0x5ff as in toggle_1d0_142_5ff(), but unconditionally. */
362 mchbar_write8(0x5ff, 0);
363 mchbar_write8(0x5ff, 1 << 7);
366 static void set_4cf(struct raminfo *info, int channel, u8 bit, u8 val)
368 const u16 regtable[] = { 0x4cf, 0x659, 0x697 };
370 val &= 1;
371 for (int i = 0; i < ARRAY_SIZE(regtable); i++)
372 rmw_500(info, channel, regtable[i], 4, ~(1 << bit), val << bit);
/*
 * Program the per-channel 0x334/0x32c/0x34a/0x33c/0x344 register groups
 * (0x44 apart for j = 0..3) either with magic pattern values or, when
 * `zero` is set, with zeros (group 3 is only 16 bits wide — see lmask).
 * The 0x138/0x13c reads per (channel, j, k) are captured into vd8[]
 * but the values are otherwise unused here.  Finally kicks bit 0 of
 * 0x130 and waits for the hardware to clear it.
 */
375 static void set_334(int zero)
377 int j, k, channel;
378 const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
379 u32 vd8[2][16];
381 for (channel = 0; channel < NUM_CHANNELS; channel++) {
382 for (j = 0; j < 4; j++) {
383 u32 a = (j == 1) ? 0x29292929 : 0x31313131;
384 u32 lmask = (j == 3) ? 0xffff : 0xffffffff;
385 u16 c;
386 if ((j == 0 || j == 3) && zero)
387 c = 0;
388 else if (j == 3)
389 c = 0x5f;
390 else
391 c = 0x5f5f;
/* Latch-and-read 0x138/0x13c for this (channel, j); results unused. */
393 for (k = 0; k < 2; k++) {
394 mchbar_write32(0x138 + 8 * k, channel << 26 | j << 24);
395 gav(vd8[1][(channel << 3) | (j << 1) | k] =
396 mchbar_read32(0x138 + 8 * k));
397 gav(vd8[0][(channel << 3) | (j << 1) | k] =
398 mchbar_read32(0x13c + 8 * k));
401 mchbar_write32(0x334 + (channel << 10) + j * 0x44, zero ? 0 : val3[j]);
402 mchbar_write32(0x32c + (channel << 10) + j * 0x44,
403 zero ? 0 : 0x18191819 & lmask);
404 mchbar_write16(0x34a + (channel << 10) + j * 0x44, c);
405 mchbar_write32(0x33c + (channel << 10) + j * 0x44,
406 zero ? 0 : a & lmask);
407 mchbar_write32(0x344 + (channel << 10) + j * 0x44,
408 zero ? 0 : a & lmask);
/* Start the operation and wait for completion (bit 0 self-clears). */
412 mchbar_setbits32(0x130, 1 << 0);
413 while (mchbar_read8(0x130) & 1)
417 static void rmw_1d0(u16 addr, u32 and, u32 or, int split)
419 u32 v;
420 v = read_1d0(addr, split);
421 write_1d0((v & and) | or, addr, split, 1);
424 static int find_highest_bit_set(u16 val)
426 int i;
427 for (i = 15; i >= 0; i--)
428 if (val & (1 << i))
429 return i;
430 return -1;
433 static int find_lowest_bit_set32(u32 val)
435 int i;
436 for (i = 0; i < 32; i++)
437 if (val & (1 << i))
438 return i;
439 return -1;
/* Byte offsets into the DDR3 SPD EEPROM image (info->spd[ch][slot][...])
   that this code consumes. */
442 enum {
443 DEVICE_TYPE = 2,
444 MODULE_TYPE = 3,
445 DENSITY = 4,
446 RANKS_AND_DQ = 7,
447 MEMORY_BUS_WIDTH = 8,
448 TIMEBASE_DIVIDEND = 10,
449 TIMEBASE_DIVISOR = 11,
450 CYCLETIME = 12,
/* CAS latency support bitmap (two bytes) and the minimum CAS time. */
452 CAS_LATENCIES_LSB = 14,
453 CAS_LATENCIES_MSB = 15,
454 CAS_LATENCY_TIME = 16,
455 THERMAL_AND_REFRESH = 31,
456 REFERENCE_RAW_CARD_USED = 62,
457 RANK1_ADDRESS_MAPPING = 63
/*
 * Derive the common DRAM clock and CAS latency from all populated
 * DIMMs' SPD data, clamped to the controller's fastest supported clock.
 * Results are stored in info->clock_speed_index and info->cas_latency;
 * dies if no workable combination exists.
 */
460 static void calculate_timings(struct raminfo *info)
462 unsigned int cycletime;
463 unsigned int cas_latency_time;
464 unsigned int supported_cas_latencies;
465 unsigned int channel, slot;
466 unsigned int clock_speed_index;
467 unsigned int min_cas_latency;
468 unsigned int cas_latency;
469 unsigned int max_clock_index;
471 /* Find common CAS latency */
/* NOTE(review): 0x3fe masks off bit 0; with the "+ 3" below, bit n
   maps to CAS n+3 — confirm against the JEDEC SPD CL bitmap encoding. */
472 supported_cas_latencies = 0x3fe;
473 for (channel = 0; channel < NUM_CHANNELS; channel++)
474 for (slot = 0; slot < NUM_SLOTS; slot++)
475 if (info->populated_ranks[channel][slot][0])
476 supported_cas_latencies &=
478 (info->
479 spd[channel][slot][CAS_LATENCIES_LSB] |
480 (info->
481 spd[channel][slot][CAS_LATENCIES_MSB] <<
482 8));
484 max_clock_index = MIN(3, info->max_supported_clock_speed_index);
486 cycletime = min_cycletime[max_clock_index];
487 cas_latency_time = min_cas_latency_time[max_clock_index];
/* Widen cycletime/cas_latency_time to the slowest populated DIMM,
   using the SPD timebase (dividend/divisor in ps). */
489 for (channel = 0; channel < NUM_CHANNELS; channel++)
490 for (slot = 0; slot < NUM_SLOTS; slot++)
491 if (info->populated_ranks[channel][slot][0]) {
492 unsigned int timebase;
493 timebase =
494 1000 *
495 info->
496 spd[channel][slot][TIMEBASE_DIVIDEND] /
497 info->spd[channel][slot][TIMEBASE_DIVISOR];
498 cycletime =
499 MAX(cycletime,
500 timebase *
501 info->spd[channel][slot][CYCLETIME]);
502 cas_latency_time =
503 MAX(cas_latency_time,
504 timebase *
505 info->
506 spd[channel][slot][CAS_LATENCY_TIME]);
508 if (cycletime > min_cycletime[0])
509 die("RAM init: Decoded SPD DRAM freq is slower than the controller minimum!");
/* Pick the fastest controller clock not faster than cycletime. */
510 for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
511 if (cycletime == min_cycletime[clock_speed_index])
512 break;
513 if (cycletime > min_cycletime[clock_speed_index]) {
514 clock_speed_index--;
515 cycletime = min_cycletime[clock_speed_index];
516 break;
519 min_cas_latency = DIV_ROUND_UP(cas_latency_time, cycletime);
520 cas_latency = 0;
/* Choose the smallest supported CAS >= min_cas_latency by peeling off
   the highest supported latencies until one fits. */
521 while (supported_cas_latencies) {
522 cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
523 if (cas_latency <= min_cas_latency)
524 break;
525 supported_cas_latencies &=
526 ~(1 << find_highest_bit_set(supported_cas_latencies));
529 if (cas_latency != min_cas_latency && clock_speed_index)
530 clock_speed_index--;
532 if (cas_latency * min_cycletime[clock_speed_index] > 20000)
533 die("Couldn't configure DRAM");
534 info->clock_speed_index = clock_speed_index;
535 info->cas_latency = cas_latency;
/*
 * Program the initial (pre-training) lane timing registers for every
 * populated rank, from lookup tables keyed by channel, extended silicon
 * revision, reference raw card (SPD byte 62), mode4030 and clock speed
 * index.  The table names encode the addresses the values were dumped
 * from in the vendor BIOS; their semantics are undocumented.
 */
538 static void program_base_timings(struct raminfo *info)
540 unsigned int channel;
541 unsigned int slot, rank, lane;
542 unsigned int extended_silicon_revision;
543 int i;
/* Silicon revision 0 is treated as "4" when a module of the matching
   MODULE_TYPE is present (the compared constant fell out of this
   listing — verify against upstream; compute_derived_timings does the
   same dance). */
545 extended_silicon_revision = info->silicon_revision;
546 if (info->silicon_revision == 0)
547 for (channel = 0; channel < NUM_CHANNELS; channel++)
548 for (slot = 0; slot < NUM_SLOTS; slot++)
549 if ((info->
550 spd[channel][slot][MODULE_TYPE] & 0xF) ==
552 extended_silicon_revision = 4;
554 for (channel = 0; channel < NUM_CHANNELS; channel++) {
555 for (slot = 0; slot < NUM_SLOTS; slot++)
/* NOTE(review): rank loop bounded by NUM_SLOTS — looks like a typo for
   NUM_RANKS; harmless only while the constants are equal. */
556 for (rank = 0; rank < NUM_SLOTS; rank++) {
557 int card_timing_2;
558 if (!info->populated_ranks[channel][slot][rank])
559 continue;
/* Per-lane (9 lanes incl. what is presumably ECC) base timings. */
561 for (lane = 0; lane < 9; lane++) {
562 int tm_reg;
563 int card_timing;
565 card_timing = 0;
566 if ((info->
567 spd[channel][slot][MODULE_TYPE] &
568 0xF) == 3) {
569 int reference_card;
570 reference_card =
571 info->
572 spd[channel][slot]
573 [REFERENCE_RAW_CARD_USED] &
574 0x1f;
575 if (reference_card == 3)
576 card_timing =
577 u16_ffd1188[0][lane]
578 [info->
579 clock_speed_index];
580 if (reference_card == 5)
581 card_timing =
582 u16_ffd1188[1][lane]
583 [info->
584 clock_speed_index];
587 info->training.
588 lane_timings[0][channel][slot][rank]
589 [lane] =
590 u8_FFFD1218[info->
591 clock_speed_index];
592 info->training.
593 lane_timings[1][channel][slot][rank]
594 [lane] = 256;
596 for (tm_reg = 2; tm_reg < 4; tm_reg++)
597 info->training.
598 lane_timings[tm_reg]
599 [channel][slot][rank][lane]
601 u8_FFFD1240[channel]
602 [extended_silicon_revision]
603 [lane][2 * slot +
604 rank][info->
605 clock_speed_index]
606 + info->max4048[channel]
608 u8_FFFD0C78[channel]
609 [extended_silicon_revision]
610 [info->
611 mode4030[channel]][slot]
612 [rank][info->
613 clock_speed_index]
614 + card_timing;
/* Push all four timing registers through the scan chain. */
615 for (tm_reg = 0; tm_reg < 4; tm_reg++)
616 write_500(info, channel,
617 info->training.
618 lane_timings[tm_reg]
619 [channel][slot][rank]
620 [lane],
621 get_timing_register_addr
622 (lane, tm_reg, slot,
623 rank), 9, 0);
/* Extra raw-card correction only on ext. rev 4 without ranks 0+2. */
626 card_timing_2 = 0;
627 if (!(extended_silicon_revision != 4
628 || (info->
629 populated_ranks_mask[channel] & 5) ==
630 5)) {
631 if ((info->
632 spd[channel][slot]
633 [REFERENCE_RAW_CARD_USED] & 0x1F)
634 == 3)
635 card_timing_2 =
636 u16_FFFE0EB8[0][info->
637 clock_speed_index];
638 if ((info->
639 spd[channel][slot]
640 [REFERENCE_RAW_CARD_USED] & 0x1F)
641 == 5)
642 card_timing_2 =
643 u16_FFFE0EB8[1][info->
644 clock_speed_index];
647 for (i = 0; i < 3; i++)
648 write_500(info, channel,
649 (card_timing_2 +
650 info->max4048[channel]
652 u8_FFFD0EF8[channel]
653 [extended_silicon_revision]
654 [info->
655 mode4030[channel]][info->
656 clock_speed_index]),
657 u16_fffd0c50[i][slot][rank],
658 8, 1);
659 write_500(info, channel,
660 (info->max4048[channel] +
661 u8_FFFD0C78[channel]
662 [extended_silicon_revision][info->
663 mode4030
664 [channel]]
665 [slot][rank][info->
666 clock_speed_index]),
667 u16_fffd0c70[slot][rank], 7, 1);
/* Per-channel registers, once per channel with any populated rank. */
669 if (!info->populated_ranks_mask[channel])
670 continue;
671 for (i = 0; i < 3; i++)
672 write_500(info, channel,
673 (info->max4048[channel] +
674 info->avg4044[channel]
676 u8_FFFD17E0[channel]
677 [extended_silicon_revision][info->
678 mode4030
679 [channel]][info->
680 clock_speed_index]),
681 u16_fffd0c68[i], 8, 1);
/* The time of one full DRAM clock cycle, in picoseconds. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return 2 * halfcycle_ps(info);
}
/* Frequency in 0.1 MHz units (implies frequency_11() is in 0.9 MHz units). */
static unsigned int frequency_01(struct raminfo *info)
{
	return 100 * frequency_11(info) / 9;
}
/* Convert a duration in picoseconds into DRAM half-cycles (truncating). */
static unsigned int ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return (frequency_11(info) * 2) * ps / 900000;
}
/* Convert a duration in nanoseconds into full DRAM cycles (truncating). */
static unsigned int ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return (frequency_11(info)) * ns / 900;
}
/*
 * Compute secondary timing parameters (the reverse-engineered
 * "some_delay" family, mode4030, avg4044, max4048 and
 * max_slots_used_in_channel) from revision, silicon revision, SPD data
 * and the lookup tables, and program the per-channel 0x244 registers.
 * The arithmetic mirrors the vendor BIOS; variable names reflect that
 * the exact hardware meaning is unknown.
 */
707 static void compute_derived_timings(struct raminfo *info)
709 unsigned int channel, slot, rank;
710 int extended_silicon_revision;
711 int some_delay_1_ps;
712 int some_delay_2_ps;
713 int some_delay_2_halfcycles_ceil;
714 int some_delay_2_halfcycles_floor;
715 int some_delay_3_ps;
716 int some_delay_3_ps_rounded;
717 int some_delay_1_cycle_ceil;
718 int some_delay_1_cycle_floor;
720 some_delay_3_ps_rounded = 0;
/* Same "extended revision 4" promotion as program_base_timings()
   (the compared MODULE_TYPE constant fell out of this listing). */
721 extended_silicon_revision = info->silicon_revision;
722 if (!info->silicon_revision)
723 for (channel = 0; channel < NUM_CHANNELS; channel++)
724 for (slot = 0; slot < NUM_SLOTS; slot++)
725 if ((info->
726 spd[channel][slot][MODULE_TYPE] & 0xF) ==
728 extended_silicon_revision = 4;
729 if (info->board_lane_delay[7] < 5)
730 info->board_lane_delay[7] = 5;
/* revision_flag_1 is only kept for silicon rev 0/1 with revision >= 16. */
731 info->revision_flag_1 = 2;
732 if (info->silicon_revision == 2 || info->silicon_revision == 3)
733 info->revision_flag_1 = 0;
734 if (info->revision < 16)
735 info->revision_flag_1 = 0;
737 if (info->revision < 8)
738 info->revision_flag_1 = 0;
739 if (info->revision >= 8 && (info->silicon_revision == 0
740 || info->silicon_revision == 1))
741 some_delay_2_ps = 735;
742 else
743 some_delay_2_ps = 750;
745 if (info->revision >= 0x10 && (info->silicon_revision == 0
746 || info->silicon_revision == 1))
747 some_delay_1_ps = 3929;
748 else
749 some_delay_1_ps = 3490;
/* Floor and ceiling of some_delay_1 in whole cycles; when it divides
   evenly the floor is decremented instead of the ceil incremented. */
751 some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
752 some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
753 if (some_delay_1_ps % cycle_ps(info))
754 some_delay_1_cycle_ceil++;
755 else
756 some_delay_1_cycle_floor--;
757 info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
758 if (info->revision_flag_1)
759 some_delay_2_ps = halfcycle_ps(info) >> 6;
760 some_delay_2_ps +=
761 MAX(some_delay_1_ps - 30,
762 2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) +
763 375;
764 some_delay_3_ps =
765 halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
766 if (info->revision_flag_1) {
767 if (some_delay_3_ps >= 150) {
768 const int some_delay_3_halfcycles =
769 (some_delay_3_ps << 6) / halfcycle_ps(info);
770 some_delay_3_ps_rounded =
771 halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
774 some_delay_2_halfcycles_ceil =
775 (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
776 2 * (some_delay_1_cycle_ceil - 1);
777 if (info->revision_flag_1 && some_delay_3_ps < 150)
778 some_delay_2_halfcycles_ceil++;
779 some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
780 if (info->revision < 0x10)
781 some_delay_2_halfcycles_floor =
782 some_delay_2_halfcycles_ceil - 1;
783 if (!info->revision_flag_1)
784 some_delay_2_halfcycles_floor++;
785 info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
786 info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
/* Two slots are "used" only if some channel has both slots populated. */
787 if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
788 || (info->populated_ranks[1][0][0]
789 && info->populated_ranks[1][1][0]))
790 info->max_slots_used_in_channel = 2;
791 else
792 info->max_slots_used_in_channel = 1;
793 for (channel = 0; channel < NUM_CHANNELS; channel++)
794 mchbar_write32(0x244 + (channel << 10),
795 ((info->revision < 8) ? 1 : 0x200) |
796 ((2 - info->max_slots_used_in_channel) << 17) |
797 (channel << 21) |
798 (info->some_delay_1_cycle_floor << 18) | 0x9510);
799 if (info->max_slots_used_in_channel == 1) {
800 info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
801 info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
802 } else {
803 info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1) || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3; /* 2 if 1 or 2 ranks */
804 info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
805 || (count_ranks_in_channel(info, 1) ==
806 2)) ? 2 : 3;
/* Derive avg4044 (average of table value a over rank-0 entries) and
   max4048 (largest positive unk1 - t margin) per channel. */
808 for (channel = 0; channel < NUM_CHANNELS; channel++) {
809 int max_of_unk;
810 int min_of_unk_2;
812 int i, count;
813 int sum;
815 if (!info->populated_ranks_mask[channel])
816 continue;
818 max_of_unk = 0;
819 min_of_unk_2 = 32767;
821 sum = 0;
822 count = 0;
823 for (i = 0; i < 3; i++) {
824 int unk1;
825 if (info->revision < 8)
826 unk1 =
827 u8_FFFD1891[0][channel][info->
828 clock_speed_index]
829 [i];
830 else if (!
831 (info->revision >= 0x10
832 || info->revision_flag_1))
833 unk1 =
834 u8_FFFD1891[1][channel][info->
835 clock_speed_index]
836 [i];
837 else
838 unk1 = 0;
839 for (slot = 0; slot < NUM_SLOTS; slot++)
840 for (rank = 0; rank < NUM_RANKS; rank++) {
841 int a = 0;
842 int b = 0;
844 if (!info->
845 populated_ranks[channel][slot]
846 [rank])
847 continue;
848 if (extended_silicon_revision == 4
849 && (info->
850 populated_ranks_mask[channel] &
851 5) != 5) {
852 if ((info->
853 spd[channel][slot]
854 [REFERENCE_RAW_CARD_USED] &
855 0x1F) == 3) {
856 a = u16_ffd1178[0]
857 [info->
858 clock_speed_index];
859 b = u16_fe0eb8[0][info->
860 clock_speed_index];
861 } else
862 if ((info->
863 spd[channel][slot]
864 [REFERENCE_RAW_CARD_USED]
865 & 0x1F) == 5) {
866 a = u16_ffd1178[1]
867 [info->
868 clock_speed_index];
869 b = u16_fe0eb8[1][info->
870 clock_speed_index];
873 min_of_unk_2 = MIN(min_of_unk_2, a);
874 min_of_unk_2 = MIN(min_of_unk_2, b);
875 if (rank == 0) {
876 sum += a;
877 count++;
880 int t;
881 t = b +
882 u8_FFFD0EF8[channel]
883 [extended_silicon_revision]
884 [info->
885 mode4030[channel]][info->
886 clock_speed_index];
887 if (unk1 >= t)
888 max_of_unk =
889 MAX(max_of_unk,
890 unk1 - t);
894 int t =
895 u8_FFFD17E0[channel]
896 [extended_silicon_revision][info->
897 mode4030
898 [channel]]
899 [info->clock_speed_index] + min_of_unk_2;
900 if (unk1 >= t)
901 max_of_unk = MAX(max_of_unk, unk1 - t);
905 if (count == 0)
906 die("No memory ranks found for channel %u\n", channel);
908 info->avg4044[channel] = sum / count;
909 info->max4048[channel] = max_of_unk;
/*
 * Issue one JEDEC mode-register command to a rank: the MR value rides
 * on the address bits of a dummy read from the rank's window
 * (total_rank << 28), with `addr3` programmed into MCHBAR 0x271/0x671
 * to select the mode register.  For odd ranks with SPD byte 63 bit 0
 * set, the address/bank bits are swapped to undo rank-1 address
 * mirroring.  A second dummy read with addr3 = 1 closes the sequence.
 */
913 static void jedec_read(struct raminfo *info,
914 int channel, int slot, int rank,
915 int total_rank, u8 addr3, unsigned int value)
917 /* Handle mirrored mapping. */
918 if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
919 addr3 = (addr3 & 0xCF) | ((addr3 & 0x10) << 1) |
920 ((addr3 >> 1) & 0x10);
922 mchbar_clrsetbits8(0x271, 0x1f << 1, addr3);
923 mchbar_clrsetbits8(0x671, 0x1f << 1, addr3);
925 /* Handle mirrored mapping. */
926 if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
927 value =
928 (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8)
929 << 1);
/* The read itself is what clocks the MR value into the DRAM. */
931 read32p((value << 3) | (total_rank << 28));
933 mchbar_clrsetbits8(0x271, 0x1f << 1, 1 << 1);
934 mchbar_clrsetbits8(0x671, 0x1f << 1, 1 << 1);
936 read32p(total_rank << 28);
/* DDR3 MR1 field values used below: RTT_NOM termination strengths
   (RZQ/12, RZQ/2, RZQ/4) and 34-ohm output drive strength. */
939 enum {
940 MR1_RZQ12 = 512,
941 MR1_RZQ2 = 64,
942 MR1_RZQ4 = 4,
943 MR1_ODS34OHM = 2
/* MR0: interleaved burst type and DLL-reset trigger. */
946 enum {
947 MR0_BT_INTERLEAVED = 8,
948 MR0_DLL_RESET_ON = 256
/* MR2: dynamic ODT (RTT_WR) settings. */
951 enum {
952 MR2_RTT_WR_DISABLED = 0,
953 MR2_RZQ2 = 1 << 10
/*
 * JEDEC initialization of all populated ranks: derive write recovery,
 * ASR/SRT refresh options (only if every DIMM supports them), DLL
 * enable and termination (RTT/RTT_WR) values, program the per-channel
 * 0x588-0x590 command registers, then send MR2, MR3, MR1 and finally
 * MR0 (with DLL reset) to each rank via jedec_read().
 */
956 static void jedec_init(struct raminfo *info)
958 int write_recovery;
959 int channel, slot, rank;
960 int total_rank;
961 int dll_on;
962 int self_refresh_temperature;
963 int auto_self_refresh;
965 auto_self_refresh = 1;
966 self_refresh_temperature = 1;
/* Map board_lane_delay[3] to the MR0 write-recovery code. */
967 if (info->board_lane_delay[3] <= 10) {
968 if (info->board_lane_delay[3] <= 8)
969 write_recovery = info->board_lane_delay[3] - 4;
970 else
971 write_recovery = 5;
972 } else {
973 write_recovery = 6;
/* ASR/SRT only if every populated DIMM's SPD byte 31 allows it. */
975 FOR_POPULATED_RANKS {
976 auto_self_refresh &=
977 (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
978 self_refresh_temperature &=
979 info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
981 if (auto_self_refresh == 1)
982 self_refresh_temperature = 0;
984 dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
985 || (info->populated_ranks[0][0][0]
986 && info->populated_ranks[0][1][0])
987 || (info->populated_ranks[1][0][0]
988 && info->populated_ranks[1][1][0]));
990 total_rank = 0;
992 for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
993 int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
994 int rzq_reg58e;
/* Termination depends on stepping, clock and rank population. */
996 if (info->silicon_revision == 2 || info->silicon_revision == 3) {
997 rzq_reg58e = 64;
998 rtt = MR1_RZQ2;
999 if (info->clock_speed_index != 0) {
1000 rzq_reg58e = 4;
1001 if (info->populated_ranks_mask[channel] == 3)
1002 rtt = MR1_RZQ4;
1004 } else {
1005 if ((info->populated_ranks_mask[channel] & 5) == 5) {
1006 rtt = MR1_RZQ12;
1007 rzq_reg58e = 64;
1008 rtt_wr = MR2_RZQ2;
1009 } else {
1010 rzq_reg58e = 4;
1011 rtt = MR1_RZQ4;
1015 mchbar_write16(0x588 + (channel << 10), 0);
1016 mchbar_write16(0x58a + (channel << 10), 4);
1017 mchbar_write16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
1018 mchbar_write16(0x58e + (channel << 10), rzq_reg58e | 0x82);
1019 mchbar_write16(0x590 + (channel << 10), 0x1282);
/* MRS order: MR2 (0x28), MR3 (0x38), MR1 (0x18), then MR0 (6). */
1021 for (slot = 0; slot < NUM_SLOTS; slot++)
1022 for (rank = 0; rank < NUM_RANKS; rank++)
1023 if (info->populated_ranks[channel][slot][rank]) {
1024 jedec_read(info, channel, slot, rank,
1025 total_rank, 0x28,
1026 rtt_wr | (info->
1027 clock_speed_index
1028 << 3)
1029 | (auto_self_refresh << 6) |
1030 (self_refresh_temperature <<
1031 7));
1032 jedec_read(info, channel, slot, rank,
1033 total_rank, 0x38, 0);
1034 jedec_read(info, channel, slot, rank,
1035 total_rank, 0x18,
1036 rtt | MR1_ODS34OHM);
1037 jedec_read(info, channel, slot, rank,
1038 total_rank, 6,
1039 (dll_on << 12) |
1040 (write_recovery << 9)
1041 | ((info->cas_latency - 4) <<
1042 4) | MR0_BT_INTERLEAVED |
1043 MR0_DLL_RESET_ON);
1044 total_rank++;
/*
 * Program the rank size/attribute registers (0x208) and the cumulative
 * per-channel size registers (0x200 group), then the interleaved /
 * non-interleaved split at 0x100/0x104.  With `pre_jedec` set, every
 * rank is provisionally treated as a fixed 256 MB x8 module (used
 * before the real geometry may be programmed).
 */
1049 static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
1051 unsigned int channel, slot, rank;
1052 unsigned int total_mb[2] = { 0, 0 }; /* total memory per channel in MB */
1053 unsigned int channel_0_non_interleaved;
1055 FOR_ALL_RANKS {
1056 if (info->populated_ranks[channel][slot][rank]) {
/* 256 MB << density, halved for x16 modules. */
1057 total_mb[channel] +=
1058 pre_jedec ? 256 : (256 << info->
1059 density[channel][slot] >> info->
1060 is_x16_module[channel][slot]);
1061 mchbar_write8(0x208 + rank + 2 * slot + (channel << 10),
1062 (pre_jedec ? (1 | ((1 + 1) << 1)) :
1063 (info->is_x16_module[channel][slot] |
1064 ((info->density[channel][slot] + 1) << 1))) |
1065 0x80);
/* Running total (in 64 MB units) after each rank slot. */
1067 mchbar_write16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
1068 total_mb[channel] >> 6);
1071 info->total_memory_mb = total_mb[0] + total_mb[1];
/* The interleaved region is twice the smaller channel. */
1073 info->interleaved_part_mb =
1074 pre_jedec ? 0 : 2 * MIN(total_mb[0], total_mb[1]);
1075 info->non_interleaved_part_mb =
1076 total_mb[0] + total_mb[1] - info->interleaved_part_mb;
1077 channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
1078 mchbar_write32(0x100, channel_0_non_interleaved | info->non_interleaved_part_mb << 16);
1079 if (!pre_jedec)
1080 mchbar_write16(0x104, info->interleaved_part_mb);
1083 static void program_board_delay(struct raminfo *info)
1085 int cas_latency_shift;
1086 int some_delay_ns;
1087 int some_delay_3_half_cycles;
1089 unsigned int channel, i;
1090 int high_multiplier;
1091 int lane_3_delay;
1092 int cas_latency_derived;
1094 high_multiplier = 0;
1095 some_delay_ns = 200;
1096 some_delay_3_half_cycles = 4;
1097 cas_latency_shift = info->silicon_revision == 0
1098 || info->silicon_revision == 1 ? 1 : 0;
1099 if (info->revision < 8) {
1100 some_delay_ns = 600;
1101 cas_latency_shift = 0;
1104 int speed_bit;
1105 speed_bit =
1106 ((info->clock_speed_index > 1
1107 || (info->silicon_revision != 2
1108 && info->silicon_revision != 3))) ^ (info->revision >=
1109 0x10);
1110 write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e,
1111 3, 1);
1112 write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e,
1113 3, 1);
1114 if (info->revision >= 0x10 && info->clock_speed_index <= 1
1115 && (info->silicon_revision == 2
1116 || info->silicon_revision == 3))
1117 rmw_1d0(0x116, 5, 2, 4);
1119 mchbar_write32(0x120, 1 << (info->max_slots_used_in_channel + 28) | 0x188e7f9f);
1121 mchbar_write8(0x124, info->board_lane_delay[4] + (frequency_01(info) + 999) / 1000);
1122 mchbar_write16(0x125, 0x1360);
1123 mchbar_write8(0x127, 0x40);
1124 if (info->fsb_frequency < frequency_11(info) / 2) {
1125 unsigned int some_delay_2_half_cycles;
1126 high_multiplier = 1;
1127 some_delay_2_half_cycles = ps_to_halfcycles(info,
1128 ((3 *
1129 fsbcycle_ps(info))
1130 >> 1) +
1131 (halfcycle_ps(info)
1133 reg178_min[info->
1134 clock_speed_index]
1135 >> 6)
1138 halfcycle_ps(info)
1139 + 2230);
1140 some_delay_3_half_cycles =
1141 MIN((some_delay_2_half_cycles +
1142 (frequency_11(info) * 2) * (28 -
1143 some_delay_2_half_cycles) /
1144 (frequency_11(info) * 2 -
1145 4 * (info->fsb_frequency))) >> 3, 7);
1147 if (mchbar_read8(0x2ca9) & 1)
1148 some_delay_3_half_cycles = 3;
1149 for (channel = 0; channel < NUM_CHANNELS; channel++) {
1150 mchbar_setbits32(0x220 + (channel << 10), 0x18001117);
1151 mchbar_write32(0x224 + (channel << 10),
1152 (info->max_slots_used_in_channel - 1) |
1153 (info->cas_latency - 5 - info->clock_speed_index)
1154 << 21 | (info->max_slots_used_in_channel +
1155 info->cas_latency - cas_latency_shift - 4) << 16 |
1156 (info->cas_latency - cas_latency_shift - 4) << 26 |
1157 (info->cas_latency - info->clock_speed_index +
1158 info->max_slots_used_in_channel - 6) << 8);
1159 mchbar_write32(0x228 + (channel << 10), info->max_slots_used_in_channel);
1160 mchbar_write8(0x239 + (channel << 10), 32);
1161 mchbar_write32(0x248 + (channel << 10), high_multiplier << 24 |
1162 some_delay_3_half_cycles << 25 | 0x840000);
1163 mchbar_write32(0x278 + (channel << 10), 0xc362042);
1164 mchbar_write32(0x27c + (channel << 10), 0x8b000062);
1165 mchbar_write32(0x24c + (channel << 10),
1166 (!!info->clock_speed_index) << 17 |
1167 ((2 + info->clock_speed_index -
1168 (!!info->clock_speed_index))) << 12 | 0x10200);
1170 mchbar_write8(0x267 + (channel << 10), 4);
1171 mchbar_write16(0x272 + (channel << 10), 0x155);
1172 mchbar_clrsetbits32(0x2bc + (channel << 10), 0xffffff, 0x707070);
1174 write_500(info, channel,
1175 ((!info->populated_ranks[channel][1][1])
1176 | (!info->populated_ranks[channel][1][0] << 1)
1177 | (!info->populated_ranks[channel][0][1] << 2)
1178 | (!info->populated_ranks[channel][0][0] << 3)),
1179 0x4c9, 4, 1);
1182 mchbar_write8(0x2c4, (1 + (info->clock_speed_index != 0)) << 6 | 0xc);
1184 u8 freq_divisor = 2;
1185 if (info->fsb_frequency == frequency_11(info))
1186 freq_divisor = 3;
1187 else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
1188 freq_divisor = 1;
1189 else
1190 freq_divisor = 2;
1191 mchbar_write32(0x2c0, freq_divisor << 11 | 0x6009c400);
1194 if (info->board_lane_delay[3] <= 10) {
1195 if (info->board_lane_delay[3] <= 8)
1196 lane_3_delay = info->board_lane_delay[3];
1197 else
1198 lane_3_delay = 10;
1199 } else {
1200 lane_3_delay = 12;
1202 cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
1203 if (info->clock_speed_index > 1)
1204 cas_latency_derived++;
1205 for (channel = 0; channel < NUM_CHANNELS; channel++) {
1206 mchbar_write32(0x240 + (channel << 10),
1207 ((info->clock_speed_index == 0) * 0x11000) |
1208 0x1002100 | (2 + info->clock_speed_index) << 4 |
1209 (info->cas_latency - 3));
1210 write_500(info, channel, (info->clock_speed_index << 1) | 1,
1211 0x609, 6, 1);
1212 write_500(info, channel,
1213 info->clock_speed_index + 2 * info->cas_latency - 7,
1214 0x601, 6, 1);
1216 mchbar_write32(0x250 + (channel << 10),
1217 (lane_3_delay + info->clock_speed_index + 9) << 6 |
1218 info->board_lane_delay[7] << 2 |
1219 info->board_lane_delay[4] << 16 |
1220 info->board_lane_delay[1] << 25 |
1221 info->board_lane_delay[1] << 29 | 1);
1222 mchbar_write32(0x254 + (channel << 10),
1223 info->board_lane_delay[1] >> 3 |
1224 (info->board_lane_delay[8] + 4 * info->use_ecc) << 6 |
1225 0x80 | info->board_lane_delay[6] << 1 |
1226 info->board_lane_delay[2] << 28 |
1227 cas_latency_derived << 16 | 0x4700000);
1228 mchbar_write32(0x258 + (channel << 10),
1229 (info->board_lane_delay[5] + info->clock_speed_index + 9) << 12 |
1230 (info->clock_speed_index - info->cas_latency + 12) << 8 |
1231 info->board_lane_delay[2] << 17 |
1232 info->board_lane_delay[4] << 24 | 0x47);
1233 mchbar_write32(0x25c + (channel << 10),
1234 info->board_lane_delay[1] << 1 |
1235 info->board_lane_delay[0] << 8 | 0x1da50000);
1236 mchbar_write8(0x264 + (channel << 10), 0xff);
1237 mchbar_write8(0x5f8 + (channel << 10), cas_latency_shift << 3 | info->use_ecc);
1240 program_modules_memory_map(info, 1);
1242 mchbar_clrsetbits16(0x610, 0xfe3c,
1243 MIN(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9 | 0x3c);
1244 mchbar_setbits16(0x612, 1 << 8);
1245 mchbar_setbits16(0x214, 0x3e00);
1246 for (i = 0; i < 8; i++) {
1247 pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i),
1248 (info->total_memory_mb - 64) | !i | 2);
1249 pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0);
1253 #define DEFAULT_PCI_MMIO_SIZE 2048
/*
 * Program the memory map: TOM/TOLUD/TOUUD, the >4G remap window, UMA (IGD
 * and GTT) bases, TSEG base, and the QPI SAD DRAM rules.  All sizes and
 * bases below are in MiB units unless shifted into register format.
 */
1255 static void program_total_memory_map(struct raminfo *info)
1257 unsigned int tom, tolud, touud;
1258 unsigned int quickpath_reserved;
1259 unsigned int remap_base;
1260 unsigned int uma_base_igd;
1261 unsigned int uma_base_gtt;
1262 unsigned int mmio_size;
1263 int memory_remap;
1264 unsigned int memory_map[8];
1265 int i;
1266 unsigned int current_limit;
1267 unsigned int tseg_base;
1268 int uma_size_igd = 0, uma_size_gtt = 0;
1270 memset(memory_map, 0, sizeof(memory_map));
/* Decode IGD and GTT UMA sizes (MiB) from the GGC register fields. */
1272 if (info->uma_enabled) {
1273 u16 t = pci_read_config16(NORTHBRIDGE, GGC);
1274 gav(t);
1275 const int uma_sizes_gtt[16] =
1276 { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
1277 /* Igd memory */
1278 const int uma_sizes_igd[16] = {
1279 0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
1280 256, 512
1283 uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
1284 uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
1287 mmio_size = DEFAULT_PCI_MMIO_SIZE;
1289 tom = info->total_memory_mb;
/* NOTE(review): 4096 MiB is clamped to 4032 — presumably the top 64 MiB
   are unusable in a 4 GiB configuration; confirm against the datasheet. */
1290 if (tom == 4096)
1291 tom = 4032;
1292 touud = ALIGN_DOWN(tom - info->memory_reserved_for_heci_mb, 64);
1293 tolud = ALIGN_DOWN(MIN(4096 - mmio_size + ALIGN_UP(uma_size_igd + uma_size_gtt, 64)
1294 , touud), 64);
/* If more than 64 MiB would be lost below 4G, remap it above 4G. */
1295 memory_remap = 0;
1296 if (touud - tolud > 64) {
1297 memory_remap = 1;
1298 remap_base = MAX(4096, touud);
1299 touud = touud - tolud + 4096;
1301 if (touud > 4096)
1302 memory_map[2] = touud | 1;
/* QPI SAD reg 0x68 bit 11 flags a reserved region; its size is derived
   from the lowest bit set in the upper bits of the register. */
1303 quickpath_reserved = 0;
1305 u32 t = pci_read_config32(QPI_SAD, 0x68);
1307 gav(t);
1309 if (t & 0x800) {
1310 u32 shift = t >> 20;
1311 if (shift == 0)
1312 die("Quickpath value is 0\n");
1313 quickpath_reserved = (u32)1 << find_lowest_bit_set32(shift);
1316 if (memory_remap)
1317 touud -= quickpath_reserved;
/* Stack GTT below IGD below TOLUD; TSEG sits below the UMA regions. */
1319 uma_base_igd = tolud - uma_size_igd;
1320 uma_base_gtt = uma_base_igd - uma_size_gtt;
1321 tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
1322 if (!memory_remap)
1323 tseg_base -= quickpath_reserved;
1324 tseg_base = ALIGN_DOWN(tseg_base, 8);
1326 pci_write_config16(NORTHBRIDGE, TOLUD, tolud << 4);
1327 pci_write_config16(NORTHBRIDGE, TOM, tom >> 6);
1328 if (memory_remap) {
1329 pci_write_config16(NORTHBRIDGE, REMAPBASE, remap_base >> 6);
1330 pci_write_config16(NORTHBRIDGE, REMAPLIMIT, (touud - 64) >> 6);
1332 pci_write_config16(NORTHBRIDGE, TOUUD, touud);
1334 if (info->uma_enabled) {
1335 pci_write_config32(NORTHBRIDGE, IGD_BASE, uma_base_igd << 20);
1336 pci_write_config32(NORTHBRIDGE, GTT_BASE, uma_base_gtt << 20);
1338 pci_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);
/* Program the SAD DRAM rules as cumulative limits; bit 0 = rule enable. */
1340 current_limit = 0;
1341 memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
1342 memory_map[1] = 4096;
1343 for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
1344 current_limit = MAX(current_limit, memory_map[i] & ~1);
1345 pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i),
1346 (memory_map[i] & 1) | ALIGN_DOWN(current_limit -
1347 1, 64) | 2);
1348 pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0);
/*
 * Probe northbridge capability registers (CAPID0) and the PCI IDs to fill
 * in revision, silicon revision, UMA enablement and the maximum supported
 * memory clock index.
 */
1352 static void collect_system_info(struct raminfo *info)
1354 u32 capid0[3];
1355 int i;
1356 unsigned int channel;
1358 for (i = 0; i < 3; i++) {
1359 capid0[i] = pci_read_config32(NORTHBRIDGE, CAPID0 | (i << 2));
1360 printk(BIOS_DEBUG, "CAPID0[%d] = 0x%08x\n", i, capid0[i]);
1362 info->revision = pci_read_config8(NORTHBRIDGE, PCI_REVISION_ID);
1363 printk(BIOS_DEBUG, "Revision ID: 0x%x\n", info->revision);
1364 printk(BIOS_DEBUG, "Device ID: 0x%x\n", pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID));
/* Low 3 bits of ~CAPID0[1] encode the fastest supported clock index. */
1366 info->max_supported_clock_speed_index = (~capid0[1] & 7);
/* CAPID0[1] bit 11 fuses UMA off; otherwise DEVEN bit 3 enables IGD. */
1368 if ((capid0[1] >> 11) & 1)
1369 info->uma_enabled = 0;
1370 else
1371 gav(info->uma_enabled =
1372 pci_read_config8(NORTHBRIDGE, DEVEN) & 8);
1373 /* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
1374 info->silicon_revision = 0;
/* CAPID0[2] bit 1 set: clock index capped at 2, or 1 with SO-DIMMs
   (SPD module type 3) present on a populated channel. */
1376 if (capid0[2] & 2) {
1377 info->silicon_revision = 0;
1378 info->max_supported_clock_speed_index = 2;
1379 for (channel = 0; channel < NUM_CHANNELS; channel++)
1380 if (info->populated_ranks[channel][0][0]
1381 && (info->spd[channel][0][MODULE_TYPE] & 0xf) ==
1382 3) {
1383 info->silicon_revision = 2;
1384 info->max_supported_clock_speed_index = 1;
1386 } else {
/* Decode silicon revision from two capability bits... */
1387 switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) {
1388 case 1:
1389 case 2:
1390 info->silicon_revision = 3;
1391 break;
1392 case 3:
1393 info->silicon_revision = 0;
1394 break;
1395 case 0:
1396 info->silicon_revision = 2;
1397 break;
/* ...then override it for two specific device IDs. */
1399 switch (pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
1400 case 0x40:
1401 info->silicon_revision = 0;
1402 break;
1403 case 0x48:
1404 info->silicon_revision = 1;
1405 break;
/*
 * Replay cached training data (all four timing sets for every lane of every
 * populated rank, plus registers 0x178 and 0x10b) into the hardware.
 * Only done on northbridge revision >= 8.
 */
1410 static void write_training_data(struct raminfo *info)
1412 int tm, channel, slot, rank, lane;
1413 if (info->revision < 8)
1414 return;
1416 for (tm = 0; tm < 4; tm++)
1417 for (channel = 0; channel < NUM_CHANNELS; channel++)
1418 for (slot = 0; slot < NUM_SLOTS; slot++)
1419 for (rank = 0; rank < NUM_RANKS; rank++)
1420 for (lane = 0; lane < 9; lane++)
1421 write_500(info, channel,
1422 info->
1423 cached_training->
1424 lane_timings[tm]
1425 [channel][slot][rank]
1426 [lane],
1427 get_timing_register_addr
1428 (lane, tm, slot,
1429 rank), 9, 0);
1430 write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
1431 write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
/*
 * Debug helper: print, at RAM_SPEW level, the current hardware timing value
 * and the cached value (in parentheses) for every lane of every populated
 * rank, plus registers 0x178 and 0x10b.
 */
1434 static void dump_timings(struct raminfo *info)
1436 int channel, slot, rank, lane, i;
1437 printk(RAM_SPEW, "Timings:\n");
1438 FOR_POPULATED_RANKS {
1439 printk(RAM_SPEW, "channel %d, slot %d, rank %d\n", channel,
1440 slot, rank);
1441 for (lane = 0; lane < 9; lane++) {
1442 printk(RAM_SPEW, "lane %d: ", lane);
1443 for (i = 0; i < 4; i++) {
1444 printk(RAM_SPEW, "%x (%x) ",
1445 read_500(info, channel,
1446 get_timing_register_addr
1447 (lane, i, slot, rank),
1449 info->training.
1450 lane_timings[i][channel][slot][rank]
1451 [lane]);
1453 printk(RAM_SPEW, "\n");
1456 printk(RAM_SPEW, "[178] = %x (%x)\n", read_1d0(0x178, 7),
1457 info->training.reg_178);
1458 printk(RAM_SPEW, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
1459 info->training.reg_10b);
1462 /* Read timings and other registers that need to be restored verbatim and
1463    put them to CBMEM. */
/*
 * Snapshot the trained state: per-lane timing registers, 0x178/0x10b, the
 * per-channel 0x274/0x265 values and a few misc registers, then stash the
 * whole structure in the MRC cache for S3 resume / fast boot.
 */
1465 static void save_timings(struct raminfo *info)
1467 struct ram_training train;
1468 int channel, slot, rank, lane, i;
1470 train = info->training;
1471 FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
1472 for (i = 0; i < 4; i++)
1473 train.lane_timings[i][channel][slot][rank][lane] =
1474 read_500(info, channel,
1475 get_timing_register_addr(lane, i, slot,
1476 rank), 9);
1477 train.reg_178 = read_1d0(0x178, 7);
1478 train.reg_10b = read_1d0(0x10b, 6);
1480 for (channel = 0; channel < NUM_CHANNELS; channel++) {
1481 u32 reg32;
1482 reg32 = mchbar_read32((channel << 10) + 0x274);
1483 train.reg274265[channel][0] = reg32 >> 16;
1484 train.reg274265[channel][1] = reg32 & 0xffff;
1485 train.reg274265[channel][2] = mchbar_read16((channel << 10) + 0x265) >> 8;
1487 train.reg2ca9_bit0 = mchbar_read8(0x2ca9) & 1;
1488 train.reg_6dc = mchbar_read32(0x6dc);
1489 train.reg_6e8 = mchbar_read32(0x6e8);
1491 printk(RAM_SPEW, "[6dc] = %x\n", train.reg_6dc);
1492 printk(RAM_SPEW, "[6e8] = %x\n", train.reg_6e8);
1494 /* Save the MRC S3 restore data to cbmem */
1495 mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION,
1496 &train, sizeof(train));
1499 static const struct ram_training *get_cached_training(void)
1501 return mrc_cache_current_mmap_leak(MRC_TRAINING_DATA,
1502 MRC_CACHE_VERSION,
1503 NULL);
1506 static int have_match_ranks(struct raminfo *info, int channel, int ranks)
1508 int ranks_in_channel;
1509 ranks_in_channel = info->populated_ranks[channel][0][0]
1510 + info->populated_ranks[channel][0][1]
1511 + info->populated_ranks[channel][1][0]
1512 + info->populated_ranks[channel][1][1];
1514 /* empty channel */
1515 if (ranks_in_channel == 0)
1516 return 1;
1518 if (ranks_in_channel != ranks)
1519 return 0;
1520 /* single slot */
1521 if (info->populated_ranks[channel][0][0] !=
1522 info->populated_ranks[channel][1][0])
1523 return 1;
1524 if (info->populated_ranks[channel][0][1] !=
1525 info->populated_ranks[channel][1][1])
1526 return 1;
1527 if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1])
1528 return 0;
1529 if (info->density[channel][0] != info->density[channel][1])
1530 return 0;
1531 return 1;
/*
 * Initialize the cached lane timings: timing set 0 gets a fixed value of 32
 * for the first two channels, while sets 1..3 are read back from hardware.
 */
1534 static void read_4090(struct raminfo *info)
1536 int i, channel, slot, rank, lane;
1537 for (i = 0; i < 2; i++)
1538 for (slot = 0; slot < NUM_SLOTS; slot++)
1539 for (rank = 0; rank < NUM_RANKS; rank++)
1540 for (lane = 0; lane < 9; lane++)
1541 info->training.
1542 lane_timings[0][i][slot][rank][lane]
1543 = 32;
1545 for (i = 1; i < 4; i++)
1546 for (channel = 0; channel < NUM_CHANNELS; channel++)
1547 for (slot = 0; slot < NUM_SLOTS; slot++)
1548 for (rank = 0; rank < NUM_RANKS; rank++)
1549 for (lane = 0; lane < 9; lane++) {
1550 info->training.
1551 lane_timings[i][channel]
1552 [slot][rank][lane] =
1553 read_500(info, channel,
1554 get_timing_register_addr
1555 (lane, i, slot,
1556 rank), 9)
/* NOTE(review): set 1 gets a magic +11 offset (original author flagged
   this with "!!!!") — reason unknown; do not remove without retesting. */
1557 + (i == 1) * 11; // !!!!
/*
 * Generate one 32-bit word of the type-2 memory test pattern for the given
 * pattern address (in pattern units, 480 per block).
 */
static u32 get_etalon2(int flip, u32 addr)
{
	const u16 invmask[] = {
		0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
		0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
	};
	const u32 block = addr / 480;
	const u32 offs = addr % 480;
	const u32 bit = offs & 0xf;
	const u32 parity = (offs >> 4) & 1;
	const u32 row = offs >> 5;
	u32 pattern;

	/* Blocks past the first carry a walking 0x01010101 bit pattern. */
	if (block)
		pattern = 0x1010101 << (block - 1);
	else
		pattern = 0;

	/* Invert the whole word when the mask bit (xor flip) selects it. */
	if (flip ^ (((invmask[row] >> bit) ^ parity) & 1))
		pattern = ~pattern;

	return pattern;
}
1585 static void disable_cache_region(void)
1587 msr_t msr = {.lo = 0, .hi = 0 };
1589 wrmsr(MTRR_PHYS_BASE(3), msr);
1590 wrmsr(MTRR_PHYS_MASK(3), msr);
1593 static void enable_cache_region(unsigned int base, unsigned int size)
1595 msr_t msr;
1596 msr.lo = base | MTRR_TYPE_WRPROT;
1597 msr.hi = 0;
1598 wrmsr(MTRR_PHYS_BASE(3), msr);
1599 msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRR_DEF_TYPE_EN)
1600 & 0xffffffff);
1601 msr.hi = 0x0000000f;
1602 wrmsr(MTRR_PHYS_MASK(3), msr);
1605 static void flush_cache(u32 start, u32 size)
1607 u32 end;
1608 u32 addr;
1610 end = start + (ALIGN_DOWN(size + 4096, 4096));
1611 for (addr = start; addr < end; addr += 64)
1612 clflush((void *)(uintptr_t)addr);
1615 static void clear_errors(void)
1617 pci_write_config8(NORTHBRIDGE, 0xc0, 0x01);
1620 static void write_testing(struct raminfo *info, int totalrank, int flip)
1622 int nwrites = 0;
1623 /* in 8-byte units. */
1624 u32 offset;
1625 u8 *base;
1627 base = (u8 *)(uintptr_t)(totalrank << 28);
1628 for (offset = 0; offset < 9 * 480; offset += 2) {
1629 write32(base + offset * 8, get_etalon2(flip, offset));
1630 write32(base + offset * 8 + 4, get_etalon2(flip, offset));
1631 write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
1632 write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
1633 nwrites += 4;
1634 if (nwrites >= 320) {
1635 clear_errors();
1636 nwrites = 0;
/*
 * Read back the pattern written by write_testing() and compare against the
 * expected etalon2 values.  Returns a bitmask of failing byte lanes
 * (bit i set => lane i saw at least one mismatched byte).
 */
1641 static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
1643 u8 failmask = 0;
1644 int i;
1645 int comp1, comp2, comp3;
/* failxor[0] accumulates mismatch bits for lanes 0-3, [1] for lanes 4-7. */
1646 u32 failxor[2] = { 0, 0 };
1648 enable_cache_region((total_rank << 28), 1728 * 5 * 4);
1650 for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) {
1651 for (comp1 = 0; comp1 < 4; comp1++)
1652 for (comp2 = 0; comp2 < 60; comp2++) {
1653 u32 re[4];
1654 u32 curroffset =
1655 comp3 * 8 * 60 + 2 * comp1 + 8 * comp2;
1656 read128((total_rank << 28) | (curroffset << 3),
1657 (u64 *) re);
1658 failxor[0] |=
1659 get_etalon2(flip, curroffset) ^ re[0];
1660 failxor[1] |=
1661 get_etalon2(flip, curroffset) ^ re[1];
1662 failxor[0] |=
1663 get_etalon2(flip, curroffset | 1) ^ re[2];
1664 failxor[1] |=
1665 get_etalon2(flip, curroffset | 1) ^ re[3];
/* Fold the per-byte xor accumulators into the per-lane fail mask. */
1667 for (i = 0; i < 8; i++)
1668 if ((0xff << (8 * (i % 4))) & failxor[i / 4])
1669 failmask |= 1 << i;
1671 disable_cache_region();
1672 flush_cache((total_rank << 28), 1728 * 5 * 4);
1673 return failmask;
/* Base pattern seeds used by get_etalon() for the type-2 memory test. */
1676 const u32 seed1[0x18] = {
1677 0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
1678 0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
1679 0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
1680 0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
1681 0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
1682 0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
/*
 * Select one of five secondary pattern seeds by index a (a >= 10 skips one
 * bucket); b != 0 returns the bitwise complement of the seed.
 */
static u32 get_seed2(int a, int b)
{
	const u32 seed2[5] = {
		0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
		0x5b6db6db,
	};
	const u32 seed = seed2[(a + (a >= 10)) / 5];

	return b ? ~seed : seed;
}
/*
 * Compute a 5-bit shift amount: comp2 minus an optional 1, where the
 * adjustment bit is looked up in seed3 by comp5 and x modulo 8.
 */
static int make_shift(int comp2, int comp5, int x)
{
	const u8 seed3[32] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
		0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	};
	const int adjust = (seed3[comp5] >> (x & 7)) & 1;

	return (comp2 - adjust) & 0x1f;
}
1708 static u32 get_etalon(int flip, u32 addr)
1710 u32 mask_byte = 0;
1711 int comp1 = (addr >> 1) & 1;
1712 int comp2 = (addr >> 3) & 0x1f;
1713 int comp3 = (addr >> 8) & 0xf;
1714 int comp4 = (addr >> 12) & 0xf;
1715 int comp5 = (addr >> 16) & 0x1f;
1716 u32 mask_bit = ~(0x10001 << comp3);
1717 u32 part1;
1718 u32 part2;
1719 int byte;
1721 part2 =
1722 ((seed1[comp5] >>
1723 make_shift(comp2, comp5,
1724 (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip;
1725 part1 =
1726 ((seed1[comp5] >>
1727 make_shift(comp2, comp5,
1728 (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip;
1730 for (byte = 0; byte < 4; byte++)
1731 if ((get_seed2(comp5, comp4) >>
1732 make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1)
1733 mask_byte |= 0xff << (8 * byte);
1735 return (mask_bit & mask_byte) | (part1 << comp3) | (part2 <<
1736 (comp3 + 16));
/*
 * Fill one 8 KiB test block (addressed by totalrank/region/block) with the
 * get_etalon() pattern, one dword at a time.
 */
1739 static void
1740 write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
1741 char flip)
1743 int i;
1744 for (i = 0; i < 2048; i++)
1745 write32p((totalrank << 28) | (region << 25) | (block << 16) |
1746 (i << 2), get_etalon(flip, (block << 16) | (i << 2)));
/*
 * Verify memory written by write_testing_type2() against get_etalon() and
 * return a bitmask of failing byte lanes (bit i set => lane i mismatched).
 */
1749 static u8
1750 check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
1751 char flip)
1753 u8 failmask = 0;
/* failxor[0] accumulates mismatch bits for lanes 0-3, [1] for lanes 4-7. */
1754 u32 failxor[2];
1755 int i;
1756 int comp1, comp2, comp3;
1758 failxor[0] = 0;
1759 failxor[1] = 0;
/* Cache the whole 128 MiB rank window while reading it back. */
1761 enable_cache_region(totalrank << 28, 134217728);
1762 for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) {
1763 for (comp1 = 0; comp1 < 16; comp1++)
1764 for (comp2 = 0; comp2 < 64; comp2++) {
1765 u32 addr =
1766 (totalrank << 28) | (region << 25) | (block
1767 << 16)
1768 | (comp3 << 12) | (comp2 << 6) | (comp1 <<
1770 failxor[comp1 & 1] |=
1771 read32p(addr) ^ get_etalon(flip, addr);
/* Fold the per-byte xor accumulators into the per-lane fail mask. */
1773 for (i = 0; i < 8; i++)
1774 if ((0xff << (8 * (i % 4))) & failxor[i / 4])
1775 failmask |= 1 << i;
1777 disable_cache_region();
1778 flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384);
1779 return failmask;
/* Return 1 iff all eight lane counters have reached at least `bound`. */
static int check_bounded(unsigned short *vals, u16 bound)
{
	int lane;

	for (lane = 0; lane < 8; lane++) {
		if (vals[lane] < bound)
			return 0;
	}
	return 1;
}
/* Per-lane search state used by do_fsm() while sweeping a timing value. */
1792 enum state {
1793 BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
1796 static int validate_state(enum state *in)
1798 int i;
1799 for (i = 0; i < 8; i++)
1800 if (in[i] != COMPLETE)
1801 return 0;
1802 return 1;
1805 static void
1806 do_fsm(enum state *state, u16 *counter,
1807 u8 fail_mask, int margin, int uplimit,
1808 u8 *res_low, u8 *res_high, u8 val)
1810 int lane;
1812 for (lane = 0; lane < 8; lane++) {
1813 int is_fail = (fail_mask >> lane) & 1;
1814 switch (state[lane]) {
1815 case BEFORE_USABLE:
1816 if (!is_fail) {
1817 counter[lane] = 1;
1818 state[lane] = AT_USABLE;
1819 break;
1821 counter[lane] = 0;
1822 state[lane] = BEFORE_USABLE;
1823 break;
1824 case AT_USABLE:
1825 if (!is_fail) {
1826 ++counter[lane];
1827 if (counter[lane] >= margin) {
1828 state[lane] = AT_MARGIN;
1829 res_low[lane] = val - margin + 1;
1830 break;
1832 state[lane] = 1;
1833 break;
1835 counter[lane] = 0;
1836 state[lane] = BEFORE_USABLE;
1837 break;
1838 case AT_MARGIN:
1839 if (is_fail) {
1840 state[lane] = COMPLETE;
1841 res_high[lane] = val - 1;
1842 } else {
1843 counter[lane]++;
1844 state[lane] = AT_MARGIN;
1845 if (val == uplimit) {
1846 state[lane] = COMPLETE;
1847 res_high[lane] = uplimit;
1850 break;
1851 case COMPLETE:
1852 break;
/*
 * Train one rank's lane timings for a given value of register 0x178.
 * Sweeps register 0x1b3/0x1a3 to find each lane's usable window (stored in
 * `timings`), then — on reruns — iteratively nudges the recorded smallest
 * and largest bounds until type-2 testing passes repeatedly, and finally
 * restores the original timing registers.
 */
1857 static void
1858 train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
1859 u8 total_rank, u8 reg_178, int first_run, int niter,
1860 timing_bounds_t * timings)
1862 int lane;
1863 enum state state[8];
1864 u16 count[8];
1865 u8 lower_usable[8];
1866 u8 upper_usable[8];
1867 unsigned short num_successfully_checked[8];
1868 u8 reg1b3;
1869 int i;
1871 for (i = 0; i < 8; i++)
1872 state[i] = BEFORE_USABLE;
/* On reruns, lanes with a degenerate (empty) window are reset; if all
   windows are already valid, skip the sweep entirely. */
1874 if (!first_run) {
1875 int is_all_ok = 1;
1876 for (lane = 0; lane < 8; lane++)
1877 if (timings[reg_178][channel][slot][rank][lane].
1878 smallest ==
1879 timings[reg_178][channel][slot][rank][lane].
1880 largest) {
1881 timings[reg_178][channel][slot][rank][lane].
1882 smallest = 0;
1883 timings[reg_178][channel][slot][rank][lane].
1884 largest = 0;
1885 is_all_ok = 0;
1887 if (is_all_ok) {
1888 for (i = 0; i < 8; i++)
1889 state[i] = COMPLETE;
/* Sweep 0x1b3 (mirrored to 0x1a3) to find per-lane pass windows. */
1893 for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
1894 u8 failmask = 0;
1895 write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
1896 write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
1897 failmask = check_testing(info, total_rank, 0);
1898 mchbar_setbits32(0xfb0, 3 << 16);
1899 do_fsm(state, count, failmask, 5, 47, lower_usable,
1900 upper_usable, reg1b3);
/* Translate the sweep results into absolute timing bounds relative to
   the cached timing-0 value (offset by -32). */
1903 if (reg1b3) {
1904 write_1d0(0, 0x1b3, 6, 1);
1905 write_1d0(0, 0x1a3, 6, 1);
1906 for (lane = 0; lane < 8; lane++) {
1907 if (state[lane] == COMPLETE) {
1908 timings[reg_178][channel][slot][rank][lane].
1909 smallest =
1910 lower_usable[lane] +
1911 (info->training.
1912 lane_timings[0][channel][slot][rank][lane]
1913 & 0x3F) - 32;
1914 timings[reg_178][channel][slot][rank][lane].
1915 largest =
1916 upper_usable[lane] +
1917 (info->training.
1918 lane_timings[0][channel][slot][rank][lane]
1919 & 0x3F) - 32;
/* Rerun phase 1: program each lane at its smallest bound, then raise the
   bound until type-2 testing passes twice in a row per lane. */
1924 if (!first_run) {
1925 for (lane = 0; lane < 8; lane++)
1926 if (state[lane] == COMPLETE) {
1927 write_500(info, channel,
1928 timings[reg_178][channel][slot][rank]
1929 [lane].smallest,
1930 get_timing_register_addr(lane, 0,
1931 slot, rank),
1932 9, 1);
1933 write_500(info, channel,
1934 timings[reg_178][channel][slot][rank]
1935 [lane].smallest +
1936 info->training.
1937 lane_timings[1][channel][slot][rank]
1938 [lane]
1940 info->training.
1941 lane_timings[0][channel][slot][rank]
1942 [lane], get_timing_register_addr(lane,
1944 slot,
1945 rank),
1946 9, 1);
1947 num_successfully_checked[lane] = 0;
1948 } else
1949 num_successfully_checked[lane] = -1;
1951 do {
1952 u8 failmask = 0;
1953 for (i = 0; i < niter; i++) {
1954 if (failmask == 0xFF)
1955 break;
1956 failmask |=
1957 check_testing_type2(info, total_rank, 2, i,
1959 failmask |=
1960 check_testing_type2(info, total_rank, 3, i,
1963 mchbar_setbits32(0xfb0, 3 << 16);
1964 for (lane = 0; lane < 8; lane++)
1965 if (num_successfully_checked[lane] != 0xffff) {
1966 if ((1 << lane) & failmask) {
1967 if (timings[reg_178][channel]
1968 [slot][rank][lane].
1969 largest <=
1970 timings[reg_178][channel]
1971 [slot][rank][lane].smallest)
1972 num_successfully_checked
1973 [lane] = -1;
1974 else {
1975 num_successfully_checked
1976 [lane] = 0;
1977 timings[reg_178]
1978 [channel][slot]
1979 [rank][lane].
1980 smallest++;
1981 write_500(info, channel,
1982 timings
1983 [reg_178]
1984 [channel]
1985 [slot][rank]
1986 [lane].
1987 smallest,
1988 get_timing_register_addr
1989 (lane, 0,
1990 slot, rank),
1991 9, 1);
1992 write_500(info, channel,
1993 timings
1994 [reg_178]
1995 [channel]
1996 [slot][rank]
1997 [lane].
1998 smallest +
1999 info->
2000 training.
2001 lane_timings
2002 [1][channel]
2003 [slot][rank]
2004 [lane]
2006 info->
2007 training.
2008 lane_timings
2009 [0][channel]
2010 [slot][rank]
2011 [lane],
2012 get_timing_register_addr
2013 (lane, 1,
2014 slot, rank),
2015 9, 1);
2017 } else
2018 num_successfully_checked[lane]
2022 while (!check_bounded(num_successfully_checked, 2))
/* Rerun phase 2: program each lane at its largest bound, then lower the
   bound until type-2 testing passes three times in a row per lane. */
2025 for (lane = 0; lane < 8; lane++)
2026 if (state[lane] == COMPLETE) {
2027 write_500(info, channel,
2028 timings[reg_178][channel][slot][rank]
2029 [lane].largest,
2030 get_timing_register_addr(lane, 0,
2031 slot, rank),
2032 9, 1);
2033 write_500(info, channel,
2034 timings[reg_178][channel][slot][rank]
2035 [lane].largest +
2036 info->training.
2037 lane_timings[1][channel][slot][rank]
2038 [lane]
2040 info->training.
2041 lane_timings[0][channel][slot][rank]
2042 [lane], get_timing_register_addr(lane,
2044 slot,
2045 rank),
2046 9, 1);
2047 num_successfully_checked[lane] = 0;
2048 } else
2049 num_successfully_checked[lane] = -1;
2051 do {
2052 int failmask = 0;
2053 for (i = 0; i < niter; i++) {
2054 if (failmask == 0xFF)
2055 break;
2056 failmask |=
2057 check_testing_type2(info, total_rank, 2, i,
2059 failmask |=
2060 check_testing_type2(info, total_rank, 3, i,
2064 mchbar_setbits32(0xfb0, 3 << 16);
2065 for (lane = 0; lane < 8; lane++) {
2066 if (num_successfully_checked[lane] != 0xffff) {
2067 if ((1 << lane) & failmask) {
2068 if (timings[reg_178][channel]
2069 [slot][rank][lane].
2070 largest <=
2071 timings[reg_178][channel]
2072 [slot][rank][lane].
2073 smallest) {
2074 num_successfully_checked
2075 [lane] = -1;
2076 } else {
2077 num_successfully_checked
2078 [lane] = 0;
2079 timings[reg_178]
2080 [channel][slot]
2081 [rank][lane].
2082 largest--;
2083 write_500(info, channel,
2084 timings
2085 [reg_178]
2086 [channel]
2087 [slot][rank]
2088 [lane].
2089 largest,
2090 get_timing_register_addr
2091 (lane, 0,
2092 slot, rank),
2093 9, 1);
2094 write_500(info, channel,
2095 timings
2096 [reg_178]
2097 [channel]
2098 [slot][rank]
2099 [lane].
2100 largest +
2101 info->
2102 training.
2103 lane_timings
2104 [1][channel]
2105 [slot][rank]
2106 [lane]
2108 info->
2109 training.
2110 lane_timings
2111 [0][channel]
2112 [slot][rank]
2113 [lane],
2114 get_timing_register_addr
2115 (lane, 1,
2116 slot, rank),
2117 9, 1);
2119 } else
2120 num_successfully_checked[lane]
2125 while (!check_bounded(num_successfully_checked, 3))
/* Restore the original timing registers and discard degenerate windows. */
2128 for (lane = 0; lane < 8; lane++) {
2129 write_500(info, channel,
2130 info->training.
2131 lane_timings[0][channel][slot][rank][lane],
2132 get_timing_register_addr(lane, 0, slot, rank),
2133 9, 1);
2134 write_500(info, channel,
2135 info->training.
2136 lane_timings[1][channel][slot][rank][lane],
2137 get_timing_register_addr(lane, 1, slot, rank),
2138 9, 1);
2139 if (timings[reg_178][channel][slot][rank][lane].
2140 largest <=
2141 timings[reg_178][channel][slot][rank][lane].
2142 smallest) {
2143 timings[reg_178][channel][slot][rank][lane].
2144 largest = 0;
2145 timings[reg_178][channel][slot][rank][lane].
2146 smallest = 0;
2152 static void set_10b(struct raminfo *info, u8 val)
2154 int channel;
2155 int slot, rank;
2156 int lane;
2158 if (read_1d0(0x10b, 6) == val)
2159 return;
2161 write_1d0(val, 0x10b, 6, 1);
2163 FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 9; lane++) {
2164 u16 reg_500;
2165 reg_500 = read_500(info, channel,
2166 get_timing_register_addr(lane, 0, slot,
2167 rank), 9);
2168 if (val == 1) {
2169 if (lut16[info->clock_speed_index] <= reg_500)
2170 reg_500 -= lut16[info->clock_speed_index];
2171 else
2172 reg_500 = 0;
2173 } else {
2174 reg_500 += lut16[info->clock_speed_index];
2176 write_500(info, channel, reg_500,
2177 get_timing_register_addr(lane, 0, slot, rank), 9, 1);
2181 static void set_ecc(int onoff)
2183 int channel;
2184 for (channel = 0; channel < NUM_CHANNELS; channel++) {
2185 u8 t;
2186 t = mchbar_read8((channel << 10) + 0x5f8);
2187 if (onoff)
2188 t |= 1;
2189 else
2190 t &= ~1;
2191 mchbar_write8((channel << 10) + 0x5f8, t);
2195 static void set_178(u8 val)
2197 if (val >= 31)
2198 val = val - 31;
2199 else
2200 val = 63 - val;
2202 write_1d0(2 * val, 0x178, 7, 1);
2205 static void
2206 write_500_timings_type(struct raminfo *info, int channel, int slot, int rank,
2207 int type)
2209 int lane;
2211 for (lane = 0; lane < 8; lane++)
2212 write_500(info, channel,
2213 info->training.
2214 lane_timings[type][channel][slot][rank][lane],
2215 get_timing_register_addr(lane, type, slot, rank), 9,
/*
 * Sweep register 0x1bb over 0..63 to find each lane's usable window for
 * timing set 3, then program the window center (with a small silicon-
 * revision-dependent bias) and record the bounds in info->training.
 */
2219 static void
2220 try_timing_offsets(struct raminfo *info, int channel,
2221 int slot, int rank, int totalrank)
2223 u16 count[8];
2224 enum state state[8];
2225 u8 lower_usable[8], upper_usable[8];
2226 int lane;
2227 int i;
2228 int flip = 1;
2229 int timing_offset;
2231 for (i = 0; i < 8; i++)
2232 state[i] = BEFORE_USABLE;
2234 memset(count, 0, sizeof(count));
/* Start timing set 3 offset by +32 from the cached timing set 2 value. */
2236 for (lane = 0; lane < 8; lane++)
2237 write_500(info, channel,
2238 info->training.
2239 lane_timings[2][channel][slot][rank][lane] + 32,
2240 get_timing_register_addr(lane, 3, slot, rank), 9, 1);
2242 for (timing_offset = 0; !validate_state(state) && timing_offset < 64;
2243 timing_offset++) {
2244 u8 failmask;
2245 write_1d0(timing_offset ^ 32, 0x1bb, 6, 1);
2246 failmask = 0;
/* Test with both pattern polarities at each offset. */
2247 for (i = 0; i < 2 && failmask != 0xff; i++) {
2248 flip = !flip;
2249 write_testing(info, totalrank, flip);
2250 failmask |= check_testing(info, totalrank, flip);
2252 do_fsm(state, count, failmask, 10, 63, lower_usable,
2253 upper_usable, timing_offset);
2255 write_1d0(0, 0x1bb, 6, 1);
2256 dump_timings(info);
2257 if (!validate_state(state))
2258 die("Couldn't discover DRAM timings (1)\n");
2260 for (lane = 0; lane < 8; lane++) {
2261 u8 bias = 0;
/* Bias the center slightly (max 2) when the window is wide enough. */
2263 if (info->silicon_revision) {
2264 int usable_length;
2266 usable_length = upper_usable[lane] - lower_usable[lane];
2267 if (usable_length >= 20) {
2268 bias = usable_length / 2 - 10;
2269 if (bias >= 2)
2270 bias = 2;
2273 write_500(info, channel,
2274 info->training.
2275 lane_timings[2][channel][slot][rank][lane] +
2276 (upper_usable[lane] + lower_usable[lane]) / 2 - bias,
2277 get_timing_register_addr(lane, 3, slot, rank), 9, 1);
2278 info->training.timing2_bounds[channel][slot][rank][lane][0] =
2279 info->training.lane_timings[2][channel][slot][rank][lane] +
2280 lower_usable[lane];
2281 info->training.timing2_bounds[channel][slot][rank][lane][1] =
2282 info->training.lane_timings[2][channel][slot][rank][lane] +
2283 upper_usable[lane];
2284 info->training.timing2_offset[channel][slot][rank][lane] =
2285 info->training.lane_timings[2][channel][slot][rank][lane];
/*
 * Pick a training value for one lane as a weighted average of the window
 * centers at reg_178 = center_178 - span, center_178, center_178 + span.
 * Weights and span are tweaked for specific silicon revisions / board
 * configurations, and the result is pulled toward keeping at least a
 * 10-step margin on both sides of the center window.
 */
2289 static u8
2290 choose_training(struct raminfo *info, int channel, int slot, int rank,
2291 int lane, timing_bounds_t * timings, u8 center_178)
2293 u16 central_weight;
2294 u16 side_weight;
2295 unsigned int sum = 0, count = 0;
2296 u8 span;
2297 u8 lower_margin, upper_margin;
2298 u8 reg_178;
2299 u8 result;
2301 span = 12;
2302 central_weight = 20;
2303 side_weight = 20;
/* Per-revision/per-configuration weight adjustments (empirical). */
2304 if (info->silicon_revision == 1 && channel == 1) {
2305 central_weight = 5;
2306 side_weight = 20;
2307 if ((info->
2308 populated_ranks_mask[1] ^ (info->
2309 populated_ranks_mask[1] >> 2)) &
2311 span = 18;
2313 if ((info->populated_ranks_mask[0] & 5) == 5) {
2314 central_weight = 20;
2315 side_weight = 20;
2317 if (info->clock_speed_index >= 2
2318 && (info->populated_ranks_mask[0] & 5) == 5 && slot == 1) {
2319 if (info->silicon_revision == 1) {
2320 switch (channel) {
2321 case 0:
2322 if (lane == 1) {
2323 central_weight = 10;
2324 side_weight = 20;
2326 break;
2327 case 1:
2328 if (lane == 6) {
2329 side_weight = 5;
2330 central_weight = 20;
2332 break;
2335 if (info->silicon_revision == 0 && channel == 0 && lane == 0) {
2336 side_weight = 5;
2337 central_weight = 20;
/* Average the window midpoints; windows narrower than 5 are ignored. */
2340 for (reg_178 = center_178 - span; reg_178 <= center_178 + span;
2341 reg_178 += span) {
2342 u8 smallest;
2343 u8 largest;
2344 largest = timings[reg_178][channel][slot][rank][lane].largest;
2345 smallest = timings[reg_178][channel][slot][rank][lane].smallest;
2346 if (largest - smallest + 1 >= 5) {
2347 unsigned int weight;
2348 if (reg_178 == center_178)
2349 weight = central_weight;
2350 else
2351 weight = side_weight;
2352 sum += weight * (largest + smallest);
2353 count += weight;
2356 dump_timings(info);
2357 if (count == 0)
2358 die("Couldn't discover DRAM timings (2)\n");
2359 result = sum / (2 * count);
/* Shift the result toward a >= 10-step margin on each side of the
   center window, when one side has slack and the other is short. */
2360 lower_margin =
2361 result - timings[center_178][channel][slot][rank][lane].smallest;
2362 upper_margin =
2363 timings[center_178][channel][slot][rank][lane].largest - result;
2364 if (upper_margin < 10 && lower_margin > 10)
2365 result -= MIN(lower_margin - 10, 10 - upper_margin);
2366 if (upper_margin > 10 && lower_margin < 10)
2367 result += MIN(upper_margin - 10, 10 - lower_margin);
2368 return result;
2371 #define STANDARD_MIN_MARGIN 5
2373 static u8 choose_reg178(struct raminfo *info, timing_bounds_t * timings)
2375 u16 margin[64];
2376 int lane, rank, slot, channel;
2377 u8 reg178;
2378 int count = 0, sum = 0;
2380 for (reg178 = reg178_min[info->clock_speed_index];
2381 reg178 < reg178_max[info->clock_speed_index];
2382 reg178 += reg178_step[info->clock_speed_index]) {
2383 margin[reg178] = -1;
2384 FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
2385 int curmargin =
2386 timings[reg178][channel][slot][rank][lane].largest -
2387 timings[reg178][channel][slot][rank][lane].
2388 smallest + 1;
2389 if (curmargin < margin[reg178])
2390 margin[reg178] = curmargin;
2392 if (margin[reg178] >= STANDARD_MIN_MARGIN) {
2393 u16 weight;
2394 weight = margin[reg178] - STANDARD_MIN_MARGIN;
2395 sum += weight * reg178;
2396 count += weight;
2399 dump_timings(info);
2400 if (count == 0)
2401 die("Couldn't discover DRAM timings (3)\n");
2403 u8 threshold;
2405 for (threshold = 30; threshold >= 5; threshold--) {
2406 int usable_length = 0;
2407 int smallest_fount = 0;
2408 for (reg178 = reg178_min[info->clock_speed_index];
2409 reg178 < reg178_max[info->clock_speed_index];
2410 reg178 += reg178_step[info->clock_speed_index])
2411 if (margin[reg178] >= threshold) {
2412 usable_length +=
2413 reg178_step[info->clock_speed_index];
2414 info->training.reg178_largest =
2415 reg178 -
2416 2 * reg178_step[info->clock_speed_index];
2418 if (!smallest_fount) {
2419 smallest_fount = 1;
2420 info->training.reg178_smallest =
2421 reg178 +
2422 reg178_step[info->
2423 clock_speed_index];
2426 if (usable_length >= 0x21)
2427 break;
2430 return sum / count;
2433 static int check_cached_sanity(struct raminfo *info)
2435 int lane;
2436 int slot, rank;
2437 int channel;
2439 if (!info->cached_training)
2440 return 0;
2442 for (channel = 0; channel < NUM_CHANNELS; channel++)
2443 for (slot = 0; slot < NUM_SLOTS; slot++)
2444 for (rank = 0; rank < NUM_RANKS; rank++)
2445 for (lane = 0; lane < 8 + info->use_ecc; lane++) {
2446 u16 cached_value, estimation_value;
2447 cached_value =
2448 info->cached_training->
2449 lane_timings[1][channel][slot][rank]
2450 [lane];
2451 if (cached_value >= 0x18
2452 && cached_value <= 0x1E7) {
2453 estimation_value =
2454 info->training.
2455 lane_timings[1][channel]
2456 [slot][rank][lane];
2457 if (estimation_value <
2458 cached_value - 24)
2459 return 0;
2460 if (estimation_value >
2461 cached_value + 24)
2462 return 0;
2465 return 1;
2468 static int try_cached_training(struct raminfo *info)
2470 u8 saved_243[2];
2471 u8 tm;
2473 int channel, slot, rank, lane;
2474 int flip = 1;
2475 int i, j;
2477 if (!check_cached_sanity(info))
2478 return 0;
2480 info->training.reg178_center = info->cached_training->reg178_center;
2481 info->training.reg178_smallest = info->cached_training->reg178_smallest;
2482 info->training.reg178_largest = info->cached_training->reg178_largest;
2483 memcpy(&info->training.timing_bounds,
2484 &info->cached_training->timing_bounds,
2485 sizeof(info->training.timing_bounds));
2486 memcpy(&info->training.timing_offset,
2487 &info->cached_training->timing_offset,
2488 sizeof(info->training.timing_offset));
2490 write_1d0(2, 0x142, 3, 1);
2491 saved_243[0] = mchbar_read8(0x243);
2492 saved_243[1] = mchbar_read8(0x643);
2493 mchbar_write8(0x243, saved_243[0] | 2);
2494 mchbar_write8(0x643, saved_243[1] | 2);
2495 set_ecc(0);
2496 pci_write_config16(NORTHBRIDGE, 0xc8, 3);
2497 if (read_1d0(0x10b, 6) & 1)
2498 set_10b(info, 0);
2499 for (tm = 0; tm < 2; tm++) {
2500 int totalrank;
2502 set_178(tm ? info->cached_training->reg178_largest : info->
2503 cached_training->reg178_smallest);
2505 totalrank = 0;
2506 /* Check timing ranges. With i == 0 we check smallest one and with
2507 i == 1 the largest bound. With j == 0 we check that on the bound
2508 it still works whereas with j == 1 we check that just outside of
2509 bound we fail.
2511 FOR_POPULATED_RANKS_BACKWARDS {
2512 for (i = 0; i < 2; i++) {
2513 for (lane = 0; lane < 8; lane++) {
2514 write_500(info, channel,
2515 info->cached_training->
2516 timing2_bounds[channel][slot]
2517 [rank][lane][i],
2518 get_timing_register_addr(lane,
2520 slot,
2521 rank),
2522 9, 1);
2524 if (!i)
2525 write_500(info, channel,
2526 info->
2527 cached_training->
2528 timing2_offset
2529 [channel][slot][rank]
2530 [lane],
2531 get_timing_register_addr
2532 (lane, 2, slot, rank),
2533 9, 1);
2534 write_500(info, channel,
2535 i ? info->cached_training->
2536 timing_bounds[tm][channel]
2537 [slot][rank][lane].
2538 largest : info->
2539 cached_training->
2540 timing_bounds[tm][channel]
2541 [slot][rank][lane].smallest,
2542 get_timing_register_addr(lane,
2544 slot,
2545 rank),
2546 9, 1);
2547 write_500(info, channel,
2548 info->cached_training->
2549 timing_offset[channel][slot]
2550 [rank][lane] +
2551 (i ? info->cached_training->
2552 timing_bounds[tm][channel]
2553 [slot][rank][lane].
2554 largest : info->
2555 cached_training->
2556 timing_bounds[tm][channel]
2557 [slot][rank][lane].
2558 smallest) - 64,
2559 get_timing_register_addr(lane,
2561 slot,
2562 rank),
2563 9, 1);
2565 for (j = 0; j < 2; j++) {
2566 u8 failmask;
2567 u8 expected_failmask;
2568 char reg1b3;
2570 reg1b3 = (j == 1) + 4;
2571 reg1b3 =
2572 j == i ? reg1b3 : (-reg1b3) & 0x3f;
2573 write_1d0(reg1b3, 0x1bb, 6, 1);
2574 write_1d0(reg1b3, 0x1b3, 6, 1);
2575 write_1d0(reg1b3, 0x1a3, 6, 1);
2577 flip = !flip;
2578 write_testing(info, totalrank, flip);
2579 failmask =
2580 check_testing(info, totalrank,
2581 flip);
2582 expected_failmask =
2583 j == 0 ? 0x00 : 0xff;
2584 if (failmask != expected_failmask)
2585 goto fail;
2588 totalrank++;
2592 set_178(info->cached_training->reg178_center);
2593 if (info->use_ecc)
2594 set_ecc(1);
2595 write_training_data(info);
2596 write_1d0(0, 322, 3, 1);
2597 info->training = *info->cached_training;
2599 write_1d0(0, 0x1bb, 6, 1);
2600 write_1d0(0, 0x1b3, 6, 1);
2601 write_1d0(0, 0x1a3, 6, 1);
2602 mchbar_write8(0x243, saved_243[0]);
2603 mchbar_write8(0x643, saved_243[1]);
2605 return 1;
2607 fail:
2608 FOR_POPULATED_RANKS {
2609 write_500_timings_type(info, channel, slot, rank, 1);
2610 write_500_timings_type(info, channel, slot, rank, 2);
2611 write_500_timings_type(info, channel, slot, rank, 3);
2614 write_1d0(0, 0x1bb, 6, 1);
2615 write_1d0(0, 0x1b3, 6, 1);
2616 write_1d0(0, 0x1a3, 6, 1);
2617 mchbar_write8(0x243, saved_243[0]);
2618 mchbar_write8(0x643, saved_243[1]);
2620 return 0;
2623 static void do_ram_training(struct raminfo *info)
2625 u8 saved_243[2];
2626 int totalrank = 0;
2627 u8 reg_178;
2628 int niter;
2630 timing_bounds_t *timings = timings_car;
2631 int lane, rank, slot, channel;
2632 u8 reg178_center;
2634 write_1d0(2, 0x142, 3, 1);
2635 saved_243[0] = mchbar_read8(0x243);
2636 saved_243[1] = mchbar_read8(0x643);
2637 mchbar_write8(0x243, saved_243[0] | 2);
2638 mchbar_write8(0x643, saved_243[1] | 2);
2639 switch (info->clock_speed_index) {
2640 case 0:
2641 niter = 5;
2642 break;
2643 case 1:
2644 niter = 10;
2645 break;
2646 default:
2647 niter = 19;
2648 break;
2650 set_ecc(0);
2652 FOR_POPULATED_RANKS_BACKWARDS {
2653 int i;
2655 write_500_timings_type(info, channel, slot, rank, 0);
2657 write_testing(info, totalrank, 0);
2658 for (i = 0; i < niter; i++) {
2659 write_testing_type2(info, totalrank, 2, i, 0);
2660 write_testing_type2(info, totalrank, 3, i, 1);
2662 pci_write_config8(NORTHBRIDGE, 0xc0, 0x01);
2663 totalrank++;
2666 if (reg178_min[info->clock_speed_index] <
2667 reg178_max[info->clock_speed_index])
2668 memset(timings[reg178_min[info->clock_speed_index]], 0,
2669 sizeof(timings[0]) *
2670 (reg178_max[info->clock_speed_index] -
2671 reg178_min[info->clock_speed_index]));
2672 for (reg_178 = reg178_min[info->clock_speed_index];
2673 reg_178 < reg178_max[info->clock_speed_index];
2674 reg_178 += reg178_step[info->clock_speed_index]) {
2675 totalrank = 0;
2676 set_178(reg_178);
2677 for (channel = NUM_CHANNELS - 1; channel >= 0; channel--)
2678 for (slot = 0; slot < NUM_SLOTS; slot++)
2679 for (rank = 0; rank < NUM_RANKS; rank++) {
2680 memset(&timings[reg_178][channel][slot]
2681 [rank][0].smallest, 0, 16);
2682 if (info->
2683 populated_ranks[channel][slot]
2684 [rank]) {
2685 train_ram_at_178(info, channel,
2686 slot, rank,
2687 totalrank,
2688 reg_178, 1,
2689 niter,
2690 timings);
2691 totalrank++;
2696 reg178_center = choose_reg178(info, timings);
2698 FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
2699 info->training.timing_bounds[0][channel][slot][rank][lane].
2700 smallest =
2701 timings[info->training.
2702 reg178_smallest][channel][slot][rank][lane].
2703 smallest;
2704 info->training.timing_bounds[0][channel][slot][rank][lane].
2705 largest =
2706 timings[info->training.
2707 reg178_smallest][channel][slot][rank][lane].largest;
2708 info->training.timing_bounds[1][channel][slot][rank][lane].
2709 smallest =
2710 timings[info->training.
2711 reg178_largest][channel][slot][rank][lane].smallest;
2712 info->training.timing_bounds[1][channel][slot][rank][lane].
2713 largest =
2714 timings[info->training.
2715 reg178_largest][channel][slot][rank][lane].largest;
2716 info->training.timing_offset[channel][slot][rank][lane] =
2717 info->training.lane_timings[1][channel][slot][rank][lane]
2719 info->training.lane_timings[0][channel][slot][rank][lane] +
2723 if (info->silicon_revision == 1
2724 && (info->
2725 populated_ranks_mask[1] ^ (info->
2726 populated_ranks_mask[1] >> 2)) & 1) {
2727 int ranks_after_channel1;
2729 totalrank = 0;
2730 for (reg_178 = reg178_center - 18;
2731 reg_178 <= reg178_center + 18; reg_178 += 18) {
2732 totalrank = 0;
2733 set_178(reg_178);
2734 for (slot = 0; slot < NUM_SLOTS; slot++)
2735 for (rank = 0; rank < NUM_RANKS; rank++) {
2736 if (info->
2737 populated_ranks[1][slot][rank]) {
2738 train_ram_at_178(info, 1, slot,
2739 rank,
2740 totalrank,
2741 reg_178, 0,
2742 niter,
2743 timings);
2744 totalrank++;
2748 ranks_after_channel1 = totalrank;
2750 for (reg_178 = reg178_center - 12;
2751 reg_178 <= reg178_center + 12; reg_178 += 12) {
2752 totalrank = ranks_after_channel1;
2753 set_178(reg_178);
2754 for (slot = 0; slot < NUM_SLOTS; slot++)
2755 for (rank = 0; rank < NUM_RANKS; rank++)
2756 if (info->
2757 populated_ranks[0][slot][rank]) {
2758 train_ram_at_178(info, 0, slot,
2759 rank,
2760 totalrank,
2761 reg_178, 0,
2762 niter,
2763 timings);
2764 totalrank++;
2768 } else {
2769 for (reg_178 = reg178_center - 12;
2770 reg_178 <= reg178_center + 12; reg_178 += 12) {
2771 totalrank = 0;
2772 set_178(reg_178);
2773 FOR_POPULATED_RANKS_BACKWARDS {
2774 train_ram_at_178(info, channel, slot, rank,
2775 totalrank, reg_178, 0, niter,
2776 timings);
2777 totalrank++;
2782 set_178(reg178_center);
2783 FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
2784 u16 tm0;
2786 tm0 =
2787 choose_training(info, channel, slot, rank, lane, timings,
2788 reg178_center);
2789 write_500(info, channel, tm0,
2790 get_timing_register_addr(lane, 0, slot, rank), 9, 1);
2791 write_500(info, channel,
2792 tm0 +
2793 info->training.
2794 lane_timings[1][channel][slot][rank][lane] -
2795 info->training.
2796 lane_timings[0][channel][slot][rank][lane],
2797 get_timing_register_addr(lane, 1, slot, rank), 9, 1);
2800 totalrank = 0;
2801 FOR_POPULATED_RANKS_BACKWARDS {
2802 try_timing_offsets(info, channel, slot, rank, totalrank);
2803 totalrank++;
2805 mchbar_write8(0x243, saved_243[0]);
2806 mchbar_write8(0x643, saved_243[1]);
2807 write_1d0(0, 0x142, 3, 1);
2808 info->training.reg178_center = reg178_center;
2811 static void ram_training(struct raminfo *info)
2813 u16 saved_fc4;
2815 saved_fc4 = mchbar_read16(0xfc4);
2816 mchbar_write16(0xfc4, 0xffff);
2818 if (info->revision >= 8)
2819 read_4090(info);
2821 if (!try_cached_training(info))
2822 do_ram_training(info);
2823 if ((info->silicon_revision == 2 || info->silicon_revision == 3)
2824 && info->clock_speed_index < 2)
2825 set_10b(info, 1);
2826 mchbar_write16(0xfc4, saved_fc4);
2829 u16 get_max_timing(struct raminfo *info, int channel)
2831 int slot, rank, lane;
2832 u16 ret = 0;
2834 if ((mchbar_read8(0x2ca8) >> 2) < 1)
2835 return 384;
2837 if (info->revision < 8)
2838 return 256;
2840 for (slot = 0; slot < NUM_SLOTS; slot++)
2841 for (rank = 0; rank < NUM_RANKS; rank++)
2842 if (info->populated_ranks[channel][slot][rank])
2843 for (lane = 0; lane < 8 + info->use_ecc; lane++)
2844 ret = MAX(ret, read_500(info, channel,
2845 get_timing_register_addr
2846 (lane, 0, slot,
2847 rank), 9));
2848 return ret;
2851 static void dmi_setup(void)
2853 gav(dmibar_read8(0x254));
2854 dmibar_write8(0x254, 1 << 0);
2855 dmibar_write16(0x1b8, 0x18f2);
2856 mchbar_clrsetbits16(0x48, ~0, 1 << 1);
2858 dmibar_setbits32(0xd68, 1 << 27);
2860 outl((gav(inl(DEFAULT_GPIOBASE | 0x38)) & ~0x140000) | 0x400000,
2861 DEFAULT_GPIOBASE | 0x38);
2862 gav(inb(DEFAULT_GPIOBASE | 0xe)); // = 0xfdcaff6e
2865 void chipset_init(const int s3resume)
2867 u8 x2ca8;
2868 u16 ggc;
2869 u8 gfxsize;
2871 x2ca8 = mchbar_read8(0x2ca8);
2872 if ((x2ca8 & 1) || (x2ca8 == 8 && !s3resume)) {
2873 printk(BIOS_DEBUG, "soft reset detected, rebooting properly\n");
2874 mchbar_write8(0x2ca8, 0);
2875 system_reset();
2878 dmi_setup();
2880 mchbar_write16(0x1170, 0xa880);
2881 mchbar_write8(0x11c1, 1 << 0);
2882 mchbar_write16(0x1170, 0xb880);
2883 mchbar_clrsetbits8(0x1210, ~0, 0x84);
2885 gfxsize = get_uint_option("gfx_uma_size", 0); /* 0 for 32MB */
2887 ggc = 0xb00 | ((gfxsize + 5) << 4);
2889 pci_write_config16(NORTHBRIDGE, GGC, ggc | 2);
2891 u16 deven;
2892 deven = pci_read_config16(NORTHBRIDGE, DEVEN); // = 0x3
2894 if (deven & 8) {
2895 mchbar_write8(0x2c30, 1 << 5);
2896 pci_read_config8(NORTHBRIDGE, 0x8); // = 0x18
2897 mchbar_setbits16(0x2c30, 1 << 9);
2898 mchbar_write16(0x2c32, 0x434);
2899 mchbar_clrsetbits32(0x2c44, ~0, 0x1053687);
2900 pci_read_config8(GMA, MSAC); // = 0x2
2901 pci_write_config8(GMA, MSAC, 0x2);
2902 RCBA8(0x2318);
2903 RCBA8(0x2318) = 0x47;
2904 RCBA8(0x2320);
2905 RCBA8(0x2320) = 0xfc;
2908 mchbar_clrsetbits32(0x30, ~0, 0x40);
2910 pci_write_config16(NORTHBRIDGE, GGC, ggc);
2911 gav(RCBA32(0x3428));
2912 RCBA32(0x3428) = 0x1d;
2915 static u8 get_bits_420(const u32 reg32)
2917 u8 val = 0;
2918 val |= (reg32 >> 4) & (1 << 0);
2919 val |= (reg32 >> 2) & (1 << 1);
2920 val |= (reg32 >> 0) & (1 << 2);
2921 return val;
2924 void raminit(const int s3resume, const u8 *spd_addrmap)
2926 unsigned int channel, slot, lane, rank;
2927 struct raminfo info;
2928 u8 x2ca8;
2929 int cbmem_wasnot_inited;
2931 x2ca8 = mchbar_read8(0x2ca8);
2933 printk(RAM_DEBUG, "Scratchpad MCHBAR8(0x2ca8): 0x%04x\n", x2ca8);
2935 memset(&info, 0x5a, sizeof(info));
2937 info.last_500_command[0] = 0;
2938 info.last_500_command[1] = 0;
2940 info.board_lane_delay[0] = 0x14;
2941 info.board_lane_delay[1] = 0x07;
2942 info.board_lane_delay[2] = 0x07;
2943 info.board_lane_delay[3] = 0x08;
2944 info.board_lane_delay[4] = 0x56;
2945 info.board_lane_delay[5] = 0x04;
2946 info.board_lane_delay[6] = 0x04;
2947 info.board_lane_delay[7] = 0x05;
2948 info.board_lane_delay[8] = 0x10;
2950 info.training.reg_178 = 0;
2951 info.training.reg_10b = 0;
2953 /* Wait for some bit, maybe TXT clear. */
2954 while (!(read8((u8 *)0xfed40000) & (1 << 7)))
2957 /* Wait for ME to be ready */
2958 intel_early_me_init();
2959 info.memory_reserved_for_heci_mb = intel_early_me_uma_size();
2961 /* before SPD */
2962 timestamp_add_now(101);
2964 if (!s3resume || 1) { // possible error
2965 memset(&info.populated_ranks, 0, sizeof(info.populated_ranks));
2967 info.use_ecc = 1;
2968 for (channel = 0; channel < NUM_CHANNELS; channel++)
2969 for (slot = 0; slot < NUM_SLOTS; slot++) {
2970 int v;
2971 int try;
2972 int addr;
2973 const u8 useful_addresses[] = {
2974 DEVICE_TYPE,
2975 MODULE_TYPE,
2976 DENSITY,
2977 RANKS_AND_DQ,
2978 MEMORY_BUS_WIDTH,
2979 TIMEBASE_DIVIDEND,
2980 TIMEBASE_DIVISOR,
2981 CYCLETIME,
2982 CAS_LATENCIES_LSB,
2983 CAS_LATENCIES_MSB,
2984 CAS_LATENCY_TIME,
2985 0x11, 0x12, 0x13, 0x14, 0x15,
2986 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b,
2987 0x1c, 0x1d,
2988 THERMAL_AND_REFRESH,
2989 0x20,
2990 REFERENCE_RAW_CARD_USED,
2991 RANK1_ADDRESS_MAPPING,
2992 0x75, 0x76, 0x77, 0x78,
2993 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
2994 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84,
2995 0x85, 0x86, 0x87, 0x88,
2996 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
2997 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
2998 0x95
3000 if (!spd_addrmap[2 * channel + slot])
3001 continue;
3002 for (try = 0; try < 5; try++) {
3003 v = smbus_read_byte(spd_addrmap[2 * channel + slot],
3004 DEVICE_TYPE);
3005 if (v >= 0)
3006 break;
3008 if (v < 0)
3009 continue;
3010 for (addr = 0;
3011 addr <
3012 ARRAY_SIZE(useful_addresses); addr++)
3013 gav(info.
3014 spd[channel][0][useful_addresses
3015 [addr]] =
3016 smbus_read_byte(spd_addrmap[2 * channel + slot],
3017 useful_addresses
3018 [addr]));
3019 if (info.spd[channel][0][DEVICE_TYPE] != 11)
3020 die("Only DDR3 is supported");
3022 v = info.spd[channel][0][RANKS_AND_DQ];
3023 info.populated_ranks[channel][0][0] = 1;
3024 info.populated_ranks[channel][0][1] =
3025 ((v >> 3) & 7);
3026 if (((v >> 3) & 7) > 1)
3027 die("At most 2 ranks are supported");
3028 if ((v & 7) == 0 || (v & 7) > 2)
3029 die("Only x8 and x16 modules are supported");
3030 if ((info.
3031 spd[channel][slot][MODULE_TYPE] & 0xF) != 2
3032 && (info.
3033 spd[channel][slot][MODULE_TYPE] & 0xF)
3034 != 3)
3035 die("Registered memory is not supported");
3036 info.is_x16_module[channel][0] = (v & 7) - 1;
3037 info.density[channel][slot] =
3038 info.spd[channel][slot][DENSITY] & 0xF;
3039 if (!
3040 (info.
3041 spd[channel][slot][MEMORY_BUS_WIDTH] &
3042 0x18))
3043 info.use_ecc = 0;
3046 gav(0x55);
3048 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3049 int v = 0;
3050 for (slot = 0; slot < NUM_SLOTS; slot++)
3051 for (rank = 0; rank < NUM_RANKS; rank++)
3052 v |= info.
3053 populated_ranks[channel][slot][rank]
3054 << (2 * slot + rank);
3055 info.populated_ranks_mask[channel] = v;
3058 gav(0x55);
3060 gav(pci_read_config32(NORTHBRIDGE, CAPID0 + 4));
3063 /* after SPD */
3064 timestamp_add_now(102);
3066 mchbar_clrbits8(0x2ca8, 1 << 1 | 1 << 0);
3068 collect_system_info(&info);
3069 calculate_timings(&info);
3071 if (!s3resume) {
3072 u8 reg8 = pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2);
3073 if (x2ca8 == 0 && (reg8 & 0x80)) {
3074 /* Don't enable S4-assertion stretch. Makes trouble on roda/rk9.
3075 reg8 = pci_read_config8(PCI_DEV(0, 0x1f, 0), 0xa4);
3076 pci_write_config8(PCI_DEV(0, 0x1f, 0), 0xa4, reg8 | 0x08);
3079 /* Clear bit7. */
3081 pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
3082 (reg8 & ~(1 << 7)));
3084 printk(BIOS_INFO,
3085 "Interrupted RAM init, reset required.\n");
3086 system_reset();
3090 if (!s3resume && x2ca8 == 0)
3091 pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
3092 pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) | 0x80);
3094 compute_derived_timings(&info);
3096 early_quickpath_init(&info, x2ca8);
3098 info.cached_training = get_cached_training();
3100 if (x2ca8 == 0)
3101 late_quickpath_init(&info, s3resume);
3103 mchbar_setbits32(0x2c80, 1 << 24);
3104 mchbar_write32(0x1804, mchbar_read32(0x1c04) & ~(1 << 27));
3106 mchbar_read8(0x2ca8); // !!!!
3108 if (x2ca8 == 0) {
3109 mchbar_clrbits8(0x2ca8, 3);
3110 mchbar_write8(0x2ca8, mchbar_read8(0x2ca8) + 4); // "+" or "|"?
3111 /* This issues a CPU reset without resetting the platform */
3112 printk(BIOS_DEBUG, "Issuing a CPU reset\n");
3113 /* Write back the S3 state to PM1_CNT to let the reset CPU
3114 know it also needs to take the s3 path. */
3115 if (s3resume)
3116 write_pmbase32(PM1_CNT, read_pmbase32(PM1_CNT)
3117 | (SLP_TYP_S3 << 10));
3118 mchbar_setbits32(0x1af0, 1 << 4);
3119 halt();
3122 mchbar_clrbits8(0x2ca8, 0); // !!!!
3124 mchbar_clrbits32(0x2c80, 1 << 24);
3126 pci_write_config32(QPI_NON_CORE, MAX_RTIDS, 0x20220);
3129 u8 x2c20 = (mchbar_read16(0x2c20) >> 8) & 3;
3130 u16 x2c10 = mchbar_read16(0x2c10);
3131 u16 value = mchbar_read16(0x2c00);
3132 if (x2c20 == 0 && (x2c10 & 0x300) == 0)
3133 value |= (1 << 7);
3134 else
3135 value &= ~(1 << 0);
3137 mchbar_write16(0x2c00, value);
3140 udelay(1000); // !!!!
3142 write_1d0(0, 0x33d, 0, 0);
3143 write_500(&info, 0, 0, 0xb61, 0, 0);
3144 write_500(&info, 1, 0, 0xb61, 0, 0);
3145 mchbar_write32(0x1a30, 0);
3146 mchbar_write32(0x1a34, 0);
3147 mchbar_write16(0x614, 0xb5b | (info.populated_ranks[1][0][0] * 0x404) |
3148 (info.populated_ranks[0][0][0] * 0xa0));
3149 mchbar_write16(0x616, 0x26a);
3150 mchbar_write32(0x134, 0x856000);
3151 mchbar_write32(0x160, 0x5ffffff);
3152 mchbar_clrsetbits32(0x114, ~0, 0xc2024440); // !!!!
3153 mchbar_clrsetbits32(0x118, ~0, 0x4); // !!!!
3154 for (channel = 0; channel < NUM_CHANNELS; channel++)
3155 mchbar_write32(0x260 + (channel << 10), 0x30809ff |
3156 (info.populated_ranks_mask[channel] & 3) << 20);
3157 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3158 mchbar_write16(0x31c + (channel << 10), 0x101);
3159 mchbar_write16(0x360 + (channel << 10), 0x909);
3160 mchbar_write16(0x3a4 + (channel << 10), 0x101);
3161 mchbar_write16(0x3e8 + (channel << 10), 0x101);
3162 mchbar_write32(0x320 + (channel << 10), 0x29002900);
3163 mchbar_write32(0x324 + (channel << 10), 0);
3164 mchbar_write32(0x368 + (channel << 10), 0x32003200);
3165 mchbar_write16(0x352 + (channel << 10), 0x505);
3166 mchbar_write16(0x354 + (channel << 10), 0x3c3c);
3167 mchbar_write16(0x356 + (channel << 10), 0x1040);
3168 mchbar_write16(0x39a + (channel << 10), 0x73e4);
3169 mchbar_write16(0x3de + (channel << 10), 0x77ed);
3170 mchbar_write16(0x422 + (channel << 10), 0x1040);
3173 write_1d0(0x4, 0x151, 4, 1);
3174 write_1d0(0, 0x142, 3, 1);
3175 rdmsr(0x1ac); // !!!!
3176 write_500(&info, 1, 1, 0x6b3, 4, 1);
3177 write_500(&info, 1, 1, 0x6cf, 4, 1);
3179 rmw_1d0(0x21c, 0x38, 0, 6);
3181 write_1d0(((!info.populated_ranks[1][0][0]) << 1) | ((!info.
3182 populated_ranks[0]
3183 [0][0]) << 0),
3184 0x1d1, 3, 1);
3185 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3186 mchbar_write16(0x38e + (channel << 10), 0x5f5f);
3187 mchbar_write16(0x3d2 + (channel << 10), 0x5f5f);
3190 set_334(0);
3192 program_base_timings(&info);
3194 mchbar_setbits8(0x5ff, 1 << 7);
3196 write_1d0(0x2, 0x1d5, 2, 1);
3197 write_1d0(0x20, 0x166, 7, 1);
3198 write_1d0(0x0, 0xeb, 3, 1);
3199 write_1d0(0x0, 0xf3, 6, 1);
3201 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3202 u8 a = 0;
3203 if (info.populated_ranks[channel][0][1] && info.clock_speed_index > 1)
3204 a = 3;
3205 if (info.silicon_revision == 0 || info.silicon_revision == 1)
3206 a = 3;
3208 for (lane = 0; lane < 9; lane++) {
3209 const u16 addr = 0x125 + get_lane_offset(0, 0, lane);
3210 rmw_500(&info, channel, addr, 6, 0xf, a);
3214 if (s3resume) {
3215 if (info.cached_training == NULL) {
3216 u32 reg32;
3217 printk(BIOS_ERR,
3218 "Couldn't find training data. Rebooting\n");
3219 reg32 = inl(DEFAULT_PMBASE + 0x04);
3220 outl(reg32 & ~(7 << 10), DEFAULT_PMBASE + 0x04);
3221 full_reset();
3223 int tm;
3224 info.training = *info.cached_training;
3225 for (tm = 0; tm < 4; tm++)
3226 for (channel = 0; channel < NUM_CHANNELS; channel++)
3227 for (slot = 0; slot < NUM_SLOTS; slot++)
3228 for (rank = 0; rank < NUM_RANKS; rank++)
3229 for (lane = 0; lane < 9; lane++)
3230 write_500(&info,
3231 channel,
3232 info.training.
3233 lane_timings
3234 [tm][channel]
3235 [slot][rank]
3236 [lane],
3237 get_timing_register_addr
3238 (lane, tm,
3239 slot, rank),
3240 9, 0);
3241 write_1d0(info.cached_training->reg_178, 0x178, 7, 1);
3242 write_1d0(info.cached_training->reg_10b, 0x10b, 6, 1);
3245 mchbar_clrsetbits32(0x1f4, ~0, 1 << 17); // !!!!
3246 mchbar_write32(0x1f0, 0x1d000200);
3247 mchbar_setbits8(0x1f0, 1 << 0);
3248 while (mchbar_read8(0x1f0) & 1)
3251 program_board_delay(&info);
3253 mchbar_write8(0x5ff, 0);
3254 mchbar_write8(0x5ff, 1 << 7);
3255 mchbar_write8(0x5f4, 1 << 0);
3257 mchbar_clrbits32(0x130, 1 << 1); // | 2 when ?
3258 while (mchbar_read32(0x130) & 1)
3261 rmw_1d0(0x14b, 0x47, 0x30, 7);
3262 rmw_1d0(0xd6, 0x38, 7, 6);
3263 rmw_1d0(0x328, 0x38, 7, 6);
3265 for (channel = 0; channel < NUM_CHANNELS; channel++)
3266 set_4cf(&info, channel, 1, 0);
3268 rmw_1d0(0x116, 0xe, 0, 4);
3269 rmw_1d0(0xae, 0x3e, 0, 6);
3270 rmw_1d0(0x300, 0x3e, 0, 6);
3271 mchbar_clrbits16(0x356, 1 << 15);
3272 mchbar_clrbits16(0x756, 1 << 15);
3273 mchbar_clrbits32(0x140, 7 << 24);
3274 mchbar_clrbits32(0x138, 7 << 24);
3275 mchbar_write32(0x130, 0x31111301);
3276 /* Wait until REG130b0 is 1. */
3277 while (mchbar_read32(0x130) & 1)
3280 u8 value_a1;
3282 const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6)); // = 0x1cf4040 // !!!!
3283 const u8 val_2f3 = get_bits_420(read_1d0(0x2f3, 6)); // = 0x10a4040 // !!!!
3284 value_a1 = val_xa1;
3285 rmw_1d0(0x320, 0x38, val_2f3, 6);
3286 rmw_1d0(0x14b, 0x78, val_xa1, 7);
3287 rmw_1d0(0xce, 0x38, val_xa1, 6);
3290 for (channel = 0; channel < NUM_CHANNELS; channel++)
3291 set_4cf(&info, channel, 1, 1);
3293 rmw_1d0(0x116, 0xe, 1, 4); // = 0x4040432 // !!!!
3295 if ((mchbar_read32(0x144) & 0x1f) < 0x13)
3296 value_a1 += 2;
3297 else
3298 value_a1 += 1;
3300 if (value_a1 > 7)
3301 value_a1 = 7;
3303 write_1d0(2, 0xae, 6, 1);
3304 write_1d0(2, 0x300, 6, 1);
3305 write_1d0(value_a1, 0x121, 3, 1);
3306 rmw_1d0(0xd6, 0x38, 4, 6);
3307 rmw_1d0(0x328, 0x38, 4, 6);
3310 for (channel = 0; channel < NUM_CHANNELS; channel++)
3311 set_4cf(&info, channel, 2, 0);
3313 mchbar_write32(0x130, 0x11111301 | info.populated_ranks[1][0][0] << 30 |
3314 info.populated_ranks[0][0][0] << 29);
3315 while (mchbar_read8(0x130) & 1)
3319 const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6));
3320 read_1d0(0x2f3, 6); // = 0x10a4054 // !!!!
3321 rmw_1d0(0x21c, 0x38, 0, 6);
3322 rmw_1d0(0x14b, 0x78, val_xa1, 7);
3325 for (channel = 0; channel < NUM_CHANNELS; channel++)
3326 set_4cf(&info, channel, 2, 1);
3328 set_334(1);
3330 mchbar_write8(0x1e8, 1 << 2);
3332 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3333 write_500(&info, channel,
3334 0x3 & ~(info.populated_ranks_mask[channel]), 0x6b7, 2,
3336 write_500(&info, channel, 0x3, 0x69b, 2, 1);
3338 mchbar_clrsetbits32(0x2d0, ~0xff0c01ff, 0x200000);
3339 mchbar_write16(0x6c0, 0x14a0);
3340 mchbar_clrsetbits32(0x6d0, ~0xff0000ff, 0x8000);
3341 mchbar_write16(0x232, 1 << 3);
3342 /* 0x40004 or 0 depending on ? */
3343 mchbar_clrsetbits32(0x234, 0x40004, 0x40004);
3344 mchbar_clrsetbits32(0x34, 0x7, 5);
3345 mchbar_write32(0x128, 0x2150d05);
3346 mchbar_write8(0x12c, 0x1f);
3347 mchbar_write8(0x12d, 0x56);
3348 mchbar_write8(0x12e, 0x31);
3349 mchbar_write8(0x12f, 0);
3350 mchbar_write8(0x271, 1 << 1);
3351 mchbar_write8(0x671, 1 << 1);
3352 mchbar_write8(0x1e8, 1 << 2);
3353 for (channel = 0; channel < NUM_CHANNELS; channel++)
3354 mchbar_write32(0x294 + (channel << 10),
3355 (info.populated_ranks_mask[channel] & 3) << 16);
3356 mchbar_clrsetbits32(0x134, ~0xfc01ffff, 0x10000);
3357 mchbar_clrsetbits32(0x134, ~0xfc85ffff, 0x850000);
3358 for (channel = 0; channel < NUM_CHANNELS; channel++)
3359 mchbar_clrsetbits32(0x260 + (channel << 10), 0xf << 20, 1 << 27 |
3360 (info.populated_ranks_mask[channel] & 3) << 20);
3362 if (!s3resume)
3363 jedec_init(&info);
3365 int totalrank = 0;
3366 for (channel = 0; channel < NUM_CHANNELS; channel++)
3367 for (slot = 0; slot < NUM_SLOTS; slot++)
3368 for (rank = 0; rank < NUM_RANKS; rank++)
3369 if (info.populated_ranks[channel][slot][rank]) {
3370 jedec_read(&info, channel, slot, rank,
3371 totalrank, 0xa, 0x400);
3372 totalrank++;
3375 mchbar_write8(0x12c, 0x9f);
3377 mchbar_clrsetbits8(0x271, 0x3e, 0x0e);
3378 mchbar_clrsetbits8(0x671, 0x3e, 0x0e);
3380 if (!s3resume) {
3381 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3382 mchbar_write32(0x294 + (channel << 10),
3383 (info.populated_ranks_mask[channel] & 3) << 16);
3384 mchbar_write16(0x298 + (channel << 10),
3385 info.populated_ranks[channel][0][0] |
3386 info.populated_ranks[channel][0][1] << 5);
3387 mchbar_write32(0x29c + (channel << 10), 0x77a);
3389 mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00); // !!!!
3392 u8 a, b;
3393 a = mchbar_read8(0x243);
3394 b = mchbar_read8(0x643);
3395 mchbar_write8(0x243, a | 2);
3396 mchbar_write8(0x643, b | 2);
3399 write_1d0(7, 0x19b, 3, 1);
3400 write_1d0(7, 0x1c0, 3, 1);
3401 write_1d0(4, 0x1c6, 4, 1);
3402 write_1d0(4, 0x1cc, 4, 1);
3403 rmw_1d0(0x151, 0xf, 0x4, 4);
3404 mchbar_write32(0x584, 0xfffff);
3405 mchbar_write32(0x984, 0xfffff);
3407 for (channel = 0; channel < NUM_CHANNELS; channel++)
3408 for (slot = 0; slot < NUM_SLOTS; slot++)
3409 for (rank = 0; rank < NUM_RANKS; rank++)
3410 if (info.
3411 populated_ranks[channel][slot]
3412 [rank])
3413 config_rank(&info, s3resume,
3414 channel, slot,
3415 rank);
3417 mchbar_write8(0x243, 1);
3418 mchbar_write8(0x643, 1);
3421 /* was == 1 but is common */
3422 pci_write_config16(NORTHBRIDGE, 0xc8, 3);
3423 write_26c(0, 0x820);
3424 write_26c(1, 0x820);
3425 mchbar_setbits32(0x130, 1 << 1);
3426 /* end */
3428 if (s3resume) {
3429 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3430 mchbar_write32(0x294 + (channel << 10),
3431 (info.populated_ranks_mask[channel] & 3) << 16);
3432 mchbar_write16(0x298 + (channel << 10),
3433 info.populated_ranks[channel][0][0] |
3434 info.populated_ranks[channel][0][1] << 5);
3435 mchbar_write32(0x29c + (channel << 10), 0x77a);
3437 mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00); // !!!!
3440 mchbar_clrbits32(0xfa4, 1 << 24 | 1 << 1);
3441 mchbar_write32(0xfb0, 0x2000e019);
3443 /* Before training. */
3444 timestamp_add_now(103);
3446 if (!s3resume)
3447 ram_training(&info);
3449 /* After training. */
3450 timestamp_add_now(104);
3452 dump_timings(&info);
3454 program_modules_memory_map(&info, 0);
3455 program_total_memory_map(&info);
3457 if (info.non_interleaved_part_mb != 0 && info.interleaved_part_mb != 0)
3458 mchbar_write8(0x111, 0 << 2 | 1 << 5 | 1 << 6 | 0 << 7);
3459 else if (have_match_ranks(&info, 0, 4) && have_match_ranks(&info, 1, 4))
3460 mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 1 << 7);
3461 else if (have_match_ranks(&info, 0, 2) && have_match_ranks(&info, 1, 2))
3462 mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 0 << 7);
3463 else
3464 mchbar_write8(0x111, 3 << 2 | 1 << 5 | 1 << 6 | 0 << 7);
3466 mchbar_clrbits32(0xfac, 1 << 31);
3467 mchbar_write32(0xfb4, 0x4800);
3468 mchbar_write32(0xfb8, (info.revision < 8) ? 0x20 : 0x0);
3469 mchbar_write32(0xe94, 0x7ffff);
3470 mchbar_write32(0xfc0, 0x80002040);
3471 mchbar_write32(0xfc4, 0x701246);
3472 mchbar_clrbits8(0xfc8, 0x70);
3473 mchbar_setbits32(0xe5c, 1 << 24);
3474 mchbar_clrsetbits32(0x1a70, 3 << 20, 2 << 20);
3475 mchbar_write32(0x50, 0x700b0);
3476 mchbar_write32(0x3c, 0x10);
3477 mchbar_clrsetbits8(0x1aa8, 0x3f, 0xa);
3478 mchbar_setbits8(0xff4, 1 << 1);
3479 mchbar_clrsetbits32(0xff8, 0xe008, 0x1020);
3481 mchbar_write32(0xd00, IOMMU_BASE2 | 1);
3482 mchbar_write32(0xd40, IOMMU_BASE1 | 1);
3483 mchbar_write32(0xdc0, IOMMU_BASE4 | 1);
3485 write32p(IOMMU_BASE1 | 0xffc, 0x80000000);
3486 write32p(IOMMU_BASE2 | 0xffc, 0xc0000000);
3487 write32p(IOMMU_BASE4 | 0xffc, 0x80000000);
3490 u32 eax;
3492 eax = info.fsb_frequency / 9;
3493 mchbar_clrsetbits32(0xfcc, 0x3ffff,
3494 (eax * 0x280) | (eax * 0x5000) | eax | 0x40000);
3495 mchbar_write32(0x20, 0x33001);
3498 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3499 mchbar_clrbits32(0x220 + (channel << 10), 0x7770);
3500 if (info.max_slots_used_in_channel == 1)
3501 mchbar_setbits16(0x237 + (channel << 10), 0x0201);
3502 else
3503 mchbar_clrbits16(0x237 + (channel << 10), 0x0201);
3505 mchbar_setbits8(0x241 + (channel << 10), 1 << 0);
3507 if (info.clock_speed_index <= 1 && (info.silicon_revision == 2
3508 || info.silicon_revision == 3))
3509 mchbar_setbits32(0x248 + (channel << 10), 0x00102000);
3510 else
3511 mchbar_clrbits32(0x248 + (channel << 10), 0x00102000);
3514 mchbar_setbits32(0x115, 1 << 24);
3517 u8 al;
3518 al = 0xd;
3519 if (!(info.silicon_revision == 0 || info.silicon_revision == 1))
3520 al += 2;
3521 al |= ((1 << (info.max_slots_used_in_channel - 1)) - 1) << 4;
3522 mchbar_write32(0x210, al << 16 | 0x20);
3525 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3526 mchbar_write32(0x288 + (channel << 10), 0x70605040);
3527 mchbar_write32(0x28c + (channel << 10), 0xfffec080);
3528 mchbar_write32(0x290 + (channel << 10), 0x282091c |
3529 (info.max_slots_used_in_channel - 1) << 0x16);
3531 u32 reg1c;
3532 pci_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
3533 reg1c = epbar_read32(EPVC1RCAP); // = 0x8001 // OK
3534 pci_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
3535 epbar_write32(EPVC1RCAP, reg1c); // OK
3536 mchbar_read8(0xe08); // = 0x0
3537 pci_read_config32(NORTHBRIDGE, 0xe4); // = 0x316126
3538 mchbar_setbits8(0x1210, 1 << 1);
3539 mchbar_write32(0x1200, 0x8800440);
3540 mchbar_write32(0x1204, 0x53ff0453);
3541 mchbar_write32(0x1208, 0x19002043);
3542 mchbar_write16(0x1214, 0x320);
3544 if (info.revision == 0x10 || info.revision == 0x11) {
3545 mchbar_write16(0x1214, 0x220);
3546 mchbar_setbits8(0x1210, 1 << 6);
3549 mchbar_setbits8(0x1214, 1 << 2);
3550 mchbar_write8(0x120c, 1);
3551 mchbar_write8(0x1218, 3);
3552 mchbar_write8(0x121a, 3);
3553 mchbar_write8(0x121c, 3);
3554 mchbar_write16(0xc14, 0);
3555 mchbar_write16(0xc20, 0);
3556 mchbar_write32(0x1c, 0);
3558 /* revision dependent here. */
3560 mchbar_setbits16(0x1230, 0x1f07);
3562 if (info.uma_enabled)
3563 mchbar_setbits32(0x11f4, 1 << 28);
3565 mchbar_setbits16(0x1230, 1 << 15);
3566 mchbar_setbits8(0x1214, 1 << 0);
3568 u8 bl, ebpb;
3569 u16 reg_1020;
3571 reg_1020 = mchbar_read32(0x1020); // = 0x6c733c // OK
3572 mchbar_write8(0x1070, 1);
3574 mchbar_write32(0x1000, 0x100);
3575 mchbar_write8(0x1007, 0);
3577 if (reg_1020 != 0) {
3578 mchbar_write16(0x1018, 0);
3579 bl = reg_1020 >> 8;
3580 ebpb = reg_1020 & 0xff;
3581 } else {
3582 ebpb = 0;
3583 bl = 8;
3586 rdmsr(0x1a2);
3588 mchbar_write32(0x1014, 0xffffffff);
3590 mchbar_write32(0x1010, ((((ebpb + 0x7d) << 7) / bl) & 0xff) * !!reg_1020);
3592 mchbar_write8(0x101c, 0xb8);
3594 mchbar_clrsetbits8(0x123e, 0xf0, 0x60);
3595 if (reg_1020 != 0) {
3596 mchbar_clrsetbits32(0x123c, 0xf << 20, 0x6 << 20);
3597 mchbar_write8(0x101c, 0xb8);
3600 const u64 heci_uma_addr =
3601 ((u64)
3602 ((((u64)pci_read_config16(NORTHBRIDGE, TOM)) << 6) -
3603 info.memory_reserved_for_heci_mb)) << 20;
3605 setup_heci_uma(heci_uma_addr, info.memory_reserved_for_heci_mb);
3607 if (info.uma_enabled) {
3608 u16 ax;
3609 mchbar_setbits32(0x11b0, 1 << 14);
3610 mchbar_setbits32(0x11b4, 1 << 14);
3611 mchbar_setbits16(0x1190, 1 << 14);
3613 ax = mchbar_read16(0x1190) & 0xf00; // = 0x480a // OK
3614 mchbar_write16(0x1170, ax | (mchbar_read16(0x1170) & 0x107f) | 0x4080);
3615 mchbar_setbits16(0x1170, 1 << 12);
3617 udelay(1000);
3619 u16 ecx;
3620 for (ecx = 0xffff; ecx && (mchbar_read16(0x1170) & (1 << 12)); ecx--)
3622 mchbar_clrbits16(0x1190, 1 << 14);
3625 pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
3626 pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) & ~0x80);
3627 udelay(10000);
3628 mchbar_write16(0x2ca8, 1 << 3);
3630 udelay(1000);
3631 dump_timings(&info);
3632 cbmem_wasnot_inited = cbmem_recovery(s3resume);
3634 if (!s3resume)
3635 save_timings(&info);
3636 if (s3resume && cbmem_wasnot_inited) {
3637 printk(BIOS_ERR, "Failed S3 resume.\n");
3638 ram_check_nodie(1 * MiB);
3640 /* Failed S3 resume, reset to come up cleanly */
3641 full_reset();