1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 #include <console/console.h>
4 #include <commonlib/helpers.h>
7 #include <device/mmio.h>
8 #include <device/pci_ops.h>
9 #include <device/smbus_host.h>
10 #include <cpu/x86/msr.h>
11 #include <cpu/x86/cache.h>
13 #include <cf9_reset.h>
15 #include <device/pci_def.h>
16 #include <device/device.h>
19 #include <timestamp.h>
20 #include <cpu/x86/mtrr.h>
21 #include <cpu/intel/speedstep.h>
22 #include <cpu/intel/turbo.h>
23 #include <mrc_cache.h>
24 #include <southbridge/intel/ibexpeak/me.h>
25 #include <southbridge/intel/common/pmbase.h>
32 #include "raminit_tables.h"
/* PCI devices used throughout raminit. */
#define NORTHBRIDGE	PCI_DEV(0, 0, 0)
#define SOUTHBRIDGE	PCI_DEV(0, 0x1f, 0)
#define GMA		PCI_DEV(0, 0x2, 0x0)
/* Iterate over every (channel, slot, rank) triple, populated or not.
   The loop variables channel/slot/rank must be declared by the caller. */
#define FOR_ALL_RANKS \
	for (channel = 0; channel < NUM_CHANNELS; channel++) \
		for (slot = 0; slot < NUM_SLOTS; slot++) \
			for (rank = 0; rank < NUM_RANKS; rank++)

/* As FOR_ALL_RANKS, but visit only ranks marked populated in 'info'. */
#define FOR_POPULATED_RANKS \
	for (channel = 0; channel < NUM_CHANNELS; channel++) \
		for (slot = 0; slot < NUM_SLOTS; slot++) \
			for (rank = 0; rank < NUM_RANKS; rank++) \
				if (info->populated_ranks[channel][slot][rank])

/* As FOR_POPULATED_RANKS, but walk the channels in descending order. */
#define FOR_POPULATED_RANKS_BACKWARDS \
	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \
		for (slot = 0; slot < NUM_SLOTS; slot++) \
			for (rank = 0; rank < NUM_RANKS; rank++) \
				if (info->populated_ranks[channel][slot][rank])
55 #include <lib.h> /* Prototypes */
57 typedef struct _u128
{
62 static void read128(u32 addr
, u64
* out
)
66 asm volatile ("movdqu %%xmm0, %0\n"
67 "movdqa (%2), %%xmm0\n"
69 "movdqu %0, %%xmm0":"+m" (stor
), "=m"(ret
):"r"(addr
));
75 * Ironlake memory I/O timings are located in scan chains, accessible
76 * through MCHBAR register groups. Each channel has a scan chain, and
77 * there's a global scan chain too. Each chain is broken into smaller
78 * sections of N bits, where N <= 32. Each section allows reading and
79 * writing a certain parameter. Each section contains N - 2 data bits
80 * and two additional bits: a Mask bit, and a Halt bit.
84 static void write_1d0(u32 val
, u16 addr
, int bits
, int flag
)
86 mchbar_write32(0x1d0, 0);
87 while (mchbar_read32(0x1d0) & (1 << 23))
89 mchbar_write32(0x1d4, (val
& ((1 << bits
) - 1)) | 2 << bits
| flag
<< bits
);
90 mchbar_write32(0x1d0, 1 << 30 | addr
);
91 while (mchbar_read32(0x1d0) & (1 << 23))
96 static u16
read_1d0(u16 addr
, int split
)
99 mchbar_write32(0x1d0, 0);
100 while (mchbar_read32(0x1d0) & (1 << 23))
102 mchbar_write32(0x1d0, 1 << 31 | (((mchbar_read8(0x246) >> 2) & 3) + 0x361 - addr
));
103 while (mchbar_read32(0x1d0) & (1 << 23))
105 val
= mchbar_read32(0x1d8);
106 write_1d0(0, 0x33d, 0, 0);
107 write_1d0(0, 0x33d, 0, 0);
108 val
&= ((1 << split
) - 1);
109 // printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val);
113 static void sfence(void)
115 asm volatile ("sfence");
118 static inline u16
get_lane_offset(int slot
, int rank
, int lane
)
120 return 0x124 * lane
+ ((lane
& 4) ? 0x23e : 0) + 11 * rank
+ 22 * slot
-
124 static inline u16
get_timing_register_addr(int lane
, int tm
, int slot
, int rank
)
126 const u16 offs
[] = { 0x1d, 0xa8, 0xe6, 0x5c };
127 return get_lane_offset(slot
, rank
, lane
) + offs
[(tm
+ 3) % 4];
130 static u32
gav_real(int line
, u32 in
)
132 // printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
/* Pass a value through gav_real() together with the source line; the
   debug printk inside gav_real is currently commented out. */
#define gav(x) gav_real(__LINE__, (x))
138 /* Global allocation of timings_car */
139 timing_bounds_t timings_car
[64];
143 read_500(struct raminfo
*info
, int channel
, u16 addr
, int split
)
146 info
->last_500_command
[channel
] = 1 << 31;
147 mchbar_write32(0x500 + (channel
<< 10), 0);
148 while (mchbar_read32(0x500 + (channel
<< 10)) & (1 << 23))
150 mchbar_write32(0x500 + (channel
<< 10),
151 1 << 31 | (((mchbar_read8(0x246 + (channel
<< 10)) >> 2) & 3) + 0xb88 - addr
));
152 while (mchbar_read32(0x500 + (channel
<< 10)) & (1 << 23))
154 val
= mchbar_read32(0x508 + (channel
<< 10));
155 return val
& ((1 << split
) - 1);
160 write_500(struct raminfo
*info
, int channel
, u32 val
, u16 addr
, int bits
,
163 if (info
->last_500_command
[channel
] == 1 << 31) {
164 info
->last_500_command
[channel
] = 1 << 30;
165 write_500(info
, channel
, 0, 0xb61, 0, 0);
167 mchbar_write32(0x500 + (channel
<< 10), 0);
168 while (mchbar_read32(0x500 + (channel
<< 10)) & (1 << 23))
170 mchbar_write32(0x504 + (channel
<< 10),
171 (val
& ((1 << bits
) - 1)) | 2 << bits
| flag
<< bits
);
172 mchbar_write32(0x500 + (channel
<< 10), 1 << 30 | addr
);
173 while (mchbar_read32(0x500 + (channel
<< 10)) & (1 << 23))
177 static void rmw_500(struct raminfo
*info
, int channel
, u16 addr
, int bits
, u32
and, u32
or)
179 const u32 val
= read_500(info
, channel
, addr
, bits
) & and;
180 write_500(info
, channel
, val
| or, addr
, bits
, 1);
183 static int rw_test(int rank
)
185 const u32 mask
= 0xf00fc33c;
188 for (i
= 0; i
< 64; i
++)
189 write32p((rank
<< 28) | (i
<< 2), 0);
191 for (i
= 0; i
< 64; i
++)
192 gav(read32p((rank
<< 28) | (i
<< 2)));
194 for (i
= 0; i
< 32; i
++) {
195 u32 pat
= (((mask
>> i
) & 1) ? 0xffffffff : 0);
196 write32p((rank
<< 28) | (i
<< 3), pat
);
197 write32p((rank
<< 28) | (i
<< 3) | 4, pat
);
200 for (i
= 0; i
< 32; i
++) {
201 u8 pat
= (((mask
>> i
) & 1) ? 0xff : 0);
204 gav(val
= read32p((rank
<< 28) | (i
<< 3)));
205 for (j
= 0; j
< 4; j
++)
206 if (((val
>> (j
* 8)) & 0xff) != pat
)
208 gav(val
= read32p((rank
<< 28) | (i
<< 3) | 4));
209 for (j
= 0; j
< 4; j
++)
210 if (((val
>> (j
* 8)) & 0xff) != pat
)
214 for (i
= 0; i
< 64; i
++)
215 write32p((rank
<< 28) | (i
<< 2), 0);
217 for (i
= 0; i
< 64; i
++)
218 gav(read32p((rank
<< 28) | (i
<< 2)));
224 program_timings(struct raminfo
*info
, u16 base
, int channel
, int slot
, int rank
)
227 for (lane
= 0; lane
< 8; lane
++) {
228 write_500(info
, channel
,
231 lane_timings
[2][channel
][slot
][rank
][lane
],
232 get_timing_register_addr(lane
, 2, slot
, rank
), 9, 0);
233 write_500(info
, channel
,
236 lane_timings
[3][channel
][slot
][rank
][lane
],
237 get_timing_register_addr(lane
, 3, slot
, rank
), 9, 0);
241 static void write_26c(int channel
, u16 si
)
243 mchbar_write32(0x26c + (channel
<< 10), 0x03243f35);
244 mchbar_write32(0x268 + (channel
<< 10), 0xcfc00000 | si
<< 9);
245 mchbar_write16(0x2b9 + (channel
<< 10), si
);
248 static void toggle_1d0_142_5ff(void)
250 u32 reg32
= gav(read_1d0(0x142, 3));
251 if (reg32
& (1 << 1))
252 write_1d0(0, 0x142, 3, 1);
254 mchbar_write8(0x5ff, 0);
255 mchbar_write8(0x5ff, 1 << 7);
256 if (reg32
& (1 << 1))
257 write_1d0(0x2, 0x142, 3, 1);
260 static u32
get_580(int channel
, u8 addr
)
263 toggle_1d0_142_5ff();
264 mchbar_write32(0x580 + (channel
<< 10), 0x8493c012 | addr
);
265 mchbar_setbits8(0x580 + (channel
<< 10), 1 << 0);
266 while (!((ret
= mchbar_read32(0x580 + (channel
<< 10))) & (1 << 16)))
268 mchbar_clrbits8(0x580 + (channel
<< 10), 1 << 0);
/* Bit positions of the rank and channel fields in test addresses. */
#define RANK_SHIFT 28
#define CHANNEL_SHIFT 10
275 static void seq9(struct raminfo
*info
, int channel
, int slot
, int rank
)
279 for (i
= 0; i
< 2; i
++)
280 for (lane
= 0; lane
< 8; lane
++)
281 write_500(info
, channel
,
282 info
->training
.lane_timings
[i
+
284 [rank
][lane
], get_timing_register_addr(lane
,
290 write_1d0(1, 0x103, 6, 1);
291 for (lane
= 0; lane
< 8; lane
++)
292 write_500(info
, channel
,
294 lane_timings
[0][channel
][slot
][rank
][lane
],
295 get_timing_register_addr(lane
, 0, slot
, rank
), 9, 0);
297 for (i
= 0; i
< 2; i
++) {
298 for (lane
= 0; lane
< 8; lane
++)
299 write_500(info
, channel
,
300 info
->training
.lane_timings
[i
+
302 [rank
][lane
], get_timing_register_addr(lane
,
307 gav(get_580(channel
, ((i
+ 1) << 2) | (rank
<< 5)));
310 toggle_1d0_142_5ff();
311 write_1d0(0x2, 0x142, 3, 1);
313 for (lane
= 0; lane
< 8; lane
++) {
314 // printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
315 info
->training
.lane_timings
[2][channel
][slot
][rank
][lane
] =
316 read_500(info
, channel
,
317 get_timing_register_addr(lane
, 2, slot
, rank
), 9);
318 //printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
319 info
->training
.lane_timings
[3][channel
][slot
][rank
][lane
] =
320 info
->training
.lane_timings
[2][channel
][slot
][rank
][lane
] +
325 static int count_ranks_in_channel(struct raminfo
*info
, int channel
)
329 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
330 for (rank
= 0; rank
< NUM_SLOTS
; rank
++)
331 res
+= info
->populated_ranks
[channel
][slot
][rank
];
336 config_rank(struct raminfo
*info
, int s3resume
, int channel
, int slot
, int rank
)
340 write_1d0(0, 0x178, 7, 1);
341 seq9(info
, channel
, slot
, rank
);
342 program_timings(info
, 0x80, channel
, slot
, rank
);
345 add
= count_ranks_in_channel(info
, 1);
349 gav(rw_test(rank
+ add
));
350 program_timings(info
, 0x00, channel
, slot
, rank
);
352 gav(rw_test(rank
+ add
));
354 gav(rw_test(rank
+ add
));
355 write_1d0(0, 0x142, 3, 1);
356 write_1d0(0, 0x103, 6, 1);
358 gav(get_580(channel
, 0xc | (rank
<< 5)));
359 gav(read_1d0(0x142, 3));
361 mchbar_write8(0x5ff, 0);
362 mchbar_write8(0x5ff, 1 << 7);
365 static void set_4cf(struct raminfo
*info
, int channel
, u8 bit
, u8 val
)
367 const u16 regtable
[] = { 0x4cf, 0x659, 0x697 };
370 for (int i
= 0; i
< ARRAY_SIZE(regtable
); i
++)
371 rmw_500(info
, channel
, regtable
[i
], 4, ~(1 << bit
), val
<< bit
);
374 static void set_334(int zero
)
377 const u32 val3
[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
380 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
381 for (j
= 0; j
< 4; j
++) {
382 u32 a
= (j
== 1) ? 0x29292929 : 0x31313131;
383 u32 lmask
= (j
== 3) ? 0xffff : 0xffffffff;
385 if ((j
== 0 || j
== 3) && zero
)
392 for (k
= 0; k
< 2; k
++) {
393 mchbar_write32(0x138 + 8 * k
, channel
<< 26 | j
<< 24);
394 gav(vd8
[1][(channel
<< 3) | (j
<< 1) | k
] =
395 mchbar_read32(0x138 + 8 * k
));
396 gav(vd8
[0][(channel
<< 3) | (j
<< 1) | k
] =
397 mchbar_read32(0x13c + 8 * k
));
400 mchbar_write32(0x334 + (channel
<< 10) + j
* 0x44, zero
? 0 : val3
[j
]);
401 mchbar_write32(0x32c + (channel
<< 10) + j
* 0x44,
402 zero
? 0 : 0x18191819 & lmask
);
403 mchbar_write16(0x34a + (channel
<< 10) + j
* 0x44, c
);
404 mchbar_write32(0x33c + (channel
<< 10) + j
* 0x44,
405 zero
? 0 : a
& lmask
);
406 mchbar_write32(0x344 + (channel
<< 10) + j
* 0x44,
407 zero
? 0 : a
& lmask
);
411 mchbar_setbits32(0x130, 1 << 0);
412 while (mchbar_read8(0x130) & 1)
416 static void rmw_1d0(u16 addr
, u32
and, u32
or, int split
)
419 v
= read_1d0(addr
, split
);
420 write_1d0((v
& and) | or, addr
, split
, 1);
423 static int find_highest_bit_set(u16 val
)
426 for (i
= 15; i
>= 0; i
--)
432 static int find_lowest_bit_set32(u32 val
)
435 for (i
= 0; i
< 32; i
++)
446 MEMORY_BUS_WIDTH
= 8,
447 TIMEBASE_DIVIDEND
= 10,
448 TIMEBASE_DIVISOR
= 11,
451 CAS_LATENCIES_LSB
= 14,
452 CAS_LATENCIES_MSB
= 15,
453 CAS_LATENCY_TIME
= 16,
454 THERMAL_AND_REFRESH
= 31,
455 REFERENCE_RAW_CARD_USED
= 62,
456 RANK1_ADDRESS_MAPPING
= 63
459 static void calculate_timings(struct raminfo
*info
)
461 unsigned int cycletime
;
462 unsigned int cas_latency_time
;
463 unsigned int supported_cas_latencies
;
464 unsigned int channel
, slot
;
465 unsigned int clock_speed_index
;
466 unsigned int min_cas_latency
;
467 unsigned int cas_latency
;
468 unsigned int max_clock_index
;
470 /* Find common CAS latency */
471 supported_cas_latencies
= 0x3fe;
472 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
473 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
474 if (info
->populated_ranks
[channel
][slot
][0])
475 supported_cas_latencies
&=
478 spd
[channel
][slot
][CAS_LATENCIES_LSB
] |
480 spd
[channel
][slot
][CAS_LATENCIES_MSB
] <<
483 max_clock_index
= MIN(3, info
->max_supported_clock_speed_index
);
485 cycletime
= min_cycletime
[max_clock_index
];
486 cas_latency_time
= min_cas_latency_time
[max_clock_index
];
488 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
489 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
490 if (info
->populated_ranks
[channel
][slot
][0]) {
491 unsigned int timebase
;
495 spd
[channel
][slot
][TIMEBASE_DIVIDEND
] /
496 info
->spd
[channel
][slot
][TIMEBASE_DIVISOR
];
500 info
->spd
[channel
][slot
][CYCLETIME
]);
502 MAX(cas_latency_time
,
505 spd
[channel
][slot
][CAS_LATENCY_TIME
]);
507 if (cycletime
> min_cycletime
[0])
508 die("RAM init: Decoded SPD DRAM freq is slower than the controller minimum!");
509 for (clock_speed_index
= 0; clock_speed_index
< 3; clock_speed_index
++) {
510 if (cycletime
== min_cycletime
[clock_speed_index
])
512 if (cycletime
> min_cycletime
[clock_speed_index
]) {
514 cycletime
= min_cycletime
[clock_speed_index
];
518 min_cas_latency
= DIV_ROUND_UP(cas_latency_time
, cycletime
);
520 while (supported_cas_latencies
) {
521 cas_latency
= find_highest_bit_set(supported_cas_latencies
) + 3;
522 if (cas_latency
<= min_cas_latency
)
524 supported_cas_latencies
&=
525 ~(1 << find_highest_bit_set(supported_cas_latencies
));
528 if (cas_latency
!= min_cas_latency
&& clock_speed_index
)
531 if (cas_latency
* min_cycletime
[clock_speed_index
] > 20000)
532 die("Couldn't configure DRAM");
533 info
->clock_speed_index
= clock_speed_index
;
534 info
->cas_latency
= cas_latency
;
537 static void program_base_timings(struct raminfo
*info
)
539 unsigned int channel
;
540 unsigned int slot
, rank
, lane
;
541 unsigned int extended_silicon_revision
;
544 extended_silicon_revision
= info
->silicon_revision
;
545 if (info
->silicon_revision
== 0)
546 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
547 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
549 spd
[channel
][slot
][MODULE_TYPE
] & 0xF) ==
551 extended_silicon_revision
= 4;
553 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
554 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
555 for (rank
= 0; rank
< NUM_SLOTS
; rank
++) {
557 if (!info
->populated_ranks
[channel
][slot
][rank
])
560 for (lane
= 0; lane
< 9; lane
++) {
566 spd
[channel
][slot
][MODULE_TYPE
] &
572 [REFERENCE_RAW_CARD_USED
] &
574 if (reference_card
== 3)
579 if (reference_card
== 5)
587 lane_timings
[0][channel
][slot
][rank
]
592 lane_timings
[1][channel
][slot
][rank
]
595 for (tm_reg
= 2; tm_reg
< 4; tm_reg
++)
598 [channel
][slot
][rank
][lane
]
601 [extended_silicon_revision
]
605 + info
->max4048
[channel
]
608 [extended_silicon_revision
]
610 mode4030
[channel
]][slot
]
614 for (tm_reg
= 0; tm_reg
< 4; tm_reg
++)
615 write_500(info
, channel
,
618 [channel
][slot
][rank
]
620 get_timing_register_addr
626 if (!(extended_silicon_revision
!= 4
628 populated_ranks_mask
[channel
] & 5) ==
632 [REFERENCE_RAW_CARD_USED
] & 0x1F)
635 u16_FFFE0EB8
[0][info
->
639 [REFERENCE_RAW_CARD_USED
] & 0x1F)
642 u16_FFFE0EB8
[1][info
->
646 for (i
= 0; i
< 3; i
++)
647 write_500(info
, channel
,
649 info
->max4048
[channel
]
652 [extended_silicon_revision
]
654 mode4030
[channel
]][info
->
656 u16_fffd0c50
[i
][slot
][rank
],
658 write_500(info
, channel
,
659 (info
->max4048
[channel
] +
661 [extended_silicon_revision
][info
->
666 u16_fffd0c70
[slot
][rank
], 7, 1);
668 if (!info
->populated_ranks_mask
[channel
])
670 for (i
= 0; i
< 3; i
++)
671 write_500(info
, channel
,
672 (info
->max4048
[channel
] +
673 info
->avg4044
[channel
]
676 [extended_silicon_revision
][info
->
680 u16_fffd0c68
[i
], 8, 1);
/* The time of one clock cycle, in ps. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return 2 * halfcycle_ps(info);
}
/* Frequency in 0.1 MHz units. */
static unsigned int frequency_01(struct raminfo *info)
{
	return 100 * frequency_11(info) / 9;
}
/* Convert a duration in ps to memory half-cycles (truncating). */
static unsigned int ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return (frequency_11(info) * 2) * ps / 900000;
}
/* Convert a duration in ns to full memory cycles (truncating). */
static unsigned int ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return (frequency_11(info)) * ns / 900;
}
706 static void compute_derived_timings(struct raminfo
*info
)
708 unsigned int channel
, slot
, rank
;
709 int extended_silicon_revision
;
712 int some_delay_2_halfcycles_ceil
;
713 int some_delay_2_halfcycles_floor
;
715 int some_delay_3_ps_rounded
;
716 int some_delay_1_cycle_ceil
;
717 int some_delay_1_cycle_floor
;
719 some_delay_3_ps_rounded
= 0;
720 extended_silicon_revision
= info
->silicon_revision
;
721 if (!info
->silicon_revision
)
722 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
723 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
725 spd
[channel
][slot
][MODULE_TYPE
] & 0xF) ==
727 extended_silicon_revision
= 4;
728 if (info
->board_lane_delay
[7] < 5)
729 info
->board_lane_delay
[7] = 5;
730 info
->revision_flag_1
= 2;
731 if (info
->silicon_revision
== 2 || info
->silicon_revision
== 3)
732 info
->revision_flag_1
= 0;
733 if (info
->revision
< 16)
734 info
->revision_flag_1
= 0;
736 if (info
->revision
< 8)
737 info
->revision_flag_1
= 0;
738 if (info
->revision
>= 8 && (info
->silicon_revision
== 0
739 || info
->silicon_revision
== 1))
740 some_delay_2_ps
= 735;
742 some_delay_2_ps
= 750;
744 if (info
->revision
>= 0x10 && (info
->silicon_revision
== 0
745 || info
->silicon_revision
== 1))
746 some_delay_1_ps
= 3929;
748 some_delay_1_ps
= 3490;
750 some_delay_1_cycle_floor
= some_delay_1_ps
/ cycle_ps(info
);
751 some_delay_1_cycle_ceil
= some_delay_1_ps
/ cycle_ps(info
);
752 if (some_delay_1_ps
% cycle_ps(info
))
753 some_delay_1_cycle_ceil
++;
755 some_delay_1_cycle_floor
--;
756 info
->some_delay_1_cycle_floor
= some_delay_1_cycle_floor
;
757 if (info
->revision_flag_1
)
758 some_delay_2_ps
= halfcycle_ps(info
) >> 6;
760 MAX(some_delay_1_ps
- 30,
761 2 * halfcycle_ps(info
) * (some_delay_1_cycle_ceil
- 1) + 1000) +
764 halfcycle_ps(info
) - some_delay_2_ps
% halfcycle_ps(info
);
765 if (info
->revision_flag_1
) {
766 if (some_delay_3_ps
>= 150) {
767 const int some_delay_3_halfcycles
=
768 (some_delay_3_ps
<< 6) / halfcycle_ps(info
);
769 some_delay_3_ps_rounded
=
770 halfcycle_ps(info
) * some_delay_3_halfcycles
>> 6;
773 some_delay_2_halfcycles_ceil
=
774 (some_delay_2_ps
+ halfcycle_ps(info
) - 1) / halfcycle_ps(info
) -
775 2 * (some_delay_1_cycle_ceil
- 1);
776 if (info
->revision_flag_1
&& some_delay_3_ps
< 150)
777 some_delay_2_halfcycles_ceil
++;
778 some_delay_2_halfcycles_floor
= some_delay_2_halfcycles_ceil
;
779 if (info
->revision
< 0x10)
780 some_delay_2_halfcycles_floor
=
781 some_delay_2_halfcycles_ceil
- 1;
782 if (!info
->revision_flag_1
)
783 some_delay_2_halfcycles_floor
++;
784 /* FIXME: this variable is unused. Should it be used? */
785 (void)some_delay_2_halfcycles_floor
;
786 info
->some_delay_2_halfcycles_ceil
= some_delay_2_halfcycles_ceil
;
787 info
->some_delay_3_ps_rounded
= some_delay_3_ps_rounded
;
788 if ((info
->populated_ranks
[0][0][0] && info
->populated_ranks
[0][1][0])
789 || (info
->populated_ranks
[1][0][0]
790 && info
->populated_ranks
[1][1][0]))
791 info
->max_slots_used_in_channel
= 2;
793 info
->max_slots_used_in_channel
= 1;
794 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
795 mchbar_write32(0x244 + (channel
<< 10),
796 ((info
->revision
< 8) ? 1 : 0x200) |
797 ((2 - info
->max_slots_used_in_channel
) << 17) |
799 (info
->some_delay_1_cycle_floor
<< 18) | 0x9510);
800 if (info
->max_slots_used_in_channel
== 1) {
801 info
->mode4030
[0] = (count_ranks_in_channel(info
, 0) == 2);
802 info
->mode4030
[1] = (count_ranks_in_channel(info
, 1) == 2);
804 info
->mode4030
[0] = ((count_ranks_in_channel(info
, 0) == 1) || (count_ranks_in_channel(info
, 0) == 2)) ? 2 : 3; /* 2 if 1 or 2 ranks */
805 info
->mode4030
[1] = ((count_ranks_in_channel(info
, 1) == 1)
806 || (count_ranks_in_channel(info
, 1) ==
809 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
816 if (!info
->populated_ranks_mask
[channel
])
820 min_of_unk_2
= 32767;
824 for (i
= 0; i
< 3; i
++) {
826 if (info
->revision
< 8)
828 u8_FFFD1891
[0][channel
][info
->
832 (info
->revision
>= 0x10
833 || info
->revision_flag_1
))
835 u8_FFFD1891
[1][channel
][info
->
840 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
841 for (rank
= 0; rank
< NUM_RANKS
; rank
++) {
846 populated_ranks
[channel
][slot
]
849 if (extended_silicon_revision
== 4
851 populated_ranks_mask
[channel
] &
855 [REFERENCE_RAW_CARD_USED
] &
860 b
= u16_fe0eb8
[0][info
->
865 [REFERENCE_RAW_CARD_USED
]
870 b
= u16_fe0eb8
[1][info
->
874 min_of_unk_2
= MIN(min_of_unk_2
, a
);
875 min_of_unk_2
= MIN(min_of_unk_2
, b
);
884 [extended_silicon_revision
]
886 mode4030
[channel
]][info
->
897 [extended_silicon_revision
][info
->
900 [info
->clock_speed_index
] + min_of_unk_2
;
902 max_of_unk
= MAX(max_of_unk
, unk1
- t
);
907 die("No memory ranks found for channel %u\n", channel
);
909 info
->avg4044
[channel
] = sum
/ count
;
910 info
->max4048
[channel
] = max_of_unk
;
914 static void jedec_read(struct raminfo
*info
,
915 int channel
, int slot
, int rank
,
916 int total_rank
, u8 addr3
, unsigned int value
)
918 /* Handle mirrored mapping. */
919 if ((rank
& 1) && (info
->spd
[channel
][slot
][RANK1_ADDRESS_MAPPING
] & 1)) {
920 addr3
= (addr3
& 0xCF) | ((addr3
& 0x10) << 1) | ((addr3
>> 1) & 0x10);
921 value
= (value
& ~0x1f8) | ((value
>> 1) & 0xa8) | ((value
& 0xa8) << 1);
924 mchbar_clrsetbits8(0x271, 0x1f << 1, addr3
);
925 mchbar_clrsetbits8(0x671, 0x1f << 1, addr3
);
927 read32p((value
<< 3) | (total_rank
<< 28));
929 mchbar_clrsetbits8(0x271, 0x1f << 1, 1 << 1);
930 mchbar_clrsetbits8(0x671, 0x1f << 1, 1 << 1);
932 read32p(total_rank
<< 28);
943 MR0_BT_INTERLEAVED
= 8,
944 MR0_DLL_RESET_ON
= 256
948 MR2_RTT_WR_DISABLED
= 0,
952 static void jedec_init(struct raminfo
*info
)
955 int channel
, slot
, rank
;
958 int self_refresh_temperature
;
959 int auto_self_refresh
;
961 auto_self_refresh
= 1;
962 self_refresh_temperature
= 1;
963 if (info
->board_lane_delay
[3] <= 10) {
964 if (info
->board_lane_delay
[3] <= 8)
965 write_recovery
= info
->board_lane_delay
[3] - 4;
971 FOR_POPULATED_RANKS
{
973 (info
->spd
[channel
][slot
][THERMAL_AND_REFRESH
] >> 2) & 1;
974 self_refresh_temperature
&=
975 info
->spd
[channel
][slot
][THERMAL_AND_REFRESH
] & 1;
977 if (auto_self_refresh
== 1)
978 self_refresh_temperature
= 0;
980 dll_on
= ((info
->silicon_revision
!= 2 && info
->silicon_revision
!= 3)
981 || (info
->populated_ranks
[0][0][0]
982 && info
->populated_ranks
[0][1][0])
983 || (info
->populated_ranks
[1][0][0]
984 && info
->populated_ranks
[1][1][0]));
988 for (channel
= NUM_CHANNELS
- 1; channel
>= 0; channel
--) {
989 int rtt
, rtt_wr
= MR2_RTT_WR_DISABLED
;
992 if (info
->silicon_revision
== 2 || info
->silicon_revision
== 3) {
995 if (info
->clock_speed_index
!= 0) {
997 if (info
->populated_ranks_mask
[channel
] == 3)
1001 if ((info
->populated_ranks_mask
[channel
] & 5) == 5) {
1011 mchbar_write16(0x588 + (channel
<< 10), 0);
1012 mchbar_write16(0x58a + (channel
<< 10), 4);
1013 mchbar_write16(0x58c + (channel
<< 10), rtt
| MR1_ODS34OHM
);
1014 mchbar_write16(0x58e + (channel
<< 10), rzq_reg58e
| 0x82);
1015 mchbar_write16(0x590 + (channel
<< 10), 0x1282);
1017 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
1018 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
1019 if (info
->populated_ranks
[channel
][slot
][rank
]) {
1020 jedec_read(info
, channel
, slot
, rank
,
1025 | (auto_self_refresh
<< 6) |
1026 (self_refresh_temperature
<<
1028 jedec_read(info
, channel
, slot
, rank
,
1029 total_rank
, 0x38, 0);
1030 jedec_read(info
, channel
, slot
, rank
,
1032 rtt
| MR1_ODS34OHM
);
1033 jedec_read(info
, channel
, slot
, rank
,
1036 (write_recovery
<< 9)
1037 | ((info
->cas_latency
- 4) <<
1038 4) | MR0_BT_INTERLEAVED
|
1045 static void program_modules_memory_map(struct raminfo
*info
, int pre_jedec
)
1047 unsigned int channel
, slot
, rank
;
1048 unsigned int total_mb
[2] = { 0, 0 }; /* total memory per channel in MB */
1049 unsigned int channel_0_non_interleaved
;
1052 if (info
->populated_ranks
[channel
][slot
][rank
]) {
1053 total_mb
[channel
] +=
1054 pre_jedec
? 256 : (256 << info
->
1055 density
[channel
][slot
] >> info
->
1056 is_x16_module
[channel
][slot
]);
1057 mchbar_write8(0x208 + rank
+ 2 * slot
+ (channel
<< 10),
1058 (pre_jedec
? (1 | ((1 + 1) << 1)) :
1059 (info
->is_x16_module
[channel
][slot
] |
1060 ((info
->density
[channel
][slot
] + 1) << 1))) |
1063 mchbar_write16(0x200 + (channel
<< 10) + 4 * slot
+ 2 * rank
,
1064 total_mb
[channel
] >> 6);
1067 info
->total_memory_mb
= total_mb
[0] + total_mb
[1];
1069 info
->interleaved_part_mb
=
1070 pre_jedec
? 0 : 2 * MIN(total_mb
[0], total_mb
[1]);
1071 info
->non_interleaved_part_mb
=
1072 total_mb
[0] + total_mb
[1] - info
->interleaved_part_mb
;
1073 channel_0_non_interleaved
= total_mb
[0] - info
->interleaved_part_mb
/ 2;
1074 mchbar_write32(0x100, channel_0_non_interleaved
| info
->non_interleaved_part_mb
<< 16);
1076 mchbar_write16(0x104, info
->interleaved_part_mb
);
1079 static void program_board_delay(struct raminfo
*info
)
1081 int cas_latency_shift
;
1083 int some_delay_3_half_cycles
;
1085 unsigned int channel
, i
;
1086 int high_multiplier
;
1088 int cas_latency_derived
;
1090 high_multiplier
= 0;
1091 some_delay_ns
= 200;
1092 some_delay_3_half_cycles
= 4;
1093 cas_latency_shift
= info
->silicon_revision
== 0
1094 || info
->silicon_revision
== 1 ? 1 : 0;
1095 if (info
->revision
< 8) {
1096 some_delay_ns
= 600;
1097 cas_latency_shift
= 0;
1102 ((info
->clock_speed_index
> 1
1103 || (info
->silicon_revision
!= 2
1104 && info
->silicon_revision
!= 3))) ^ (info
->revision
>=
1106 write_500(info
, 0, speed_bit
| ((!info
->use_ecc
) << 1), 0x60e,
1108 write_500(info
, 1, speed_bit
| ((!info
->use_ecc
) << 1), 0x60e,
1110 if (info
->revision
>= 0x10 && info
->clock_speed_index
<= 1
1111 && (info
->silicon_revision
== 2
1112 || info
->silicon_revision
== 3))
1113 rmw_1d0(0x116, 5, 2, 4);
1115 mchbar_write32(0x120, 1 << (info
->max_slots_used_in_channel
+ 28) | 0x188e7f9f);
1117 mchbar_write8(0x124, info
->board_lane_delay
[4] + (frequency_01(info
) + 999) / 1000);
1118 mchbar_write16(0x125, 0x1360);
1119 mchbar_write8(0x127, 0x40);
1120 if (info
->fsb_frequency
< frequency_11(info
) / 2) {
1121 unsigned int some_delay_2_half_cycles
;
1122 high_multiplier
= 1;
1123 some_delay_2_half_cycles
= ps_to_halfcycles(info
,
1136 some_delay_3_half_cycles
=
1137 MIN((some_delay_2_half_cycles
+
1138 (frequency_11(info
) * 2) * (28 -
1139 some_delay_2_half_cycles
) /
1140 (frequency_11(info
) * 2 -
1141 4 * (info
->fsb_frequency
))) >> 3, 7);
1143 if (mchbar_read8(0x2ca9) & 1)
1144 some_delay_3_half_cycles
= 3;
1145 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
1146 mchbar_setbits32(0x220 + (channel
<< 10), 0x18001117);
1147 mchbar_write32(0x224 + (channel
<< 10),
1148 (info
->max_slots_used_in_channel
- 1) |
1149 (info
->cas_latency
- 5 - info
->clock_speed_index
)
1150 << 21 | (info
->max_slots_used_in_channel
+
1151 info
->cas_latency
- cas_latency_shift
- 4) << 16 |
1152 (info
->cas_latency
- cas_latency_shift
- 4) << 26 |
1153 (info
->cas_latency
- info
->clock_speed_index
+
1154 info
->max_slots_used_in_channel
- 6) << 8);
1155 mchbar_write32(0x228 + (channel
<< 10), info
->max_slots_used_in_channel
);
1156 mchbar_write8(0x239 + (channel
<< 10), 32);
1157 mchbar_write32(0x248 + (channel
<< 10), high_multiplier
<< 24 |
1158 some_delay_3_half_cycles
<< 25 | 0x840000);
1159 mchbar_write32(0x278 + (channel
<< 10), 0xc362042);
1160 mchbar_write32(0x27c + (channel
<< 10), 0x8b000062);
1161 mchbar_write32(0x24c + (channel
<< 10),
1162 (!!info
->clock_speed_index
) << 17 |
1163 ((2 + info
->clock_speed_index
-
1164 (!!info
->clock_speed_index
))) << 12 | 0x10200);
1166 mchbar_write8(0x267 + (channel
<< 10), 4);
1167 mchbar_write16(0x272 + (channel
<< 10), 0x155);
1168 mchbar_clrsetbits32(0x2bc + (channel
<< 10), 0xffffff, 0x707070);
1170 write_500(info
, channel
,
1171 ((!info
->populated_ranks
[channel
][1][1])
1172 | (!info
->populated_ranks
[channel
][1][0] << 1)
1173 | (!info
->populated_ranks
[channel
][0][1] << 2)
1174 | (!info
->populated_ranks
[channel
][0][0] << 3)),
1178 mchbar_write8(0x2c4, (1 + (info
->clock_speed_index
!= 0)) << 6 | 0xc);
1180 u8 freq_divisor
= 2;
1181 if (info
->fsb_frequency
== frequency_11(info
))
1183 else if (2 * info
->fsb_frequency
< 3 * (frequency_11(info
) / 2))
1187 mchbar_write32(0x2c0, freq_divisor
<< 11 | 0x6009c400);
1190 if (info
->board_lane_delay
[3] <= 10) {
1191 if (info
->board_lane_delay
[3] <= 8)
1192 lane_3_delay
= info
->board_lane_delay
[3];
1198 cas_latency_derived
= info
->cas_latency
- info
->clock_speed_index
+ 2;
1199 if (info
->clock_speed_index
> 1)
1200 cas_latency_derived
++;
1201 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
1202 mchbar_write32(0x240 + (channel
<< 10),
1203 ((info
->clock_speed_index
== 0) * 0x11000) |
1204 0x1002100 | (2 + info
->clock_speed_index
) << 4 |
1205 (info
->cas_latency
- 3));
1206 write_500(info
, channel
, (info
->clock_speed_index
<< 1) | 1,
1208 write_500(info
, channel
,
1209 info
->clock_speed_index
+ 2 * info
->cas_latency
- 7,
1212 mchbar_write32(0x250 + (channel
<< 10),
1213 (lane_3_delay
+ info
->clock_speed_index
+ 9) << 6 |
1214 info
->board_lane_delay
[7] << 2 |
1215 info
->board_lane_delay
[4] << 16 |
1216 info
->board_lane_delay
[1] << 25 |
1217 info
->board_lane_delay
[1] << 29 | 1);
1218 mchbar_write32(0x254 + (channel
<< 10),
1219 info
->board_lane_delay
[1] >> 3 |
1220 (info
->board_lane_delay
[8] + 4 * info
->use_ecc
) << 6 |
1221 0x80 | info
->board_lane_delay
[6] << 1 |
1222 info
->board_lane_delay
[2] << 28 |
1223 cas_latency_derived
<< 16 | 0x4700000);
1224 mchbar_write32(0x258 + (channel
<< 10),
1225 (info
->board_lane_delay
[5] + info
->clock_speed_index
+ 9) << 12 |
1226 (info
->clock_speed_index
- info
->cas_latency
+ 12) << 8 |
1227 info
->board_lane_delay
[2] << 17 |
1228 info
->board_lane_delay
[4] << 24 | 0x47);
1229 mchbar_write32(0x25c + (channel
<< 10),
1230 info
->board_lane_delay
[1] << 1 |
1231 info
->board_lane_delay
[0] << 8 | 0x1da50000);
1232 mchbar_write8(0x264 + (channel
<< 10), 0xff);
1233 mchbar_write8(0x5f8 + (channel
<< 10), cas_latency_shift
<< 3 | info
->use_ecc
);
1236 program_modules_memory_map(info
, 1);
1238 mchbar_clrsetbits16(0x610, 0xfe3c,
1239 MIN(ns_to_cycles(info
, some_delay_ns
) / 2, 127) << 9 | 0x3c);
1240 mchbar_setbits16(0x612, 1 << 8);
1241 mchbar_setbits16(0x214, 0x3e00);
1242 for (i
= 0; i
< 8; i
++) {
1243 pci_write_config32(QPI_SAD
, SAD_DRAM_RULE(i
),
1244 (info
->total_memory_mb
- 64) | !i
| 2);
1245 pci_write_config32(QPI_SAD
, SAD_INTERLEAVE_LIST(i
), 0);
/* Size of the PCI MMIO hole below 4 GiB, in MiB. */
#define DEFAULT_PCI_MMIO_SIZE 2048
1251 static void program_total_memory_map(struct raminfo
*info
)
1253 unsigned int tom
, tolud
, touud
;
1254 unsigned int quickpath_reserved
;
1255 unsigned int remap_base
;
1256 unsigned int uma_base_igd
;
1257 unsigned int uma_base_gtt
;
1258 unsigned int mmio_size
;
1260 unsigned int memory_map
[8];
1262 unsigned int current_limit
;
1263 unsigned int tseg_base
;
1264 int uma_size_igd
= 0, uma_size_gtt
= 0;
1266 memset(memory_map
, 0, sizeof(memory_map
));
1268 if (info
->uma_enabled
) {
1269 u16 t
= pci_read_config16(NORTHBRIDGE
, GGC
);
1271 const int uma_sizes_gtt
[16] =
1272 { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
1274 const int uma_sizes_igd
[16] = {
1275 0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
1279 uma_size_igd
= uma_sizes_igd
[(t
>> 4) & 0xF];
1280 uma_size_gtt
= uma_sizes_gtt
[(t
>> 8) & 0xF];
1283 mmio_size
= DEFAULT_PCI_MMIO_SIZE
;
1285 tom
= info
->total_memory_mb
;
1288 touud
= ALIGN_DOWN(tom
- info
->memory_reserved_for_heci_mb
, 64);
1289 tolud
= ALIGN_DOWN(MIN(4096 - mmio_size
+ ALIGN_UP(uma_size_igd
+ uma_size_gtt
, 64)
1292 if (touud
- tolud
> 64) {
1294 remap_base
= MAX(4096, touud
);
1295 touud
= touud
- tolud
+ 4096;
1298 memory_map
[2] = touud
| 1;
1299 quickpath_reserved
= 0;
1301 u32 t
= pci_read_config32(QPI_SAD
, 0x68);
1306 u32 shift
= t
>> 20;
1308 die("Quickpath value is 0\n");
1309 quickpath_reserved
= (u32
)1 << find_lowest_bit_set32(shift
);
1313 touud
-= quickpath_reserved
;
1315 uma_base_igd
= tolud
- uma_size_igd
;
1316 uma_base_gtt
= uma_base_igd
- uma_size_gtt
;
1317 tseg_base
= ALIGN_DOWN(uma_base_gtt
, 64) - (CONFIG_SMM_TSEG_SIZE
>> 20);
1319 tseg_base
-= quickpath_reserved
;
1320 tseg_base
= ALIGN_DOWN(tseg_base
, 8);
1322 pci_write_config16(NORTHBRIDGE
, TOLUD
, tolud
<< 4);
1323 pci_write_config16(NORTHBRIDGE
, TOM
, tom
>> 6);
1325 pci_write_config16(NORTHBRIDGE
, REMAPBASE
, remap_base
>> 6);
1326 pci_write_config16(NORTHBRIDGE
, REMAPLIMIT
, (touud
- 64) >> 6);
1328 pci_write_config16(NORTHBRIDGE
, TOUUD
, touud
);
1330 if (info
->uma_enabled
) {
1331 pci_write_config32(NORTHBRIDGE
, IGD_BASE
, uma_base_igd
<< 20);
1332 pci_write_config32(NORTHBRIDGE
, GTT_BASE
, uma_base_gtt
<< 20);
1334 pci_write_config32(NORTHBRIDGE
, TSEG
, tseg_base
<< 20);
1337 memory_map
[0] = ALIGN_DOWN(uma_base_gtt
, 64) | 1;
1338 memory_map
[1] = 4096;
1339 for (i
= 0; i
< ARRAY_SIZE(memory_map
); i
++) {
1340 current_limit
= MAX(current_limit
, memory_map
[i
] & ~1);
1341 pci_write_config32(QPI_SAD
, SAD_DRAM_RULE(i
),
1342 (memory_map
[i
] & 1) | ALIGN_DOWN(current_limit
-
1344 pci_write_config32(QPI_SAD
, SAD_INTERLEAVE_LIST(i
), 0);
1348 static void collect_system_info(struct raminfo
*info
)
1352 unsigned int channel
;
1354 for (i
= 0; i
< 3; i
++) {
1355 capid0
[i
] = pci_read_config32(NORTHBRIDGE
, CAPID0
| (i
<< 2));
1356 printk(BIOS_DEBUG
, "CAPID0[%d] = 0x%08x\n", i
, capid0
[i
]);
1358 info
->revision
= pci_read_config8(NORTHBRIDGE
, PCI_REVISION_ID
);
1359 printk(BIOS_DEBUG
, "Revision ID: 0x%x\n", info
->revision
);
1360 printk(BIOS_DEBUG
, "Device ID: 0x%x\n", pci_read_config16(NORTHBRIDGE
, PCI_DEVICE_ID
));
1362 info
->max_supported_clock_speed_index
= (~capid0
[1] & 7);
1364 if ((capid0
[1] >> 11) & 1)
1365 info
->uma_enabled
= 0;
1367 gav(info
->uma_enabled
=
1368 pci_read_config8(NORTHBRIDGE
, DEVEN
) & 8);
1369 /* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
1370 info
->silicon_revision
= 0;
1372 if (capid0
[2] & 2) {
1373 info
->silicon_revision
= 0;
1374 info
->max_supported_clock_speed_index
= 2;
1375 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
1376 if (info
->populated_ranks
[channel
][0][0]
1377 && (info
->spd
[channel
][0][MODULE_TYPE
] & 0xf) ==
1379 info
->silicon_revision
= 2;
1380 info
->max_supported_clock_speed_index
= 1;
1383 switch (((capid0
[2] >> 18) & 1) + 2 * ((capid0
[1] >> 3) & 1)) {
1386 info
->silicon_revision
= 3;
1389 info
->silicon_revision
= 0;
1392 info
->silicon_revision
= 2;
1395 switch (pci_read_config16(NORTHBRIDGE
, PCI_DEVICE_ID
)) {
1397 info
->silicon_revision
= 0;
1400 info
->silicon_revision
= 1;
1406 static void write_training_data(struct raminfo
*info
)
1408 int tm
, channel
, slot
, rank
, lane
;
1409 if (info
->revision
< 8)
1412 for (tm
= 0; tm
< 4; tm
++)
1413 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
1414 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
1415 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
1416 for (lane
= 0; lane
< 9; lane
++)
1417 write_500(info
, channel
,
1421 [channel
][slot
][rank
]
1423 get_timing_register_addr
1426 write_1d0(info
->cached_training
->reg_178
, 0x178, 7, 1);
1427 write_1d0(info
->cached_training
->reg_10b
, 0x10b, 6, 1);
1430 static void dump_timings(struct raminfo
*info
)
1432 int channel
, slot
, rank
, lane
, i
;
1433 printk(RAM_SPEW
, "Timings:\n");
1434 FOR_POPULATED_RANKS
{
1435 printk(RAM_SPEW
, "channel %d, slot %d, rank %d\n", channel
,
1437 for (lane
= 0; lane
< 9; lane
++) {
1438 printk(RAM_SPEW
, "lane %d: ", lane
);
1439 for (i
= 0; i
< 4; i
++) {
1440 printk(RAM_SPEW
, "%x (%x) ",
1441 read_500(info
, channel
,
1442 get_timing_register_addr
1443 (lane
, i
, slot
, rank
),
1446 lane_timings
[i
][channel
][slot
][rank
]
1449 printk(RAM_SPEW
, "\n");
1452 printk(RAM_SPEW
, "[178] = %x (%x)\n", read_1d0(0x178, 7),
1453 info
->training
.reg_178
);
1454 printk(RAM_SPEW
, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
1455 info
->training
.reg_10b
);
1458 /* Read timings and other registers that need to be restored verbatim and
1461 static void save_timings(struct raminfo
*info
)
1463 struct ram_training train
;
1464 int channel
, slot
, rank
, lane
, i
;
1466 train
= info
->training
;
1467 FOR_POPULATED_RANKS
for (lane
= 0; lane
< 9; lane
++)
1468 for (i
= 0; i
< 4; i
++)
1469 train
.lane_timings
[i
][channel
][slot
][rank
][lane
] =
1470 read_500(info
, channel
,
1471 get_timing_register_addr(lane
, i
, slot
,
1473 train
.reg_178
= read_1d0(0x178, 7);
1474 train
.reg_10b
= read_1d0(0x10b, 6);
1476 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
1478 reg32
= mchbar_read32((channel
<< 10) + 0x274);
1479 train
.reg274265
[channel
][0] = reg32
>> 16;
1480 train
.reg274265
[channel
][1] = reg32
& 0xffff;
1481 train
.reg274265
[channel
][2] = mchbar_read16((channel
<< 10) + 0x265) >> 8;
1483 train
.reg2ca9_bit0
= mchbar_read8(0x2ca9) & 1;
1484 train
.reg_6dc
= mchbar_read32(0x6dc);
1485 train
.reg_6e8
= mchbar_read32(0x6e8);
1487 printk(RAM_SPEW
, "[6dc] = %x\n", train
.reg_6dc
);
1488 printk(RAM_SPEW
, "[6e8] = %x\n", train
.reg_6e8
);
1490 /* Save the MRC S3 restore data to cbmem */
1491 mrc_cache_stash_data(MRC_TRAINING_DATA
, MRC_CACHE_VERSION
,
1492 &train
, sizeof(train
));
1495 static const struct ram_training
*get_cached_training(void)
1497 return mrc_cache_current_mmap_leak(MRC_TRAINING_DATA
,
1502 static int have_match_ranks(struct raminfo
*info
, int channel
, int ranks
)
1504 int ranks_in_channel
;
1505 ranks_in_channel
= info
->populated_ranks
[channel
][0][0]
1506 + info
->populated_ranks
[channel
][0][1]
1507 + info
->populated_ranks
[channel
][1][0]
1508 + info
->populated_ranks
[channel
][1][1];
1511 if (ranks_in_channel
== 0)
1514 if (ranks_in_channel
!= ranks
)
1517 if (info
->populated_ranks
[channel
][0][0] !=
1518 info
->populated_ranks
[channel
][1][0])
1520 if (info
->populated_ranks
[channel
][0][1] !=
1521 info
->populated_ranks
[channel
][1][1])
1523 if (info
->is_x16_module
[channel
][0] != info
->is_x16_module
[channel
][1])
1525 if (info
->density
[channel
][0] != info
->density
[channel
][1])
1530 static void read_4090(struct raminfo
*info
)
1532 int i
, channel
, slot
, rank
, lane
;
1533 for (i
= 0; i
< 2; i
++)
1534 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
1535 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
1536 for (lane
= 0; lane
< 9; lane
++)
1538 lane_timings
[0][i
][slot
][rank
][lane
]
1541 for (i
= 1; i
< 4; i
++)
1542 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
1543 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
1544 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
1545 for (lane
= 0; lane
< 9; lane
++) {
1547 lane_timings
[i
][channel
]
1548 [slot
][rank
][lane
] =
1549 read_500(info
, channel
,
1550 get_timing_register_addr
1553 + (i
== 1) * 11; // !!!!
1557 static u32
get_etalon2(int flip
, u32 addr
)
1559 const u16 invmask
[] = {
1560 0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
1561 0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
1564 u32 comp4
= addr
/ 480;
1566 u32 comp1
= addr
& 0xf;
1567 u32 comp2
= (addr
>> 4) & 1;
1568 u32 comp3
= addr
>> 5;
1571 ret
= 0x1010101 << (comp4
- 1);
1574 if (flip
^ (((invmask
[comp3
] >> comp1
) ^ comp2
) & 1))
1580 static void disable_cache_region(void)
1582 msr_t msr
= {.lo
= 0, .hi
= 0 };
1584 wrmsr(MTRR_PHYS_BASE(3), msr
);
1585 wrmsr(MTRR_PHYS_MASK(3), msr
);
1588 static void enable_cache_region(unsigned int base
, unsigned int size
)
1591 msr
.lo
= base
| MTRR_TYPE_WRPROT
;
1593 wrmsr(MTRR_PHYS_BASE(3), msr
);
1594 msr
.lo
= ((~(ALIGN_DOWN(size
+ 4096, 4096) - 1) | MTRR_DEF_TYPE_EN
)
1596 msr
.hi
= 0x0000000f;
1597 wrmsr(MTRR_PHYS_MASK(3), msr
);
1600 static void flush_cache(u32 start
, u32 size
)
1605 end
= start
+ (ALIGN_DOWN(size
+ 4096, 4096));
1606 for (addr
= start
; addr
< end
; addr
+= 64)
1607 clflush((void *)(uintptr_t)addr
);
1610 static void clear_errors(void)
1612 pci_write_config8(NORTHBRIDGE
, 0xc0, 0x01);
1615 static void write_testing(struct raminfo
*info
, int totalrank
, int flip
)
1618 /* in 8-byte units. */
1622 base
= (u8
*)(uintptr_t)(totalrank
<< 28);
1623 for (offset
= 0; offset
< 9 * 480; offset
+= 2) {
1624 write32(base
+ offset
* 8, get_etalon2(flip
, offset
));
1625 write32(base
+ offset
* 8 + 4, get_etalon2(flip
, offset
));
1626 write32(base
+ offset
* 8 + 8, get_etalon2(flip
, offset
+ 1));
1627 write32(base
+ offset
* 8 + 12, get_etalon2(flip
, offset
+ 1));
1629 if (nwrites
>= 320) {
1636 static u8
check_testing(struct raminfo
*info
, u8 total_rank
, int flip
)
1640 int comp1
, comp2
, comp3
;
1641 u32 failxor
[2] = { 0, 0 };
1643 enable_cache_region((total_rank
<< 28), 1728 * 5 * 4);
1645 for (comp3
= 0; comp3
< 9 && failmask
!= 0xff; comp3
++) {
1646 for (comp1
= 0; comp1
< 4; comp1
++)
1647 for (comp2
= 0; comp2
< 60; comp2
++) {
1650 comp3
* 8 * 60 + 2 * comp1
+ 8 * comp2
;
1651 read128((total_rank
<< 28) | (curroffset
<< 3),
1654 get_etalon2(flip
, curroffset
) ^ re
[0];
1656 get_etalon2(flip
, curroffset
) ^ re
[1];
1658 get_etalon2(flip
, curroffset
| 1) ^ re
[2];
1660 get_etalon2(flip
, curroffset
| 1) ^ re
[3];
1662 for (i
= 0; i
< 8; i
++)
1663 if ((0xff << (8 * (i
% 4))) & failxor
[i
/ 4])
1666 disable_cache_region();
1667 flush_cache((total_rank
<< 28), 1728 * 5 * 4);
1671 const u32 seed1
[0x18] = {
1672 0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
1673 0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
1674 0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
1675 0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
1676 0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
1677 0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
1680 static u32
get_seed2(int a
, int b
)
1682 const u32 seed2
[5] = {
1683 0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
1687 r
= seed2
[(a
+ (a
>= 10)) / 5];
/*
 * Compute a 5-bit shift amount: comp2, minus a 0/1 adjustment taken
 * from bit (x & 7) of the lookup table entry selected by comp5, the
 * result wrapped into the range 0..31.
 */
static int make_shift(int comp2, int comp5, int x)
{
	const unsigned char seed3[32] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
		0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	};
	const int adjust = (seed3[comp5] >> (x & 7)) & 1;

	return (comp2 - adjust) & 0x1f;
}
1703 static u32
get_etalon(int flip
, u32 addr
)
1706 int comp1
= (addr
>> 1) & 1;
1707 int comp2
= (addr
>> 3) & 0x1f;
1708 int comp3
= (addr
>> 8) & 0xf;
1709 int comp4
= (addr
>> 12) & 0xf;
1710 int comp5
= (addr
>> 16) & 0x1f;
1711 u32 mask_bit
= ~(0x10001 << comp3
);
1718 make_shift(comp2
, comp5
,
1719 (comp3
>> 3) | (comp1
<< 2) | 2)) & 1) ^ flip
;
1722 make_shift(comp2
, comp5
,
1723 (comp3
>> 3) | (comp1
<< 2) | 0)) & 1) ^ flip
;
1725 for (byte
= 0; byte
< 4; byte
++)
1726 if ((get_seed2(comp5
, comp4
) >>
1727 make_shift(comp2
, comp5
, (byte
| (comp1
<< 2)))) & 1)
1728 mask_byte
|= 0xff << (8 * byte
);
1730 return (mask_bit
& mask_byte
) | (part1
<< comp3
) | (part2
<<
1735 write_testing_type2(struct raminfo
*info
, u8 totalrank
, u8 region
, u8 block
,
1739 for (i
= 0; i
< 2048; i
++)
1740 write32p((totalrank
<< 28) | (region
<< 25) | (block
<< 16) |
1741 (i
<< 2), get_etalon(flip
, (block
<< 16) | (i
<< 2)));
1745 check_testing_type2(struct raminfo
*info
, u8 totalrank
, u8 region
, u8 block
,
1751 int comp1
, comp2
, comp3
;
1756 enable_cache_region(totalrank
<< 28, 134217728);
1757 for (comp3
= 0; comp3
< 2 && failmask
!= 0xff; comp3
++) {
1758 for (comp1
= 0; comp1
< 16; comp1
++)
1759 for (comp2
= 0; comp2
< 64; comp2
++) {
1761 (totalrank
<< 28) | (region
<< 25) | (block
1763 | (comp3
<< 12) | (comp2
<< 6) | (comp1
<<
1765 failxor
[comp1
& 1] |=
1766 read32p(addr
) ^ get_etalon(flip
, addr
);
1768 for (i
= 0; i
< 8; i
++)
1769 if ((0xff << (8 * (i
% 4))) & failxor
[i
/ 4])
1772 disable_cache_region();
1773 flush_cache((totalrank
<< 28) | (region
<< 25) | (block
<< 16), 16384);
1777 static int check_bounded(unsigned short *vals
, u16 bound
)
1781 for (i
= 0; i
< 8; i
++)
1782 if (vals
[i
] < bound
)
1788 BEFORE_USABLE
= 0, AT_USABLE
= 1, AT_MARGIN
= 2, COMPLETE
= 3
1791 static int validate_state(enum state
*in
)
1794 for (i
= 0; i
< 8; i
++)
1795 if (in
[i
] != COMPLETE
)
1801 do_fsm(enum state
*state
, u16
*counter
,
1802 u8 fail_mask
, int margin
, int uplimit
,
1803 u8
*res_low
, u8
*res_high
, u8 val
)
1807 for (lane
= 0; lane
< 8; lane
++) {
1808 int is_fail
= (fail_mask
>> lane
) & 1;
1809 switch (state
[lane
]) {
1813 state
[lane
] = AT_USABLE
;
1817 state
[lane
] = BEFORE_USABLE
;
1822 if (counter
[lane
] >= margin
) {
1823 state
[lane
] = AT_MARGIN
;
1824 res_low
[lane
] = val
- margin
+ 1;
1831 state
[lane
] = BEFORE_USABLE
;
1835 state
[lane
] = COMPLETE
;
1836 res_high
[lane
] = val
- 1;
1839 state
[lane
] = AT_MARGIN
;
1840 if (val
== uplimit
) {
1841 state
[lane
] = COMPLETE
;
1842 res_high
[lane
] = uplimit
;
1853 train_ram_at_178(struct raminfo
*info
, u8 channel
, int slot
, int rank
,
1854 u8 total_rank
, u8 reg_178
, int first_run
, int niter
,
1855 timing_bounds_t
* timings
)
1858 enum state state
[8];
1862 unsigned short num_successfully_checked
[8];
1866 for (i
= 0; i
< 8; i
++)
1867 state
[i
] = BEFORE_USABLE
;
1871 for (lane
= 0; lane
< 8; lane
++)
1872 if (timings
[reg_178
][channel
][slot
][rank
][lane
].
1874 timings
[reg_178
][channel
][slot
][rank
][lane
].
1876 timings
[reg_178
][channel
][slot
][rank
][lane
].
1878 timings
[reg_178
][channel
][slot
][rank
][lane
].
1883 for (i
= 0; i
< 8; i
++)
1884 state
[i
] = COMPLETE
;
1888 for (reg1b3
= 0; reg1b3
< 0x30 && !validate_state(state
); reg1b3
++) {
1890 write_1d0(reg1b3
^ 32, 0x1b3, 6, 1);
1891 write_1d0(reg1b3
^ 32, 0x1a3, 6, 1);
1892 failmask
= check_testing(info
, total_rank
, 0);
1893 mchbar_setbits32(0xfb0, 3 << 16);
1894 do_fsm(state
, count
, failmask
, 5, 47, lower_usable
,
1895 upper_usable
, reg1b3
);
1899 write_1d0(0, 0x1b3, 6, 1);
1900 write_1d0(0, 0x1a3, 6, 1);
1901 for (lane
= 0; lane
< 8; lane
++) {
1902 if (state
[lane
] == COMPLETE
) {
1903 timings
[reg_178
][channel
][slot
][rank
][lane
].
1905 lower_usable
[lane
] +
1907 lane_timings
[0][channel
][slot
][rank
][lane
]
1909 timings
[reg_178
][channel
][slot
][rank
][lane
].
1911 upper_usable
[lane
] +
1913 lane_timings
[0][channel
][slot
][rank
][lane
]
1920 for (lane
= 0; lane
< 8; lane
++)
1921 if (state
[lane
] == COMPLETE
) {
1922 write_500(info
, channel
,
1923 timings
[reg_178
][channel
][slot
][rank
]
1925 get_timing_register_addr(lane
, 0,
1928 write_500(info
, channel
,
1929 timings
[reg_178
][channel
][slot
][rank
]
1932 lane_timings
[1][channel
][slot
][rank
]
1936 lane_timings
[0][channel
][slot
][rank
]
1937 [lane
], get_timing_register_addr(lane
,
1942 num_successfully_checked
[lane
] = 0;
1944 num_successfully_checked
[lane
] = -1;
1948 for (i
= 0; i
< niter
; i
++) {
1949 if (failmask
== 0xFF)
1952 check_testing_type2(info
, total_rank
, 2, i
,
1955 check_testing_type2(info
, total_rank
, 3, i
,
1958 mchbar_setbits32(0xfb0, 3 << 16);
1959 for (lane
= 0; lane
< 8; lane
++)
1960 if (num_successfully_checked
[lane
] != 0xffff) {
1961 if ((1 << lane
) & failmask
) {
1962 if (timings
[reg_178
][channel
]
1965 timings
[reg_178
][channel
]
1966 [slot
][rank
][lane
].smallest
)
1967 num_successfully_checked
1970 num_successfully_checked
1976 write_500(info
, channel
,
1983 get_timing_register_addr
1987 write_500(info
, channel
,
2007 get_timing_register_addr
2013 num_successfully_checked
[lane
]
2017 while (!check_bounded(num_successfully_checked
, 2))
2020 for (lane
= 0; lane
< 8; lane
++)
2021 if (state
[lane
] == COMPLETE
) {
2022 write_500(info
, channel
,
2023 timings
[reg_178
][channel
][slot
][rank
]
2025 get_timing_register_addr(lane
, 0,
2028 write_500(info
, channel
,
2029 timings
[reg_178
][channel
][slot
][rank
]
2032 lane_timings
[1][channel
][slot
][rank
]
2036 lane_timings
[0][channel
][slot
][rank
]
2037 [lane
], get_timing_register_addr(lane
,
2042 num_successfully_checked
[lane
] = 0;
2044 num_successfully_checked
[lane
] = -1;
2048 for (i
= 0; i
< niter
; i
++) {
2049 if (failmask
== 0xFF)
2052 check_testing_type2(info
, total_rank
, 2, i
,
2055 check_testing_type2(info
, total_rank
, 3, i
,
2059 mchbar_setbits32(0xfb0, 3 << 16);
2060 for (lane
= 0; lane
< 8; lane
++) {
2061 if (num_successfully_checked
[lane
] != 0xffff) {
2062 if ((1 << lane
) & failmask
) {
2063 if (timings
[reg_178
][channel
]
2066 timings
[reg_178
][channel
]
2069 num_successfully_checked
2072 num_successfully_checked
2078 write_500(info
, channel
,
2085 get_timing_register_addr
2089 write_500(info
, channel
,
2109 get_timing_register_addr
2115 num_successfully_checked
[lane
]
2120 while (!check_bounded(num_successfully_checked
, 3))
2123 for (lane
= 0; lane
< 8; lane
++) {
2124 write_500(info
, channel
,
2126 lane_timings
[0][channel
][slot
][rank
][lane
],
2127 get_timing_register_addr(lane
, 0, slot
, rank
),
2129 write_500(info
, channel
,
2131 lane_timings
[1][channel
][slot
][rank
][lane
],
2132 get_timing_register_addr(lane
, 1, slot
, rank
),
2134 if (timings
[reg_178
][channel
][slot
][rank
][lane
].
2136 timings
[reg_178
][channel
][slot
][rank
][lane
].
2138 timings
[reg_178
][channel
][slot
][rank
][lane
].
2140 timings
[reg_178
][channel
][slot
][rank
][lane
].
2147 static void set_10b(struct raminfo
*info
, u8 val
)
2153 if (read_1d0(0x10b, 6) == val
)
2156 write_1d0(val
, 0x10b, 6, 1);
2158 FOR_POPULATED_RANKS_BACKWARDS
for (lane
= 0; lane
< 9; lane
++) {
2160 reg_500
= read_500(info
, channel
,
2161 get_timing_register_addr(lane
, 0, slot
,
2164 if (lut16
[info
->clock_speed_index
] <= reg_500
)
2165 reg_500
-= lut16
[info
->clock_speed_index
];
2169 reg_500
+= lut16
[info
->clock_speed_index
];
2171 write_500(info
, channel
, reg_500
,
2172 get_timing_register_addr(lane
, 0, slot
, rank
), 9, 1);
2176 static void set_ecc(int onoff
)
2179 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
2181 t
= mchbar_read8((channel
<< 10) + 0x5f8);
2186 mchbar_write8((channel
<< 10) + 0x5f8, t
);
2190 static void set_178(u8 val
)
2197 write_1d0(2 * val
, 0x178, 7, 1);
2201 write_500_timings_type(struct raminfo
*info
, int channel
, int slot
, int rank
,
2206 for (lane
= 0; lane
< 8; lane
++)
2207 write_500(info
, channel
,
2209 lane_timings
[type
][channel
][slot
][rank
][lane
],
2210 get_timing_register_addr(lane
, type
, slot
, rank
), 9,
2215 try_timing_offsets(struct raminfo
*info
, int channel
,
2216 int slot
, int rank
, int totalrank
)
2219 enum state state
[8];
2220 u8 lower_usable
[8], upper_usable
[8];
2226 for (i
= 0; i
< 8; i
++)
2227 state
[i
] = BEFORE_USABLE
;
2229 memset(count
, 0, sizeof(count
));
2231 for (lane
= 0; lane
< 8; lane
++)
2232 write_500(info
, channel
,
2234 lane_timings
[2][channel
][slot
][rank
][lane
] + 32,
2235 get_timing_register_addr(lane
, 3, slot
, rank
), 9, 1);
2237 for (timing_offset
= 0; !validate_state(state
) && timing_offset
< 64;
2240 write_1d0(timing_offset
^ 32, 0x1bb, 6, 1);
2242 for (i
= 0; i
< 2 && failmask
!= 0xff; i
++) {
2244 write_testing(info
, totalrank
, flip
);
2245 failmask
|= check_testing(info
, totalrank
, flip
);
2247 do_fsm(state
, count
, failmask
, 10, 63, lower_usable
,
2248 upper_usable
, timing_offset
);
2250 write_1d0(0, 0x1bb, 6, 1);
2252 if (!validate_state(state
))
2253 die("Couldn't discover DRAM timings (1)\n");
2255 for (lane
= 0; lane
< 8; lane
++) {
2258 if (info
->silicon_revision
) {
2261 usable_length
= upper_usable
[lane
] - lower_usable
[lane
];
2262 if (usable_length
>= 20) {
2263 bias
= usable_length
/ 2 - 10;
2268 write_500(info
, channel
,
2270 lane_timings
[2][channel
][slot
][rank
][lane
] +
2271 (upper_usable
[lane
] + lower_usable
[lane
]) / 2 - bias
,
2272 get_timing_register_addr(lane
, 3, slot
, rank
), 9, 1);
2273 info
->training
.timing2_bounds
[channel
][slot
][rank
][lane
][0] =
2274 info
->training
.lane_timings
[2][channel
][slot
][rank
][lane
] +
2276 info
->training
.timing2_bounds
[channel
][slot
][rank
][lane
][1] =
2277 info
->training
.lane_timings
[2][channel
][slot
][rank
][lane
] +
2279 info
->training
.timing2_offset
[channel
][slot
][rank
][lane
] =
2280 info
->training
.lane_timings
[2][channel
][slot
][rank
][lane
];
2285 choose_training(struct raminfo
*info
, int channel
, int slot
, int rank
,
2286 int lane
, timing_bounds_t
* timings
, u8 center_178
)
2290 unsigned int sum
= 0, count
= 0;
2292 u8 lower_margin
, upper_margin
;
2297 central_weight
= 20;
2299 if (info
->silicon_revision
== 1 && channel
== 1) {
2303 populated_ranks_mask
[1] ^ (info
->
2304 populated_ranks_mask
[1] >> 2)) &
2308 if ((info
->populated_ranks_mask
[0] & 5) == 5) {
2309 central_weight
= 20;
2312 if (info
->clock_speed_index
>= 2
2313 && (info
->populated_ranks_mask
[0] & 5) == 5 && slot
== 1) {
2314 if (info
->silicon_revision
== 1) {
2318 central_weight
= 10;
2325 central_weight
= 20;
2330 if (info
->silicon_revision
== 0 && channel
== 0 && lane
== 0) {
2332 central_weight
= 20;
2335 for (reg_178
= center_178
- span
; reg_178
<= center_178
+ span
;
2339 largest
= timings
[reg_178
][channel
][slot
][rank
][lane
].largest
;
2340 smallest
= timings
[reg_178
][channel
][slot
][rank
][lane
].smallest
;
2341 if (largest
- smallest
+ 1 >= 5) {
2342 unsigned int weight
;
2343 if (reg_178
== center_178
)
2344 weight
= central_weight
;
2346 weight
= side_weight
;
2347 sum
+= weight
* (largest
+ smallest
);
2353 die("Couldn't discover DRAM timings (2)\n");
2354 result
= sum
/ (2 * count
);
2356 result
- timings
[center_178
][channel
][slot
][rank
][lane
].smallest
;
2358 timings
[center_178
][channel
][slot
][rank
][lane
].largest
- result
;
2359 if (upper_margin
< 10 && lower_margin
> 10)
2360 result
-= MIN(lower_margin
- 10, 10 - upper_margin
);
2361 if (upper_margin
> 10 && lower_margin
< 10)
2362 result
+= MIN(upper_margin
- 10, 10 - lower_margin
);
2366 #define STANDARD_MIN_MARGIN 5
2368 static u8
choose_reg178(struct raminfo
*info
, timing_bounds_t
* timings
)
2371 int lane
, rank
, slot
, channel
;
2373 int count
= 0, sum
= 0;
2375 for (reg178
= reg178_min
[info
->clock_speed_index
];
2376 reg178
< reg178_max
[info
->clock_speed_index
];
2377 reg178
+= reg178_step
[info
->clock_speed_index
]) {
2378 margin
[reg178
] = -1;
2379 FOR_POPULATED_RANKS_BACKWARDS
for (lane
= 0; lane
< 8; lane
++) {
2381 timings
[reg178
][channel
][slot
][rank
][lane
].largest
-
2382 timings
[reg178
][channel
][slot
][rank
][lane
].
2384 if (curmargin
< margin
[reg178
])
2385 margin
[reg178
] = curmargin
;
2387 if (margin
[reg178
] >= STANDARD_MIN_MARGIN
) {
2389 weight
= margin
[reg178
] - STANDARD_MIN_MARGIN
;
2390 sum
+= weight
* reg178
;
2396 die("Couldn't discover DRAM timings (3)\n");
2400 for (threshold
= 30; threshold
>= 5; threshold
--) {
2401 int usable_length
= 0;
2402 int smallest_fount
= 0;
2403 for (reg178
= reg178_min
[info
->clock_speed_index
];
2404 reg178
< reg178_max
[info
->clock_speed_index
];
2405 reg178
+= reg178_step
[info
->clock_speed_index
])
2406 if (margin
[reg178
] >= threshold
) {
2408 reg178_step
[info
->clock_speed_index
];
2409 info
->training
.reg178_largest
=
2411 2 * reg178_step
[info
->clock_speed_index
];
2413 if (!smallest_fount
) {
2415 info
->training
.reg178_smallest
=
2421 if (usable_length
>= 0x21)
2428 static int check_cached_sanity(struct raminfo
*info
)
2434 if (!info
->cached_training
)
2437 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
2438 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
2439 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
2440 for (lane
= 0; lane
< 8 + info
->use_ecc
; lane
++) {
2441 u16 cached_value
, estimation_value
;
2443 info
->cached_training
->
2444 lane_timings
[1][channel
][slot
][rank
]
2446 if (cached_value
>= 0x18
2447 && cached_value
<= 0x1E7) {
2450 lane_timings
[1][channel
]
2452 if (estimation_value
<
2455 if (estimation_value
>
2463 static int try_cached_training(struct raminfo
*info
)
2468 int channel
, slot
, rank
, lane
;
2472 if (!check_cached_sanity(info
))
2475 info
->training
.reg178_center
= info
->cached_training
->reg178_center
;
2476 info
->training
.reg178_smallest
= info
->cached_training
->reg178_smallest
;
2477 info
->training
.reg178_largest
= info
->cached_training
->reg178_largest
;
2478 memcpy(&info
->training
.timing_bounds
,
2479 &info
->cached_training
->timing_bounds
,
2480 sizeof(info
->training
.timing_bounds
));
2481 memcpy(&info
->training
.timing_offset
,
2482 &info
->cached_training
->timing_offset
,
2483 sizeof(info
->training
.timing_offset
));
2485 write_1d0(2, 0x142, 3, 1);
2486 saved_243
[0] = mchbar_read8(0x243);
2487 saved_243
[1] = mchbar_read8(0x643);
2488 mchbar_write8(0x243, saved_243
[0] | 2);
2489 mchbar_write8(0x643, saved_243
[1] | 2);
2491 pci_write_config16(NORTHBRIDGE
, 0xc8, 3);
2492 if (read_1d0(0x10b, 6) & 1)
2494 for (tm
= 0; tm
< 2; tm
++) {
2497 set_178(tm
? info
->cached_training
->reg178_largest
: info
->
2498 cached_training
->reg178_smallest
);
2501 /* Check timing ranges. With i == 0 we check smallest one and with
2502 i == 1 the largest bound. With j == 0 we check that on the bound
2503 it still works whereas with j == 1 we check that just outside of
2506 FOR_POPULATED_RANKS_BACKWARDS
{
2507 for (i
= 0; i
< 2; i
++) {
2508 for (lane
= 0; lane
< 8; lane
++) {
2509 write_500(info
, channel
,
2510 info
->cached_training
->
2511 timing2_bounds
[channel
][slot
]
2513 get_timing_register_addr(lane
,
2520 write_500(info
, channel
,
2524 [channel
][slot
][rank
]
2526 get_timing_register_addr
2527 (lane
, 2, slot
, rank
),
2529 write_500(info
, channel
,
2530 i
? info
->cached_training
->
2531 timing_bounds
[tm
][channel
]
2535 timing_bounds
[tm
][channel
]
2536 [slot
][rank
][lane
].smallest
,
2537 get_timing_register_addr(lane
,
2542 write_500(info
, channel
,
2543 info
->cached_training
->
2544 timing_offset
[channel
][slot
]
2546 (i
? info
->cached_training
->
2547 timing_bounds
[tm
][channel
]
2551 timing_bounds
[tm
][channel
]
2554 get_timing_register_addr(lane
,
2560 for (j
= 0; j
< 2; j
++) {
2562 u8 expected_failmask
;
2565 reg1b3
= (j
== 1) + 4;
2567 j
== i
? reg1b3
: (-reg1b3
) & 0x3f;
2568 write_1d0(reg1b3
, 0x1bb, 6, 1);
2569 write_1d0(reg1b3
, 0x1b3, 6, 1);
2570 write_1d0(reg1b3
, 0x1a3, 6, 1);
2573 write_testing(info
, totalrank
, flip
);
2575 check_testing(info
, totalrank
,
2578 j
== 0 ? 0x00 : 0xff;
2579 if (failmask
!= expected_failmask
)
2587 set_178(info
->cached_training
->reg178_center
);
2590 write_training_data(info
);
2591 write_1d0(0, 322, 3, 1);
2592 info
->training
= *info
->cached_training
;
2594 write_1d0(0, 0x1bb, 6, 1);
2595 write_1d0(0, 0x1b3, 6, 1);
2596 write_1d0(0, 0x1a3, 6, 1);
2597 mchbar_write8(0x243, saved_243
[0]);
2598 mchbar_write8(0x643, saved_243
[1]);
2603 FOR_POPULATED_RANKS
{
2604 write_500_timings_type(info
, channel
, slot
, rank
, 1);
2605 write_500_timings_type(info
, channel
, slot
, rank
, 2);
2606 write_500_timings_type(info
, channel
, slot
, rank
, 3);
2609 write_1d0(0, 0x1bb, 6, 1);
2610 write_1d0(0, 0x1b3, 6, 1);
2611 write_1d0(0, 0x1a3, 6, 1);
2612 mchbar_write8(0x243, saved_243
[0]);
2613 mchbar_write8(0x643, saved_243
[1]);
2618 static void do_ram_training(struct raminfo
*info
)
2625 timing_bounds_t
*timings
= timings_car
;
2626 int lane
, rank
, slot
, channel
;
2629 write_1d0(2, 0x142, 3, 1);
2630 saved_243
[0] = mchbar_read8(0x243);
2631 saved_243
[1] = mchbar_read8(0x643);
2632 mchbar_write8(0x243, saved_243
[0] | 2);
2633 mchbar_write8(0x643, saved_243
[1] | 2);
2634 switch (info
->clock_speed_index
) {
2647 FOR_POPULATED_RANKS_BACKWARDS
{
2650 write_500_timings_type(info
, channel
, slot
, rank
, 0);
2652 write_testing(info
, totalrank
, 0);
2653 for (i
= 0; i
< niter
; i
++) {
2654 write_testing_type2(info
, totalrank
, 2, i
, 0);
2655 write_testing_type2(info
, totalrank
, 3, i
, 1);
2657 pci_write_config8(NORTHBRIDGE
, 0xc0, 0x01);
2661 if (reg178_min
[info
->clock_speed_index
] <
2662 reg178_max
[info
->clock_speed_index
])
2663 memset(timings
[reg178_min
[info
->clock_speed_index
]], 0,
2664 sizeof(timings
[0]) *
2665 (reg178_max
[info
->clock_speed_index
] -
2666 reg178_min
[info
->clock_speed_index
]));
2667 for (reg_178
= reg178_min
[info
->clock_speed_index
];
2668 reg_178
< reg178_max
[info
->clock_speed_index
];
2669 reg_178
+= reg178_step
[info
->clock_speed_index
]) {
2672 for (channel
= NUM_CHANNELS
- 1; channel
>= 0; channel
--)
2673 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
2674 for (rank
= 0; rank
< NUM_RANKS
; rank
++) {
2675 memset(&timings
[reg_178
][channel
][slot
]
2676 [rank
][0].smallest
, 0, 16);
2678 populated_ranks
[channel
][slot
]
2680 train_ram_at_178(info
, channel
,
2691 reg178_center
= choose_reg178(info
, timings
);
2693 FOR_POPULATED_RANKS_BACKWARDS
for (lane
= 0; lane
< 8; lane
++) {
2694 info
->training
.timing_bounds
[0][channel
][slot
][rank
][lane
].
2696 timings
[info
->training
.
2697 reg178_smallest
][channel
][slot
][rank
][lane
].
2699 info
->training
.timing_bounds
[0][channel
][slot
][rank
][lane
].
2701 timings
[info
->training
.
2702 reg178_smallest
][channel
][slot
][rank
][lane
].largest
;
2703 info
->training
.timing_bounds
[1][channel
][slot
][rank
][lane
].
2705 timings
[info
->training
.
2706 reg178_largest
][channel
][slot
][rank
][lane
].smallest
;
2707 info
->training
.timing_bounds
[1][channel
][slot
][rank
][lane
].
2709 timings
[info
->training
.
2710 reg178_largest
][channel
][slot
][rank
][lane
].largest
;
2711 info
->training
.timing_offset
[channel
][slot
][rank
][lane
] =
2712 info
->training
.lane_timings
[1][channel
][slot
][rank
][lane
]
2714 info
->training
.lane_timings
[0][channel
][slot
][rank
][lane
] +
2718 if (info
->silicon_revision
== 1
2720 populated_ranks_mask
[1] ^ (info
->
2721 populated_ranks_mask
[1] >> 2)) & 1) {
2722 int ranks_after_channel1
;
2725 for (reg_178
= reg178_center
- 18;
2726 reg_178
<= reg178_center
+ 18; reg_178
+= 18) {
2729 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
2730 for (rank
= 0; rank
< NUM_RANKS
; rank
++) {
2732 populated_ranks
[1][slot
][rank
]) {
2733 train_ram_at_178(info
, 1, slot
,
2743 ranks_after_channel1
= totalrank
;
2745 for (reg_178
= reg178_center
- 12;
2746 reg_178
<= reg178_center
+ 12; reg_178
+= 12) {
2747 totalrank
= ranks_after_channel1
;
2749 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
2750 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
2752 populated_ranks
[0][slot
][rank
]) {
2753 train_ram_at_178(info
, 0, slot
,
2763 for (reg_178
= reg178_center
- 12;
2764 reg_178
<= reg178_center
+ 12; reg_178
+= 12) {
2767 FOR_POPULATED_RANKS_BACKWARDS
{
2768 train_ram_at_178(info
, channel
, slot
, rank
,
2769 totalrank
, reg_178
, 0, niter
,
2776 set_178(reg178_center
);
2777 FOR_POPULATED_RANKS_BACKWARDS
for (lane
= 0; lane
< 8; lane
++) {
2781 choose_training(info
, channel
, slot
, rank
, lane
, timings
,
2783 write_500(info
, channel
, tm0
,
2784 get_timing_register_addr(lane
, 0, slot
, rank
), 9, 1);
2785 write_500(info
, channel
,
2788 lane_timings
[1][channel
][slot
][rank
][lane
] -
2790 lane_timings
[0][channel
][slot
][rank
][lane
],
2791 get_timing_register_addr(lane
, 1, slot
, rank
), 9, 1);
2795 FOR_POPULATED_RANKS_BACKWARDS
{
2796 try_timing_offsets(info
, channel
, slot
, rank
, totalrank
);
2799 mchbar_write8(0x243, saved_243
[0]);
2800 mchbar_write8(0x643, saved_243
[1]);
2801 write_1d0(0, 0x142, 3, 1);
2802 info
->training
.reg178_center
= reg178_center
;
2805 static void ram_training(struct raminfo
*info
)
2809 saved_fc4
= mchbar_read16(0xfc4);
2810 mchbar_write16(0xfc4, 0xffff);
2812 if (info
->revision
>= 8)
2815 if (!try_cached_training(info
))
2816 do_ram_training(info
);
2817 if ((info
->silicon_revision
== 2 || info
->silicon_revision
== 3)
2818 && info
->clock_speed_index
< 2)
2820 mchbar_write16(0xfc4, saved_fc4
);
2823 u16
get_max_timing(struct raminfo
*info
, int channel
)
2825 int slot
, rank
, lane
;
2828 if ((mchbar_read8(0x2ca8) >> 2) < 1)
2831 if (info
->revision
< 8)
2834 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
2835 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
2836 if (info
->populated_ranks
[channel
][slot
][rank
])
2837 for (lane
= 0; lane
< 8 + info
->use_ecc
; lane
++)
2838 ret
= MAX(ret
, read_500(info
, channel
,
2839 get_timing_register_addr
2845 static void dmi_setup(void)
2847 gav(dmibar_read8(0x254));
2848 dmibar_write8(0x254, 1 << 0);
2849 dmibar_write16(0x1b8, 0x18f2);
2850 mchbar_clrsetbits16(0x48, ~0, 1 << 1);
2852 dmibar_setbits32(0xd68, 1 << 27);
2854 outl((gav(inl(DEFAULT_GPIOBASE
| 0x38)) & ~0x140000) | 0x400000,
2855 DEFAULT_GPIOBASE
| 0x38);
2856 gav(inb(DEFAULT_GPIOBASE
| 0xe)); // = 0xfdcaff6e
/*
 * NOTE(review): garbled by extraction -- many interior source lines are
 * missing (variable declarations, the reboot path after the soft-reset
 * message, several register accesses).  Comments only; code untouched.
 */
/* Early chipset bring-up: detect interrupted init via the MCHBAR 0x2ca8
   scratchpad, program graphics UMA size (GGC), and seed assorted
   northbridge/RCBA registers. */
2859 void chipset_init(const int s3resume
)
2865 x2ca8
= mchbar_read8(0x2ca8);
/* Scratchpad bit 0 set, or value 8 on a non-resume boot, means a prior
   RAM init was interrupted -- log and clear it.  The actual reboot call
   is missing from this extract. */
2866 if ((x2ca8
& 1) || (x2ca8
== 8 && !s3resume
)) {
2867 printk(BIOS_DEBUG
, "soft reset detected, rebooting properly\n");
2868 mchbar_write8(0x2ca8, 0);
2874 mchbar_write16(0x1170, 0xa880);
2875 mchbar_write8(0x11c1, 1 << 0);
2876 mchbar_write16(0x1170, 0xb880);
2877 mchbar_clrsetbits8(0x1210, ~0, 0x84);
/* Graphics UMA size option: 0 selects 32 MiB; encoded into GGC. */
2879 gfxsize
= get_uint_option("gfx_uma_size", 0); /* 0 for 32MB */
2881 ggc
= 0xb00 | ((gfxsize
+ 5) << 4);
/* First write with bit 1 (VGA disable?) set -- TODO confirm; rewritten
   without it near the end of this function. */
2883 pci_write_config16(NORTHBRIDGE
, GGC
, ggc
| 2);
2886 deven
= pci_read_config16(NORTHBRIDGE
, DEVEN
); // = 0x3
2889 mchbar_write8(0x2c30, 1 << 5);
2890 pci_read_config8(NORTHBRIDGE
, 0x8); // = 0x18
2891 mchbar_setbits16(0x2c30, 1 << 9);
2892 mchbar_write16(0x2c32, 0x434);
2893 mchbar_clrsetbits32(0x2c44, ~0, 0x1053687);
/* Read-modify of the GMA aperture size register (written back unchanged). */
2894 pci_read_config8(GMA
, MSAC
); // = 0x2
2895 pci_write_config8(GMA
, MSAC
, 0x2);
2897 RCBA8(0x2318) = 0x47;
2899 RCBA8(0x2320) = 0xfc;
2902 mchbar_clrsetbits32(0x30, ~0, 0x40);
/* Final GGC value, without bit 1 this time. */
2904 pci_write_config16(NORTHBRIDGE
, GGC
, ggc
);
2905 gav(RCBA32(0x3428));
2906 RCBA32(0x3428) = 0x1d;
2909 static u8
get_bits_420(const u32 reg32
)
2912 val
|= (reg32
>> 4) & (1 << 0);
2913 val
|= (reg32
>> 2) & (1 << 1);
2914 val
|= (reg32
>> 0) & (1 << 2);
2918 void raminit(const int s3resume
, const u8
*spd_addrmap
)
2920 unsigned int channel
, slot
, lane
, rank
;
2921 struct raminfo info
;
2923 int cbmem_wasnot_inited
;
2925 x2ca8
= mchbar_read8(0x2ca8);
2927 printk(RAM_DEBUG
, "Scratchpad MCHBAR8(0x2ca8): 0x%04x\n", x2ca8
);
2929 memset(&info
, 0x5a, sizeof(info
));
2931 info
.last_500_command
[0] = 0;
2932 info
.last_500_command
[1] = 0;
2934 info
.board_lane_delay
[0] = 0x14;
2935 info
.board_lane_delay
[1] = 0x07;
2936 info
.board_lane_delay
[2] = 0x07;
2937 info
.board_lane_delay
[3] = 0x08;
2938 info
.board_lane_delay
[4] = 0x56;
2939 info
.board_lane_delay
[5] = 0x04;
2940 info
.board_lane_delay
[6] = 0x04;
2941 info
.board_lane_delay
[7] = 0x05;
2942 info
.board_lane_delay
[8] = 0x10;
2944 info
.training
.reg_178
= 0;
2945 info
.training
.reg_10b
= 0;
2947 /* Wait for some bit, maybe TXT clear. */
2948 while (!(read8((u8
*)0xfed40000) & (1 << 7)))
2951 /* Wait for ME to be ready */
2952 intel_early_me_init();
2953 info
.memory_reserved_for_heci_mb
= intel_early_me_uma_size();
2956 timestamp_add_now(101);
2958 if (!s3resume
|| 1) { // possible error
2959 memset(&info
.populated_ranks
, 0, sizeof(info
.populated_ranks
));
2962 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
2963 for (slot
= 0; slot
< NUM_SLOTS
; slot
++) {
2967 const u8 useful_addresses
[] = {
2979 0x11, 0x12, 0x13, 0x14, 0x15,
2980 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b,
2982 THERMAL_AND_REFRESH
,
2984 REFERENCE_RAW_CARD_USED
,
2985 RANK1_ADDRESS_MAPPING
,
2986 0x75, 0x76, 0x77, 0x78,
2987 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
2988 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84,
2989 0x85, 0x86, 0x87, 0x88,
2990 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
2991 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
2994 if (!spd_addrmap
[2 * channel
+ slot
])
2996 for (try = 0; try < 5; try++) {
2997 v
= smbus_read_byte(spd_addrmap
[2 * channel
+ slot
],
3006 ARRAY_SIZE(useful_addresses
); addr
++)
3008 spd
[channel
][0][useful_addresses
3010 smbus_read_byte(spd_addrmap
[2 * channel
+ slot
],
3013 if (info
.spd
[channel
][0][DEVICE_TYPE
] != 11)
3014 die("Only DDR3 is supported");
3016 v
= info
.spd
[channel
][0][RANKS_AND_DQ
];
3017 info
.populated_ranks
[channel
][0][0] = 1;
3018 info
.populated_ranks
[channel
][0][1] =
3020 if (((v
>> 3) & 7) > 1)
3021 die("At most 2 ranks are supported");
3022 if ((v
& 7) == 0 || (v
& 7) > 2)
3023 die("Only x8 and x16 modules are supported");
3025 spd
[channel
][slot
][MODULE_TYPE
] & 0xF) != 2
3027 spd
[channel
][slot
][MODULE_TYPE
] & 0xF)
3029 die("Registered memory is not supported");
3030 info
.is_x16_module
[channel
][0] = (v
& 7) - 1;
3031 info
.density
[channel
][slot
] =
3032 info
.spd
[channel
][slot
][DENSITY
] & 0xF;
3035 spd
[channel
][slot
][MEMORY_BUS_WIDTH
] &
3042 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3044 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
3045 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
3047 populated_ranks
[channel
][slot
][rank
]
3048 << (2 * slot
+ rank
);
3049 info
.populated_ranks_mask
[channel
] = v
;
3054 gav(pci_read_config32(NORTHBRIDGE
, CAPID0
+ 4));
3058 timestamp_add_now(102);
3060 mchbar_clrbits8(0x2ca8, 1 << 1 | 1 << 0);
3062 collect_system_info(&info
);
3063 calculate_timings(&info
);
3066 u8 reg8
= pci_read_config8(SOUTHBRIDGE
, GEN_PMCON_2
);
3067 if (x2ca8
== 0 && (reg8
& 0x80)) {
3068 /* Don't enable S4-assertion stretch. Makes trouble on roda/rk9.
3069 reg8 = pci_read_config8(PCI_DEV(0, 0x1f, 0), 0xa4);
3070 pci_write_config8(PCI_DEV(0, 0x1f, 0), 0xa4, reg8 | 0x08);
3075 pci_write_config8(SOUTHBRIDGE
, GEN_PMCON_2
,
3076 (reg8
& ~(1 << 7)));
3079 "Interrupted RAM init, reset required.\n");
3084 if (!s3resume
&& x2ca8
== 0)
3085 pci_write_config8(SOUTHBRIDGE
, GEN_PMCON_2
,
3086 pci_read_config8(SOUTHBRIDGE
, GEN_PMCON_2
) | 0x80);
3088 compute_derived_timings(&info
);
3090 early_quickpath_init(&info
, x2ca8
);
3092 info
.cached_training
= get_cached_training();
3095 late_quickpath_init(&info
, s3resume
);
3097 mchbar_setbits32(0x2c80, 1 << 24);
3098 mchbar_write32(0x1804, mchbar_read32(0x1c04) & ~(1 << 27));
3100 mchbar_read8(0x2ca8); // !!!!
3103 mchbar_clrbits8(0x2ca8, 3);
3104 mchbar_write8(0x2ca8, mchbar_read8(0x2ca8) + 4); // "+" or "|"?
3105 /* This issues a CPU reset without resetting the platform */
3106 printk(BIOS_DEBUG
, "Issuing a CPU reset\n");
3107 /* Write back the S3 state to PM1_CNT to let the reset CPU
3108 know it also needs to take the s3 path. */
3110 write_pmbase32(PM1_CNT
, read_pmbase32(PM1_CNT
)
3111 | (SLP_TYP_S3
<< 10));
3112 mchbar_setbits32(0x1af0, 1 << 4);
3116 mchbar_clrbits8(0x2ca8, 0); // !!!!
3118 mchbar_clrbits32(0x2c80, 1 << 24);
3120 pci_write_config32(QPI_NON_CORE
, MAX_RTIDS
, 0x20220);
3123 u8 x2c20
= (mchbar_read16(0x2c20) >> 8) & 3;
3124 u16 x2c10
= mchbar_read16(0x2c10);
3125 u16 value
= mchbar_read16(0x2c00);
3126 if (x2c20
== 0 && (x2c10
& 0x300) == 0)
3131 mchbar_write16(0x2c00, value
);
3134 udelay(1000); // !!!!
3136 write_1d0(0, 0x33d, 0, 0);
3137 write_500(&info
, 0, 0, 0xb61, 0, 0);
3138 write_500(&info
, 1, 0, 0xb61, 0, 0);
3139 mchbar_write32(0x1a30, 0);
3140 mchbar_write32(0x1a34, 0);
3141 mchbar_write16(0x614, 0xb5b | (info
.populated_ranks
[1][0][0] * 0x404) |
3142 (info
.populated_ranks
[0][0][0] * 0xa0));
3143 mchbar_write16(0x616, 0x26a);
3144 mchbar_write32(0x134, 0x856000);
3145 mchbar_write32(0x160, 0x5ffffff);
3146 mchbar_clrsetbits32(0x114, ~0, 0xc2024440); // !!!!
3147 mchbar_clrsetbits32(0x118, ~0, 0x4); // !!!!
3148 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3149 mchbar_write32(0x260 + (channel
<< 10), 0x30809ff |
3150 (info
.populated_ranks_mask
[channel
] & 3) << 20);
3151 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3152 mchbar_write16(0x31c + (channel
<< 10), 0x101);
3153 mchbar_write16(0x360 + (channel
<< 10), 0x909);
3154 mchbar_write16(0x3a4 + (channel
<< 10), 0x101);
3155 mchbar_write16(0x3e8 + (channel
<< 10), 0x101);
3156 mchbar_write32(0x320 + (channel
<< 10), 0x29002900);
3157 mchbar_write32(0x324 + (channel
<< 10), 0);
3158 mchbar_write32(0x368 + (channel
<< 10), 0x32003200);
3159 mchbar_write16(0x352 + (channel
<< 10), 0x505);
3160 mchbar_write16(0x354 + (channel
<< 10), 0x3c3c);
3161 mchbar_write16(0x356 + (channel
<< 10), 0x1040);
3162 mchbar_write16(0x39a + (channel
<< 10), 0x73e4);
3163 mchbar_write16(0x3de + (channel
<< 10), 0x77ed);
3164 mchbar_write16(0x422 + (channel
<< 10), 0x1040);
3167 write_1d0(0x4, 0x151, 4, 1);
3168 write_1d0(0, 0x142, 3, 1);
3169 rdmsr(0x1ac); // !!!!
3170 write_500(&info
, 1, 1, 0x6b3, 4, 1);
3171 write_500(&info
, 1, 1, 0x6cf, 4, 1);
3173 rmw_1d0(0x21c, 0x38, 0, 6);
3175 write_1d0(((!info
.populated_ranks
[1][0][0]) << 1) | ((!info
.
3179 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3180 mchbar_write16(0x38e + (channel
<< 10), 0x5f5f);
3181 mchbar_write16(0x3d2 + (channel
<< 10), 0x5f5f);
3186 program_base_timings(&info
);
3188 mchbar_setbits8(0x5ff, 1 << 7);
3190 write_1d0(0x2, 0x1d5, 2, 1);
3191 write_1d0(0x20, 0x166, 7, 1);
3192 write_1d0(0x0, 0xeb, 3, 1);
3193 write_1d0(0x0, 0xf3, 6, 1);
3195 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3197 if (info
.populated_ranks
[channel
][0][1] && info
.clock_speed_index
> 1)
3199 if (info
.silicon_revision
== 0 || info
.silicon_revision
== 1)
3202 for (lane
= 0; lane
< 9; lane
++) {
3203 const u16 addr
= 0x125 + get_lane_offset(0, 0, lane
);
3204 rmw_500(&info
, channel
, addr
, 6, 0xf, a
);
3209 if (!info
.cached_training
) {
3212 "Couldn't find training data. Rebooting\n");
3213 reg32
= inl(DEFAULT_PMBASE
+ 0x04);
3214 outl(reg32
& ~(7 << 10), DEFAULT_PMBASE
+ 0x04);
3218 info
.training
= *info
.cached_training
;
3219 for (tm
= 0; tm
< 4; tm
++)
3220 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3221 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
3222 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
3223 for (lane
= 0; lane
< 9; lane
++)
3231 get_timing_register_addr
3235 write_1d0(info
.cached_training
->reg_178
, 0x178, 7, 1);
3236 write_1d0(info
.cached_training
->reg_10b
, 0x10b, 6, 1);
3239 mchbar_clrsetbits32(0x1f4, ~0, 1 << 17); // !!!!
3240 mchbar_write32(0x1f0, 0x1d000200);
3241 mchbar_setbits8(0x1f0, 1 << 0);
3242 while (mchbar_read8(0x1f0) & 1)
3245 program_board_delay(&info
);
3247 mchbar_write8(0x5ff, 0);
3248 mchbar_write8(0x5ff, 1 << 7);
3249 mchbar_write8(0x5f4, 1 << 0);
3251 mchbar_clrbits32(0x130, 1 << 1); // | 2 when ?
3252 while (mchbar_read32(0x130) & 1)
3255 rmw_1d0(0x14b, 0x47, 0x30, 7);
3256 rmw_1d0(0xd6, 0x38, 7, 6);
3257 rmw_1d0(0x328, 0x38, 7, 6);
3259 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3260 set_4cf(&info
, channel
, 1, 0);
3262 rmw_1d0(0x116, 0xe, 0, 4);
3263 rmw_1d0(0xae, 0x3e, 0, 6);
3264 rmw_1d0(0x300, 0x3e, 0, 6);
3265 mchbar_clrbits16(0x356, 1 << 15);
3266 mchbar_clrbits16(0x756, 1 << 15);
3267 mchbar_clrbits32(0x140, 7 << 24);
3268 mchbar_clrbits32(0x138, 7 << 24);
3269 mchbar_write32(0x130, 0x31111301);
3270 /* Wait until REG130b0 is 1. */
3271 while (mchbar_read32(0x130) & 1)
3276 const u8 val_xa1
= get_bits_420(read_1d0(0xa1, 6)); // = 0x1cf4040 // !!!!
3277 const u8 val_2f3
= get_bits_420(read_1d0(0x2f3, 6)); // = 0x10a4040 // !!!!
3279 rmw_1d0(0x320, 0x38, val_2f3
, 6);
3280 rmw_1d0(0x14b, 0x78, val_xa1
, 7);
3281 rmw_1d0(0xce, 0x38, val_xa1
, 6);
3284 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3285 set_4cf(&info
, channel
, 1, 1);
3287 rmw_1d0(0x116, 0xe, 1, 4); // = 0x4040432 // !!!!
3289 if ((mchbar_read32(0x144) & 0x1f) < 0x13)
3297 write_1d0(2, 0xae, 6, 1);
3298 write_1d0(2, 0x300, 6, 1);
3299 write_1d0(value_a1
, 0x121, 3, 1);
3300 rmw_1d0(0xd6, 0x38, 4, 6);
3301 rmw_1d0(0x328, 0x38, 4, 6);
3304 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3305 set_4cf(&info
, channel
, 2, 0);
3307 mchbar_write32(0x130, 0x11111301 | info
.populated_ranks
[1][0][0] << 30 |
3308 info
.populated_ranks
[0][0][0] << 29);
3309 while (mchbar_read8(0x130) & 1)
3313 const u8 val_xa1
= get_bits_420(read_1d0(0xa1, 6));
3314 read_1d0(0x2f3, 6); // = 0x10a4054 // !!!!
3315 rmw_1d0(0x21c, 0x38, 0, 6);
3316 rmw_1d0(0x14b, 0x78, val_xa1
, 7);
3319 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3320 set_4cf(&info
, channel
, 2, 1);
3324 mchbar_write8(0x1e8, 1 << 2);
3326 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3327 write_500(&info
, channel
,
3328 0x3 & ~(info
.populated_ranks_mask
[channel
]), 0x6b7, 2,
3330 write_500(&info
, channel
, 0x3, 0x69b, 2, 1);
3332 mchbar_clrsetbits32(0x2d0, ~0xff0c01ff, 0x200000);
3333 mchbar_write16(0x6c0, 0x14a0);
3334 mchbar_clrsetbits32(0x6d0, ~0xff0000ff, 0x8000);
3335 mchbar_write16(0x232, 1 << 3);
3336 /* 0x40004 or 0 depending on ? */
3337 mchbar_clrsetbits32(0x234, 0x40004, 0x40004);
3338 mchbar_clrsetbits32(0x34, 0x7, 5);
3339 mchbar_write32(0x128, 0x2150d05);
3340 mchbar_write8(0x12c, 0x1f);
3341 mchbar_write8(0x12d, 0x56);
3342 mchbar_write8(0x12e, 0x31);
3343 mchbar_write8(0x12f, 0);
3344 mchbar_write8(0x271, 1 << 1);
3345 mchbar_write8(0x671, 1 << 1);
3346 mchbar_write8(0x1e8, 1 << 2);
3347 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3348 mchbar_write32(0x294 + (channel
<< 10),
3349 (info
.populated_ranks_mask
[channel
] & 3) << 16);
3350 mchbar_clrsetbits32(0x134, ~0xfc01ffff, 0x10000);
3351 mchbar_clrsetbits32(0x134, ~0xfc85ffff, 0x850000);
3352 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3353 mchbar_clrsetbits32(0x260 + (channel
<< 10), 0xf << 20, 1 << 27 |
3354 (info
.populated_ranks_mask
[channel
] & 3) << 20);
3360 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3361 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
3362 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
3363 if (info
.populated_ranks
[channel
][slot
][rank
]) {
3364 jedec_read(&info
, channel
, slot
, rank
,
3365 totalrank
, 0xa, 0x400);
3369 mchbar_write8(0x12c, 0x9f);
3371 mchbar_clrsetbits8(0x271, 0x3e, 0x0e);
3372 mchbar_clrsetbits8(0x671, 0x3e, 0x0e);
3375 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3376 mchbar_write32(0x294 + (channel
<< 10),
3377 (info
.populated_ranks_mask
[channel
] & 3) << 16);
3378 mchbar_write16(0x298 + (channel
<< 10),
3379 info
.populated_ranks
[channel
][0][0] |
3380 info
.populated_ranks
[channel
][0][1] << 5);
3381 mchbar_write32(0x29c + (channel
<< 10), 0x77a);
3383 mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00); // !!!!
3387 a
= mchbar_read8(0x243);
3388 b
= mchbar_read8(0x643);
3389 mchbar_write8(0x243, a
| 2);
3390 mchbar_write8(0x643, b
| 2);
3393 write_1d0(7, 0x19b, 3, 1);
3394 write_1d0(7, 0x1c0, 3, 1);
3395 write_1d0(4, 0x1c6, 4, 1);
3396 write_1d0(4, 0x1cc, 4, 1);
3397 rmw_1d0(0x151, 0xf, 0x4, 4);
3398 mchbar_write32(0x584, 0xfffff);
3399 mchbar_write32(0x984, 0xfffff);
3401 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++)
3402 for (slot
= 0; slot
< NUM_SLOTS
; slot
++)
3403 for (rank
= 0; rank
< NUM_RANKS
; rank
++)
3405 populated_ranks
[channel
][slot
]
3407 config_rank(&info
, s3resume
,
3411 mchbar_write8(0x243, 1);
3412 mchbar_write8(0x643, 1);
3415 /* was == 1 but is common */
3416 pci_write_config16(NORTHBRIDGE
, 0xc8, 3);
3417 write_26c(0, 0x820);
3418 write_26c(1, 0x820);
3419 mchbar_setbits32(0x130, 1 << 1);
3423 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3424 mchbar_write32(0x294 + (channel
<< 10),
3425 (info
.populated_ranks_mask
[channel
] & 3) << 16);
3426 mchbar_write16(0x298 + (channel
<< 10),
3427 info
.populated_ranks
[channel
][0][0] |
3428 info
.populated_ranks
[channel
][0][1] << 5);
3429 mchbar_write32(0x29c + (channel
<< 10), 0x77a);
3431 mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00); // !!!!
3434 mchbar_clrbits32(0xfa4, 1 << 24 | 1 << 1);
3435 mchbar_write32(0xfb0, 0x2000e019);
3437 /* Before training. */
3438 timestamp_add_now(103);
3441 ram_training(&info
);
3443 /* After training. */
3444 timestamp_add_now(104);
3446 dump_timings(&info
);
3448 program_modules_memory_map(&info
, 0);
3449 program_total_memory_map(&info
);
3451 if (info
.non_interleaved_part_mb
!= 0 && info
.interleaved_part_mb
!= 0)
3452 mchbar_write8(0x111, 0 << 2 | 1 << 5 | 1 << 6 | 0 << 7);
3453 else if (have_match_ranks(&info
, 0, 4) && have_match_ranks(&info
, 1, 4))
3454 mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 1 << 7);
3455 else if (have_match_ranks(&info
, 0, 2) && have_match_ranks(&info
, 1, 2))
3456 mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 0 << 7);
3458 mchbar_write8(0x111, 3 << 2 | 1 << 5 | 1 << 6 | 0 << 7);
3460 mchbar_clrbits32(0xfac, 1 << 31);
3461 mchbar_write32(0xfb4, 0x4800);
3462 mchbar_write32(0xfb8, (info
.revision
< 8) ? 0x20 : 0x0);
3463 mchbar_write32(0xe94, 0x7ffff);
3464 mchbar_write32(0xfc0, 0x80002040);
3465 mchbar_write32(0xfc4, 0x701246);
3466 mchbar_clrbits8(0xfc8, 0x70);
3467 mchbar_setbits32(0xe5c, 1 << 24);
3468 mchbar_clrsetbits32(0x1a70, 3 << 20, 2 << 20);
3469 mchbar_write32(0x50, 0x700b0);
3470 mchbar_write32(0x3c, 0x10);
3471 mchbar_clrsetbits8(0x1aa8, 0x3f, 0xa);
3472 mchbar_setbits8(0xff4, 1 << 1);
3473 mchbar_clrsetbits32(0xff8, 0xe008, 0x1020);
3475 mchbar_write32(0xd00, IOMMU_BASE2
| 1);
3476 mchbar_write32(0xd40, IOMMU_BASE1
| 1);
3477 mchbar_write32(0xdc0, IOMMU_BASE4
| 1);
3479 write32p(IOMMU_BASE1
| 0xffc, 0x80000000);
3480 write32p(IOMMU_BASE2
| 0xffc, 0xc0000000);
3481 write32p(IOMMU_BASE4
| 0xffc, 0x80000000);
3486 eax
= info
.fsb_frequency
/ 9;
3487 mchbar_clrsetbits32(0xfcc, 0x3ffff,
3488 (eax
* 0x280) | (eax
* 0x5000) | eax
| 0x40000);
3489 mchbar_write32(0x20, 0x33001);
3492 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3493 mchbar_clrbits32(0x220 + (channel
<< 10), 0x7770);
3494 if (info
.max_slots_used_in_channel
== 1)
3495 mchbar_setbits16(0x237 + (channel
<< 10), 0x0201);
3497 mchbar_clrbits16(0x237 + (channel
<< 10), 0x0201);
3499 mchbar_setbits8(0x241 + (channel
<< 10), 1 << 0);
3501 if (info
.clock_speed_index
<= 1 && (info
.silicon_revision
== 2
3502 || info
.silicon_revision
== 3))
3503 mchbar_setbits32(0x248 + (channel
<< 10), 0x00102000);
3505 mchbar_clrbits32(0x248 + (channel
<< 10), 0x00102000);
3508 mchbar_setbits32(0x115, 1 << 24);
3513 if (!(info
.silicon_revision
== 0 || info
.silicon_revision
== 1))
3515 al
|= ((1 << (info
.max_slots_used_in_channel
- 1)) - 1) << 4;
3516 mchbar_write32(0x210, al
<< 16 | 0x20);
3519 for (channel
= 0; channel
< NUM_CHANNELS
; channel
++) {
3520 mchbar_write32(0x288 + (channel
<< 10), 0x70605040);
3521 mchbar_write32(0x28c + (channel
<< 10), 0xfffec080);
3522 mchbar_write32(0x290 + (channel
<< 10), 0x282091c |
3523 (info
.max_slots_used_in_channel
- 1) << 0x16);
3526 pci_read_config32(NORTHBRIDGE
, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
3527 reg1c
= epbar_read32(EPVC1RCAP
); // = 0x8001 // OK
3528 pci_read_config32(NORTHBRIDGE
, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
3529 epbar_write32(EPVC1RCAP
, reg1c
); // OK
3530 mchbar_read8(0xe08); // = 0x0
3531 pci_read_config32(NORTHBRIDGE
, 0xe4); // = 0x316126
3532 mchbar_setbits8(0x1210, 1 << 1);
3533 mchbar_write32(0x1200, 0x8800440);
3534 mchbar_write32(0x1204, 0x53ff0453);
3535 mchbar_write32(0x1208, 0x19002043);
3536 mchbar_write16(0x1214, 0x320);
3538 if (info
.revision
== 0x10 || info
.revision
== 0x11) {
3539 mchbar_write16(0x1214, 0x220);
3540 mchbar_setbits8(0x1210, 1 << 6);
3543 mchbar_setbits8(0x1214, 1 << 2);
3544 mchbar_write8(0x120c, 1);
3545 mchbar_write8(0x1218, 3);
3546 mchbar_write8(0x121a, 3);
3547 mchbar_write8(0x121c, 3);
3548 mchbar_write16(0xc14, 0);
3549 mchbar_write16(0xc20, 0);
3550 mchbar_write32(0x1c, 0);
3552 /* revision dependent here. */
3554 mchbar_setbits16(0x1230, 0x1f07);
3556 if (info
.uma_enabled
)
3557 mchbar_setbits32(0x11f4, 1 << 28);
3559 mchbar_setbits16(0x1230, 1 << 15);
3560 mchbar_setbits8(0x1214, 1 << 0);
3565 reg_1020
= mchbar_read32(0x1020); // = 0x6c733c // OK
3566 mchbar_write8(0x1070, 1);
3568 mchbar_write32(0x1000, 0x100);
3569 mchbar_write8(0x1007, 0);
3571 if (reg_1020
!= 0) {
3572 mchbar_write16(0x1018, 0);
3574 ebpb
= reg_1020
& 0xff;
3582 mchbar_write32(0x1014, 0xffffffff);
3584 mchbar_write32(0x1010, ((((ebpb
+ 0x7d) << 7) / bl
) & 0xff) * !!reg_1020
);
3586 mchbar_write8(0x101c, 0xb8);
3588 mchbar_clrsetbits8(0x123e, 0xf0, 0x60);
3589 if (reg_1020
!= 0) {
3590 mchbar_clrsetbits32(0x123c, 0xf << 20, 0x6 << 20);
3591 mchbar_write8(0x101c, 0xb8);
3594 const u64 heci_uma_addr
=
3596 ((((u64
)pci_read_config16(NORTHBRIDGE
, TOM
)) << 6) -
3597 info
.memory_reserved_for_heci_mb
)) << 20;
3599 setup_heci_uma(heci_uma_addr
, info
.memory_reserved_for_heci_mb
);
3601 if (info
.uma_enabled
) {
3603 mchbar_setbits32(0x11b0, 1 << 14);
3604 mchbar_setbits32(0x11b4, 1 << 14);
3605 mchbar_setbits16(0x1190, 1 << 14);
3607 ax
= mchbar_read16(0x1190) & 0xf00; // = 0x480a // OK
3608 mchbar_write16(0x1170, ax
| (mchbar_read16(0x1170) & 0x107f) | 0x4080);
3609 mchbar_setbits16(0x1170, 1 << 12);
3614 for (ecx
= 0xffff; ecx
&& (mchbar_read16(0x1170) & (1 << 12)); ecx
--)
3616 mchbar_clrbits16(0x1190, 1 << 14);
3619 pci_write_config8(SOUTHBRIDGE
, GEN_PMCON_2
,
3620 pci_read_config8(SOUTHBRIDGE
, GEN_PMCON_2
) & ~0x80);
3622 mchbar_write16(0x2ca8, 1 << 3);
3625 dump_timings(&info
);
3626 cbmem_wasnot_inited
= cbmem_recovery(s3resume
);
3629 save_timings(&info
);
3630 if (s3resume
&& cbmem_wasnot_inited
) {
3631 printk(BIOS_ERR
, "Failed S3 resume.\n");
3632 ram_check_nodie(1 * MiB
);
3634 /* Failed S3 resume, reset to come up cleanly */