kernel: remove unused utsname_set_machine()
[unleashed.git] / usr / src / uts / sun4u / cpu / us3_common.c
blob55090f57ff1a45e0013377bbdb1ef01a0bc0a26f
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/ddi.h>
29 #include <sys/sysmacros.h>
30 #include <sys/archsystm.h>
31 #include <sys/vmsystm.h>
32 #include <sys/machparam.h>
33 #include <sys/machsystm.h>
34 #include <sys/machthread.h>
35 #include <sys/cpu.h>
36 #include <sys/cmp.h>
37 #include <sys/elf_SPARC.h>
38 #include <vm/vm_dep.h>
39 #include <vm/hat_sfmmu.h>
40 #include <vm/seg_kpm.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cheetahregs.h>
43 #include <sys/us3_module.h>
44 #include <sys/async.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/dditypes.h>
48 #include <sys/prom_debug.h>
49 #include <sys/prom_plat.h>
50 #include <sys/cpu_module.h>
51 #include <sys/sysmacros.h>
52 #include <sys/intreg.h>
53 #include <sys/clock.h>
54 #include <sys/platform_module.h>
55 #include <sys/machtrap.h>
56 #include <sys/ontrap.h>
57 #include <sys/panic.h>
58 #include <sys/memlist.h>
59 #include <sys/bootconf.h>
60 #include <sys/ivintr.h>
61 #include <sys/atomic.h>
62 #include <sys/taskq.h>
63 #include <sys/note.h>
64 #include <sys/ndifm.h>
65 #include <sys/ddifm.h>
66 #include <sys/fm/protocol.h>
67 #include <sys/fm/util.h>
68 #include <sys/fm/cpu/UltraSPARC-III.h>
69 #include <sys/fpras_impl.h>
70 #include <sys/dtrace.h>
71 #include <sys/watchpoint.h>
72 #include <sys/plat_ecc_unum.h>
73 #include <sys/cyclic.h>
74 #include <sys/errorq.h>
75 #include <sys/errclassify.h>
76 #include <sys/pghw.h>
77 #include <sys/clock_impl.h>
79 #ifdef CHEETAHPLUS_ERRATUM_25
80 #include <sys/xc_impl.h>
81 #endif /* CHEETAHPLUS_ERRATUM_25 */
83 ch_cpu_logout_t clop_before_flush;
84 ch_cpu_logout_t clop_after_flush;
85 uint_t flush_retries_done = 0;
87 * Note that 'Cheetah PRM' refers to:
88 * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
92 * Per CPU pointers to physical address of TL>0 logout data areas.
93 * These pointers have to be in the kernel nucleus to avoid MMU
94 * misses.
96 uint64_t ch_err_tl1_paddrs[NCPU];
99 * One statically allocated structure to use during startup/DR
100 * to prevent unnecessary panics.
102 ch_err_tl1_data_t ch_err_tl1_data;
105 * Per CPU pending error at TL>0, used by level15 softint handler
107 uchar_t ch_err_tl1_pending[NCPU];
110 * For deferred CE re-enable after trap.
112 taskq_t *ch_check_ce_tq;
115 * Internal functions.
117 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
118 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
119 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
120 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
121 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
122 uint64_t t_afsr_bit);
123 static int clear_ecc(struct async_flt *ecc);
124 #if defined(CPU_IMP_ECACHE_ASSOC)
125 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
126 #endif
127 int cpu_ecache_set_size(struct cpu *cp);
128 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
129 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
130 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
131 int cpu_ectag_pa_to_subblk_state(int cachesize,
132 uint64_t subaddr, uint64_t tag);
133 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
134 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
135 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
136 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
137 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
138 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
139 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
140 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
141 static void cpu_scrubphys(struct async_flt *aflt);
142 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
143 int *, int *);
144 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
145 static void cpu_ereport_init(struct async_flt *aflt);
146 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
147 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
148 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
149 uint64_t nceen, ch_cpu_logout_t *clop);
150 static int cpu_ce_delayed_ec_logout(uint64_t);
151 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
152 static int cpu_error_is_ecache_data(int, uint64_t);
153 static void cpu_fmri_cpu_set(nvlist_t *, int);
154 static int cpu_error_to_resource_type(struct async_flt *aflt);
156 #ifdef CHEETAHPLUS_ERRATUM_25
157 static int mondo_recover_proc(uint16_t, int);
158 static void cheetah_nudge_init(void);
159 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
160 cyc_time_t *when);
161 static void cheetah_nudge_buddy(void);
162 #endif /* CHEETAHPLUS_ERRATUM_25 */
164 #if defined(CPU_IMP_L1_CACHE_PARITY)
165 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
166 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
167 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
168 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
169 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
170 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
171 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
172 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
173 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
174 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
175 #endif /* CPU_IMP_L1_CACHE_PARITY */
177 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
178 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
179 int *segsp, int *banksp, int *mcidp);
182 * This table is used to determine which bit(s) is(are) bad when an ECC
183 * error occurs. The array is indexed by a 9-bit syndrome. The entries
184 * of this array have the following semantics:
186 * 00-127 The number of the bad bit, when only one bit is bad.
187 * 128 ECC bit C0 is bad.
188 * 129 ECC bit C1 is bad.
189 * 130 ECC bit C2 is bad.
190 * 131 ECC bit C3 is bad.
191 * 132 ECC bit C4 is bad.
192 * 133 ECC bit C5 is bad.
193 * 134 ECC bit C6 is bad.
194 * 135 ECC bit C7 is bad.
195 * 136 ECC bit C8 is bad.
196 * 137-143 reserved for Mtag Data and ECC.
197 * 144(M2) Two bits are bad within a nibble.
198 * 145(M3) Three bits are bad within a nibble.
199 * 146(M4) Four bits are bad within a nibble.
200 * 147(M) Multiple bits (5 or more) are bad.
201 * 148 NO bits are bad.
202 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
205 #define C0 128
206 #define C1 129
207 #define C2 130
208 #define C3 131
209 #define C4 132
210 #define C5 133
211 #define C6 134
212 #define C7 135
213 #define C8 136
214 #define MT0 137 /* Mtag Data bit 0 */
215 #define MT1 138
216 #define MT2 139
217 #define MTC0 140 /* Mtag Check bit 0 */
218 #define MTC1 141
219 #define MTC2 142
220 #define MTC3 143
221 #define M2 144
222 #define M3 145
223 #define M4 146
224 #define M 147
225 #define NA 148
226 #if defined(JALAPENO) || defined(SERRANO)
227 #define S003 149 /* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
228 #define S003MEM 150 /* Syndrome 0x003 => likely from WDU/WBP */
229 #define SLAST S003MEM /* last special syndrome */
230 #else /* JALAPENO || SERRANO */
231 #define S003 149 /* Syndrome 0x003 => likely from EDU:ST */
232 #define S071 150 /* Syndrome 0x071 => likely from WDU/CPU */
233 #define S11C 151 /* Syndrome 0x11c => likely from BERR/DBERR */
234 #define SLAST S11C /* last special syndrome */
235 #endif /* JALAPENO || SERRANO */
236 #if defined(JALAPENO) || defined(SERRANO)
237 #define BPAR0 152 /* syndromes 152 through 167 for bus parity */
238 #define BPAR15 167
239 #endif /* JALAPENO || SERRANO */
241 static uint8_t ecc_syndrome_tab[] =
243 NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M,
244 C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16,
245 C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10,
246 M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M,
247 C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6,
248 M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4,
249 M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4,
250 #if defined(JALAPENO) || defined(SERRANO)
251 116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
252 #else /* JALAPENO || SERRANO */
253 116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
254 #endif /* JALAPENO || SERRANO */
255 C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5,
256 M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M,
257 M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2,
258 103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3,
259 M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M,
260 102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3,
261 98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M,
262 M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M,
263 C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4,
264 #if defined(JALAPENO) || defined(SERRANO)
265 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M,
266 #else /* JALAPENO || SERRANO */
267 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M,
268 #endif /* JALAPENO || SERRANO */
269 M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2,
270 94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M,
271 M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4,
272 89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3,
273 86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3,
274 M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2,
275 M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4,
276 77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M,
277 74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3,
278 M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M,
279 80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3,
280 M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M,
281 M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M,
282 111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M
285 #define ESYND_TBL_SIZE (sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
287 #if !(defined(JALAPENO) || defined(SERRANO))
289 * This table is used to determine which bit(s) is(are) bad when a Mtag
290 * error occurs. The array is indexed by a 4-bit ECC syndrome. The entries
291 * of this array have the following semantics:
293 * -1 Invalid mtag syndrome.
294 * 137 Mtag Data 0 is bad.
295 * 138 Mtag Data 1 is bad.
296 * 139 Mtag Data 2 is bad.
297 * 140 Mtag ECC 0 is bad.
298 * 141 Mtag ECC 1 is bad.
299 * 142 Mtag ECC 2 is bad.
300 * 143 Mtag ECC 3 is bad.
301 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
303 short mtag_syndrome_tab[] =
305 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2, M2, M2
308 #define MSYND_TBL_SIZE (sizeof (mtag_syndrome_tab) / sizeof (short))
310 #else /* !(JALAPENO || SERRANO) */
312 #define BSYND_TBL_SIZE 16
314 #endif /* !(JALAPENO || SERRANO) */
317 * Virtual Address bit flag in the data cache. This is actually bit 2 in the
318 * dcache data tag.
320 #define VA13 INT64_C(0x0000000000000002)
323 * Types returned from cpu_error_to_resource_type()
325 #define ERRTYPE_UNKNOWN 0
326 #define ERRTYPE_CPU 1
327 #define ERRTYPE_MEMORY 2
328 #define ERRTYPE_ECACHE_DATA 3
331 * CE initial classification and subsequent action lookup table
333 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
334 static int ce_disp_inited;
337 * Set to disable leaky and partner check for memory correctables
339 int ce_xdiag_off;
342 * The following are not incremented atomically so are indicative only
344 static int ce_xdiag_drops;
345 static int ce_xdiag_lkydrops;
346 static int ce_xdiag_ptnrdrops;
347 static int ce_xdiag_bad;
350 * CE leaky check callback structure
352 typedef struct {
353 struct async_flt *lkycb_aflt;
354 errorq_t *lkycb_eqp;
355 errorq_elem_t *lkycb_eqep;
356 } ce_lkychk_cb_t;
359 * defines for various ecache_flush_flag's
361 #define ECACHE_FLUSH_LINE 1
362 #define ECACHE_FLUSH_ALL 2
365 * STICK sync
367 #define STICK_ITERATION 10
368 #define MAX_TSKEW 1
369 #define EV_A_START 0
370 #define EV_A_END 1
371 #define EV_B_START 2
372 #define EV_B_END 3
373 #define EVENTS 4
375 static int64_t stick_iter = STICK_ITERATION;
376 static int64_t stick_tsk = MAX_TSKEW;
378 typedef enum {
379 EVENT_NULL = 0,
380 SLAVE_START,
381 SLAVE_CONT,
382 MASTER_START
383 } event_cmd_t;
385 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
386 static int64_t timestamp[EVENTS];
387 static volatile int slave_done;
389 #ifdef DEBUG
390 #define DSYNC_ATTEMPTS 64
391 typedef struct {
392 int64_t skew_val[DSYNC_ATTEMPTS];
393 } ss_t;
395 ss_t stick_sync_stats[NCPU];
396 #endif /* DEBUG */
398 uint_t cpu_impl_dual_pgsz = 0;
399 #if defined(CPU_IMP_DUAL_PAGESIZE)
400 uint_t disable_dual_pgsz = 0;
401 #endif /* CPU_IMP_DUAL_PAGESIZE */
404 * Save the cache bootup state for use when internal
405 * caches are to be re-enabled after an error occurs.
407 uint64_t cache_boot_state;
410 * PA[22:0] represent Displacement in Safari configuration space.
412 uint_t root_phys_addr_lo_mask = 0x7fffffu;
414 bus_config_eclk_t bus_config_eclk[] = {
415 #if defined(JALAPENO) || defined(SERRANO)
416 {JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
417 {JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
418 {JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
419 #else /* JALAPENO || SERRANO */
420 {SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
421 {SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
422 {SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
423 #endif /* JALAPENO || SERRANO */
424 {0, 0}
428 * Interval for deferred CEEN reenable
430 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
433 * set in /etc/system to control logging of user BERR/TO's
435 int cpu_berr_to_verbose = 0;
438 * set to 0 in /etc/system to defer CEEN reenable for all CEs
440 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
441 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
444 * Set of all offline cpus
446 cpuset_t cpu_offline_set;
448 static void cpu_delayed_check_ce_errors(void *);
449 static void cpu_check_ce_errors(void *);
450 void cpu_error_ecache_flush(ch_async_flt_t *);
451 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
452 static void cpu_log_and_clear_ce(ch_async_flt_t *);
453 void cpu_ce_detected(ch_cpu_errors_t *, int);
456 * CE Leaky check timeout in microseconds. This is chosen to be twice the
457 * memory refresh interval of current DIMMs (64ms). After initial fix that
458 * gives at least one full refresh cycle in which the cell can leak
459 * (whereafter further refreshes simply reinforce any incorrect bit value).
461 clock_t cpu_ce_lkychk_timeout_usec = 128000;
464 * CE partner check partner caching period in seconds
466 int cpu_ce_ptnr_cachetime_sec = 60;
/*
 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
 * (32 bytes are copied and then flushed from instruction memory).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: both the copy and the flush stay together when the
 * macro is used as the body of an unbraced if/else.  Invoke it with a
 * trailing semicolon.
 */
#define	CH_SET_TRAP(ttentry, ttlabel)					\
	do {								\
		bcopy((const void *)&(ttlabel), &(ttentry), 32);	\
		flush_instr_mem((caddr_t)&(ttentry), 32);		\
	} while (0)
475 static int min_ecache_size;
476 static uint_t priv_hcl_1;
477 static uint_t priv_hcl_2;
478 static uint_t priv_hcl_4;
479 static uint_t priv_hcl_8;
481 void
482 cpu_setup(void)
484 extern int at_flags;
485 extern int cpc_has_overflow_intr;
488 * Setup chip-specific trap handlers.
490 cpu_init_trap();
492 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
494 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
497 * save the cache bootup state.
499 cache_boot_state = get_dcu() & DCU_CACHE;
502 * Due to the number of entries in the fully-associative tlb
503 * this may have to be tuned lower than in spitfire.
505 pp_slots = MIN(8, MAXPP_SLOTS);
508 * Block stores do not invalidate all pages of the d$, pagecopy
509 * et. al. need virtual translations with virtual coloring taken
510 * into consideration. prefetch/ldd will pollute the d$ on the
511 * load side.
513 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
515 if (use_page_coloring) {
516 do_pg_coloring = 1;
519 isa_list =
520 "sparcv9+vis2 sparcv9+vis sparcv9 "
521 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 "sparcv8 sparcv8-fsmuld sparcv7 sparc";
525 * On Panther-based machines, this should
526 * also include AV_SPARC_POPC too
528 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
531 * On cheetah, there's no hole in the virtual address space
533 hole_start = hole_end = 0;
536 * The kpm mapping window.
537 * kpm_size:
538 * The size of a single kpm range.
539 * The overall size will be: kpm_size * vac_colors.
540 * kpm_vbase:
541 * The virtual start address of the kpm range within the kernel
542 * virtual address space. kpm_vbase has to be kpm_size aligned.
544 kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 kpm_size_shift = 43;
546 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 kpm_smallpages = 1;
550 * The traptrace code uses either %tick or %stick for
551 * timestamping. We have %stick so we can use it.
553 traptrace_use_stick = 1;
556 * Cheetah has a performance counter overflow interrupt
558 cpc_has_overflow_intr = 1;
560 #if defined(CPU_IMP_DUAL_PAGESIZE)
562 * Use Cheetah+ and later dual page size support.
564 if (!disable_dual_pgsz) {
565 cpu_impl_dual_pgsz = 1;
567 #endif /* CPU_IMP_DUAL_PAGESIZE */
570 * Declare that this architecture/cpu combination does fpRAS.
572 fpras_implemented = 1;
575 * Setup CE lookup table
577 CE_INITDISPTBL_POPULATE(ce_disp_table);
578 ce_disp_inited = 1;
582 * Called by setcpudelay
584 void
585 cpu_init_tick_freq(void)
588 * For UltraSPARC III and beyond we want to use the
589 * system clock rate as the basis for low level timing,
590 * due to support of mixed speed CPUs and power managment.
592 if (system_clock_freq == 0)
593 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
595 sys_tick_freq = system_clock_freq;
598 #ifdef CHEETAHPLUS_ERRATUM_25
600 * Tunables
602 int cheetah_bpe_off = 0;
603 int cheetah_sendmondo_recover = 1;
604 int cheetah_sendmondo_fullscan = 0;
605 int cheetah_sendmondo_recover_delay = 5;
607 #define CHEETAH_LIVELOCK_MIN_DELAY 1
610 * Recovery Statistics
612 typedef struct cheetah_livelock_entry {
613 int cpuid; /* fallen cpu */
614 int buddy; /* cpu that ran recovery */
615 clock_t lbolt; /* when recovery started */
616 hrtime_t recovery_time; /* time spent in recovery */
617 } cheetah_livelock_entry_t;
619 #define CHEETAH_LIVELOCK_NENTRY 32
621 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
622 int cheetah_livelock_entry_nxt;
/*
 * Point statp at the next slot of the cheetah_livelock_hist ring and advance
 * cheetah_livelock_entry_nxt, wrapping to 0 after CHEETAH_LIVELOCK_NENTRY
 * entries.  Expands to a single statement; invoke with a trailing semicolon.
 */
#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	do {			\
	(statp) = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
		cheetah_livelock_entry_nxt = 0;				\
	}								\
} while (0)

/*
 * Store val into field item of the history entry statp points to.
 */
#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	\
	((statp)->item = (val))
633 struct {
634 hrtime_t hrt; /* maximum recovery time */
635 int recovery; /* recovered */
636 int full_claimed; /* maximum pages claimed in full recovery */
637 int proc_entry; /* attempted to claim TSB */
638 int proc_tsb_scan; /* tsb scanned */
639 int proc_tsb_partscan; /* tsb partially scanned */
640 int proc_tsb_fullscan; /* whole tsb scanned */
641 int proc_claimed; /* maximum pages claimed in tsb scan */
642 int proc_user; /* user thread */
643 int proc_kernel; /* kernel thread */
644 int proc_onflt; /* bad stack */
645 int proc_cpu; /* null cpu */
646 int proc_thread; /* null thread */
647 int proc_proc; /* null proc */
648 int proc_as; /* null as */
649 int proc_hat; /* null hat */
650 int proc_hat_inval; /* hat contents don't make sense */
651 int proc_hat_busy; /* hat is changing TSBs */
652 int proc_tsb_reloc; /* TSB skipped because being relocated */
653 int proc_cnum_bad; /* cnum out of range */
654 int proc_cnum; /* last cnum processed */
655 tte_t proc_tte; /* last tte processed */
656 } cheetah_livelock_stat;
/* Increment counter item in cheetah_livelock_stat. */
#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++

/* Overwrite field item in cheetah_livelock_stat with value. */
#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	cheetah_livelock_stat.item = (value)

/*
 * Raise field item in cheetah_livelock_stat to value if value is larger.
 * Note that value is evaluated twice, so it must be free of side effects.
 * Expands to a single statement; invoke with a trailing semicolon.
 */
#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	do {		\
	if ((value) > cheetah_livelock_stat.item)		\
		cheetah_livelock_stat.item = (value);		\
} while (0)
669 * Attempt to recover a cpu by claiming every cache line as saved
670 * in the TSB that the non-responsive cpu is using. Since we can't
671 * grab any adaptive lock, this is at best an attempt to do so. Because
672 * we don't grab any locks, we must operate under the protection of
673 * on_fault().
675 * Return 1 if cpuid could be recovered, 0 if failed.
678 mondo_recover_proc(uint16_t cpuid, int bn)
680 label_t ljb;
681 cpu_t *cp;
682 kthread_t *t;
683 proc_t *p;
684 struct as *as;
685 struct hat *hat;
686 uint_t cnum;
687 struct tsb_info *tsbinfop;
688 struct tsbe *tsbep;
689 caddr_t tsbp;
690 caddr_t end_tsbp;
691 uint64_t paddr;
692 uint64_t idsr;
693 u_longlong_t pahi, palo;
694 int pages_claimed = 0;
695 tte_t tsbe_tte;
696 int tried_kernel_tsb = 0;
697 mmu_ctx_t *mmu_ctxp;
699 CHEETAH_LIVELOCK_STAT(proc_entry);
701 if (on_fault(&ljb)) {
702 CHEETAH_LIVELOCK_STAT(proc_onflt);
703 goto badstruct;
706 if ((cp = cpu[cpuid]) == NULL) {
707 CHEETAH_LIVELOCK_STAT(proc_cpu);
708 goto badstruct;
711 if ((t = cp->cpu_thread) == NULL) {
712 CHEETAH_LIVELOCK_STAT(proc_thread);
713 goto badstruct;
716 if ((p = ttoproc(t)) == NULL) {
717 CHEETAH_LIVELOCK_STAT(proc_proc);
718 goto badstruct;
721 if ((as = p->p_as) == NULL) {
722 CHEETAH_LIVELOCK_STAT(proc_as);
723 goto badstruct;
726 if ((hat = as->a_hat) == NULL) {
727 CHEETAH_LIVELOCK_STAT(proc_hat);
728 goto badstruct;
731 if (hat != ksfmmup) {
732 CHEETAH_LIVELOCK_STAT(proc_user);
733 if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
734 CHEETAH_LIVELOCK_STAT(proc_hat_busy);
735 goto badstruct;
737 tsbinfop = hat->sfmmu_tsb;
738 if (tsbinfop == NULL) {
739 CHEETAH_LIVELOCK_STAT(proc_hat_inval);
740 goto badstruct;
742 tsbp = tsbinfop->tsb_va;
743 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
744 } else {
745 CHEETAH_LIVELOCK_STAT(proc_kernel);
746 tsbinfop = NULL;
747 tsbp = ktsb_base;
748 end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
751 /* Verify as */
752 if (hat->sfmmu_as != as) {
753 CHEETAH_LIVELOCK_STAT(proc_hat_inval);
754 goto badstruct;
757 mmu_ctxp = CPU_MMU_CTXP(cp);
758 ASSERT(mmu_ctxp);
759 cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
760 CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
762 if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
763 (cnum >= mmu_ctxp->mmu_nctxs)) {
764 CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
765 goto badstruct;
768 do {
769 CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
772 * Skip TSBs being relocated. This is important because
773 * we want to avoid the following deadlock scenario:
775 * 1) when we came in we set ourselves to "in recover" state.
776 * 2) when we try to touch TSB being relocated the mapping
777 * will be in the suspended state so we'll spin waiting
778 * for it to be unlocked.
779 * 3) when the CPU that holds the TSB mapping locked tries to
780 * unlock it it will send a xtrap which will fail to xcall
781 * us or the CPU we're trying to recover, and will in turn
782 * enter the mondo code.
783 * 4) since we are still spinning on the locked mapping
784 * no further progress will be made and the system will
785 * inevitably hard hang.
787 * A TSB not being relocated can't begin being relocated
788 * while we're accessing it because we check
789 * sendmondo_in_recover before relocating TSBs.
791 if (hat != ksfmmup &&
792 (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
793 CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
794 goto next_tsbinfo;
797 for (tsbep = (struct tsbe *)tsbp;
798 tsbep < (struct tsbe *)end_tsbp; tsbep++) {
799 tsbe_tte = tsbep->tte_data;
801 if (tsbe_tte.tte_val == 0) {
803 * Invalid tte
805 continue;
807 if (tsbe_tte.tte_se) {
809 * Don't want device registers
811 continue;
813 if (tsbe_tte.tte_cp == 0) {
815 * Must be cached in E$
817 continue;
819 if (tsbep->tte_tag.tag_invalid != 0) {
821 * Invalid tag, ingnore this entry.
823 continue;
825 CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
826 idsr = getidsr();
827 if ((idsr & (IDSR_NACK_BIT(bn) |
828 IDSR_BUSY_BIT(bn))) == 0) {
829 CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
830 goto done;
832 pahi = tsbe_tte.tte_pahi;
833 palo = tsbe_tte.tte_palo;
834 paddr = (uint64_t)((pahi << 32) |
835 (palo << MMU_PAGESHIFT));
836 claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
837 CH_ECACHE_SUBBLK_SIZE);
838 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
839 shipit(cpuid, bn);
841 pages_claimed++;
843 next_tsbinfo:
844 if (tsbinfop != NULL)
845 tsbinfop = tsbinfop->tsb_next;
846 if (tsbinfop != NULL) {
847 tsbp = tsbinfop->tsb_va;
848 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
849 } else if (tsbp == ktsb_base) {
850 tried_kernel_tsb = 1;
851 } else if (!tried_kernel_tsb) {
852 tsbp = ktsb_base;
853 end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
854 hat = ksfmmup;
855 tsbinfop = NULL;
857 } while (tsbinfop != NULL ||
858 ((tsbp == ktsb_base) && !tried_kernel_tsb));
860 CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
861 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
862 no_fault();
863 idsr = getidsr();
864 if ((idsr & (IDSR_NACK_BIT(bn) |
865 IDSR_BUSY_BIT(bn))) == 0) {
866 return (1);
867 } else {
868 return (0);
871 done:
872 no_fault();
873 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
874 return (1);
876 badstruct:
877 no_fault();
878 return (0);
882 * Attempt to claim ownership, temporarily, of every cache line that a
883 * non-responsive cpu might be using. This might kick that cpu out of
884 * this state.
886 * The return value indicates to the caller if we have exhausted all recovery
887 * techniques. If 1 is returned, it is useless to call this function again
888 * even for a different target CPU.
891 mondo_recover(uint16_t cpuid, int bn)
893 struct memseg *seg;
894 uint64_t begin_pa, end_pa, cur_pa;
895 hrtime_t begin_hrt, end_hrt;
896 int retval = 0;
897 int pages_claimed = 0;
898 cheetah_livelock_entry_t *histp;
899 uint64_t idsr;
901 if (atomic_cas_32(&sendmondo_in_recover, 0, 1) != 0) {
903 * Wait while recovery takes place
905 while (sendmondo_in_recover) {
906 drv_usecwait(1);
909 * Assume we didn't claim the whole memory. If
910 * the target of this caller is not recovered,
911 * it will come back.
913 return (retval);
916 CHEETAH_LIVELOCK_ENTRY_NEXT(histp);
917 CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, LBOLT_WAITFREE);
918 CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
919 CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
921 begin_hrt = gethrtime_waitfree();
923 * First try to claim the lines in the TSB the target
924 * may have been using.
926 if (mondo_recover_proc(cpuid, bn) == 1) {
928 * Didn't claim the whole memory
930 goto done;
934 * We tried using the TSB. The target is still
935 * not recovered. Check if complete memory scan is
936 * enabled.
938 if (cheetah_sendmondo_fullscan == 0) {
940 * Full memory scan is disabled.
942 retval = 1;
943 goto done;
947 * Try claiming the whole memory.
949 for (seg = memsegs; seg; seg = seg->next) {
950 begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
951 end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
952 for (cur_pa = begin_pa; cur_pa < end_pa;
953 cur_pa += MMU_PAGESIZE) {
954 idsr = getidsr();
955 if ((idsr & (IDSR_NACK_BIT(bn) |
956 IDSR_BUSY_BIT(bn))) == 0) {
958 * Didn't claim all memory
960 goto done;
962 claimlines(cur_pa, MMU_PAGESIZE,
963 CH_ECACHE_SUBBLK_SIZE);
964 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
965 shipit(cpuid, bn);
967 pages_claimed++;
972 * We did all we could.
974 retval = 1;
976 done:
978 * Update statistics
980 end_hrt = gethrtime_waitfree();
981 CHEETAH_LIVELOCK_STAT(recovery);
982 CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
983 CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
984 CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
985 (end_hrt - begin_hrt));
987 while (atomic_cas_32(&sendmondo_in_recover, 1, 0) != 1)
990 return (retval);
994 * This is called by the cyclic framework when this CPU becomes online
996 /*ARGSUSED*/
997 static void
998 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1001 hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1002 hdlr->cyh_level = CY_LOW_LEVEL;
1003 hdlr->cyh_arg = NULL;
1006 * Stagger the start time
1008 when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1009 if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1010 cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1012 when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1016 * Create a low level cyclic to send a xtrap to the next cpu online.
1017 * However, there's no need to have this running on a uniprocessor system.
1019 static void
1020 cheetah_nudge_init(void)
1022 cyc_omni_handler_t hdlr;
1024 if (max_ncpus == 1) {
1025 return;
1028 hdlr.cyo_online = cheetah_nudge_onln;
1029 hdlr.cyo_offline = NULL;
1030 hdlr.cyo_arg = NULL;
1032 mutex_enter(&cpu_lock);
1033 (void) cyclic_add_omni(&hdlr);
1034 mutex_exit(&cpu_lock);
1038 * Cyclic handler to wake up buddy
1040 void
1041 cheetah_nudge_buddy(void)
1044 * Disable kernel preemption to protect the cpu list
1046 kpreempt_disable();
1047 if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1048 xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1049 0, 0);
1051 kpreempt_enable();
1054 #endif /* CHEETAHPLUS_ERRATUM_25 */
1056 #ifdef SEND_MONDO_STATS
1057 uint32_t x_one_stimes[64];
1058 uint32_t x_one_ltimes[16];
1059 uint32_t x_set_stimes[64];
1060 uint32_t x_set_ltimes[16];
1061 uint32_t x_set_cpus[NCPU];
1062 uint32_t x_nack_stimes[64];
1063 #endif
1066 * Note: A version of this function is used by the debugger via the KDI,
1067 * and must be kept in sync with this version. Any changes made to this
1068 * function to support new chips or to accomodate errata must also be included
1069 * in the KDI-specific version. See us3_kdi.c.
1071 void
1072 send_one_mondo(int cpuid)
1074 int busy, nack;
1075 uint64_t idsr, starttick, endtick, tick, lasttick;
1076 uint64_t busymask;
1077 #ifdef CHEETAHPLUS_ERRATUM_25
1078 int recovered = 0;
1079 #endif
1081 CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1082 starttick = lasttick = gettick();
1083 shipit(cpuid, 0);
1084 endtick = starttick + xc_tick_limit;
1085 busy = nack = 0;
1086 #if defined(JALAPENO) || defined(SERRANO)
1088 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1089 * will be used for dispatching interrupt. For now, assume
1090 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1091 * issues with respect to BUSY/NACK pair usage.
1093 busymask = IDSR_BUSY_BIT(cpuid);
1094 #else /* JALAPENO || SERRANO */
1095 busymask = IDSR_BUSY;
1096 #endif /* JALAPENO || SERRANO */
1097 for (;;) {
1098 idsr = getidsr();
1099 if (idsr == 0)
1100 break;
1102 tick = gettick();
1104 * If there is a big jump between the current tick
1105 * count and lasttick, we have probably hit a break
1106 * point. Adjust endtick accordingly to avoid panic.
1108 if (tick > (lasttick + xc_tick_jump_limit))
1109 endtick += (tick - lasttick);
1110 lasttick = tick;
1111 if (tick > endtick) {
1112 if (panic_quiesce)
1113 return;
1114 #ifdef CHEETAHPLUS_ERRATUM_25
1115 if (cheetah_sendmondo_recover && recovered == 0) {
1116 if (mondo_recover(cpuid, 0)) {
1118 * We claimed the whole memory or
1119 * full scan is disabled.
1121 recovered++;
1123 tick = gettick();
1124 endtick = tick + xc_tick_limit;
1125 lasttick = tick;
1127 * Recheck idsr
1129 continue;
1130 } else
1131 #endif /* CHEETAHPLUS_ERRATUM_25 */
1133 cmn_err(CE_PANIC, "send mondo timeout "
1134 "(target 0x%x) [%d NACK %d BUSY]",
1135 cpuid, nack, busy);
1139 if (idsr & busymask) {
1140 busy++;
1141 continue;
1143 drv_usecwait(1);
1144 shipit(cpuid, 0);
1145 nack++;
1146 busy = 0;
1148 #ifdef SEND_MONDO_STATS
1150 int n = gettick() - starttick;
1151 if (n < 8192)
1152 x_one_stimes[n >> 7]++;
1153 else
1154 x_one_ltimes[(n >> 13) & 0xf]++;
1156 #endif
/*
 * This implementation has an empty body: calling it does nothing.
 */
void
syncfpu(void)
{
}
1165 * Return processor specific async error structure
1166 * size used.
1169 cpu_aflt_size(void)
1171 return (sizeof (ch_async_flt_t));
/*
 * Tunable controlling whether, during panic, the logout areas of the other
 * cpus are checked for potential syndrome 71 generating errors.  It
 * defaults to 1; the code that consults it is outside this part of the
 * file -- presumably clearing it disables the check.
 */
int enable_check_other_cpus_logout = 1;
1181 * Check other cpus logout area for potential synd 71 generating
1182 * errors.
1184 static void
1185 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1186 ch_cpu_logout_t *clop)
1188 struct async_flt *aflt;
1189 ch_async_flt_t ch_flt;
1190 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1192 if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1193 return;
1196 bzero(&ch_flt, sizeof (ch_async_flt_t));
1198 t_afar = clop->clo_data.chd_afar;
1199 t_afsr = clop->clo_data.chd_afsr;
1200 t_afsr_ext = clop->clo_data.chd_afsr_ext;
1201 #if defined(SERRANO)
1202 ch_flt.afar2 = clop->clo_data.chd_afar2;
1203 #endif /* SERRANO */
1206 * In order to simplify code, we maintain this afsr_errs
1207 * variable which holds the aggregate of AFSR and AFSR_EXT
1208 * sticky bits.
1210 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1211 (t_afsr & C_AFSR_ALL_ERRS);
1213 /* Setup the async fault structure */
1214 aflt = (struct async_flt *)&ch_flt;
1215 aflt->flt_id = gethrtime_waitfree();
1216 ch_flt.afsr_ext = t_afsr_ext;
1217 ch_flt.afsr_errs = t_afsr_errs;
1218 aflt->flt_stat = t_afsr;
1219 aflt->flt_addr = t_afar;
1220 aflt->flt_bus_id = cpuid;
1221 aflt->flt_inst = cpuid;
1222 aflt->flt_pc = tpc;
1223 aflt->flt_prot = AFLT_PROT_NONE;
1224 aflt->flt_class = CPU_FAULT;
1225 aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1226 aflt->flt_tl = tl;
1227 aflt->flt_status = ecc_type;
1228 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1231 * Queue events on the async event queue, one event per error bit.
1232 * If no events are queued, queue an event to complain.
1234 if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1235 ch_flt.flt_type = CPU_INV_AFSR;
1236 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1237 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1238 aflt->flt_panic);
1242 * Zero out + invalidate CPU logout.
1244 bzero(clop, sizeof (ch_cpu_logout_t));
1245 clop->clo_data.chd_afar = LOGOUT_INVALID;
1249 * Check the logout areas of all other cpus for unlogged errors.
1251 static void
1252 cpu_check_other_cpus_logout(void)
1254 int i, j;
1255 processorid_t myid;
1256 struct cpu *cp;
1257 ch_err_tl1_data_t *cl1p;
1259 myid = CPU->cpu_id;
1260 for (i = 0; i < NCPU; i++) {
1261 cp = cpu[i];
1263 if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1264 (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1265 continue;
1269 * Check each of the tl>0 logout areas
1271 cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1272 for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1273 if (cl1p->ch_err_tl1_flags == 0)
1274 continue;
1276 cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1277 1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1281 * Check each of the remaining logout areas
1283 cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1284 CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1285 cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1286 CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1287 cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1288 CPU_PRIVATE_PTR(cp, chpr_async_logout));
1293 * The fast_ecc_err handler transfers control here for UCU, UCC events.
1294 * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1295 * flush the error that caused the UCU/UCC, then again here at the end to
1296 * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1297 * the probability of getting a TL>1 Fast ECC trap when we're fielding
1298 * another Fast ECC trap.
1300 * Cheetah+ also handles: TSCE: No additional processing required.
1301 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1303 * Note that the p_clo_flags input is only valid in cases where the
1304 * cpu_private struct is not yet initialized (since that is the only
1305 * time that information cannot be obtained from the logout struct.)
1307 /*ARGSUSED*/
1308 void
1309 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1311 ch_cpu_logout_t *clop;
1312 uint64_t ceen, nceen;
1315 * Get the CPU log out info. If we can't find our CPU private
1316 * pointer, then we will have to make due without any detailed
1317 * logout information.
1319 if (CPU_PRIVATE(CPU) == NULL) {
1320 clop = NULL;
1321 ceen = p_clo_flags & EN_REG_CEEN;
1322 nceen = p_clo_flags & EN_REG_NCEEN;
1323 } else {
1324 clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1325 ceen = clop->clo_flags & EN_REG_CEEN;
1326 nceen = clop->clo_flags & EN_REG_NCEEN;
1329 cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1330 (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1334 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1335 * ECC at TL>0. Need to supply either a error register pointer or a
1336 * cpu logout structure pointer.
1338 static void
1339 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1340 uint64_t nceen, ch_cpu_logout_t *clop)
1342 struct async_flt *aflt;
1343 ch_async_flt_t ch_flt;
1344 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1345 char pr_reason[MAX_REASON_STRING];
1346 ch_cpu_errors_t cpu_error_regs;
1348 bzero(&ch_flt, sizeof (ch_async_flt_t));
1350 * If no cpu logout data, then we will have to make due without
1351 * any detailed logout information.
1353 if (clop == NULL) {
1354 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1355 get_cpu_error_state(&cpu_error_regs);
1356 set_cpu_error_state(&cpu_error_regs);
1357 t_afar = cpu_error_regs.afar;
1358 t_afsr = cpu_error_regs.afsr;
1359 t_afsr_ext = cpu_error_regs.afsr_ext;
1360 #if defined(SERRANO)
1361 ch_flt.afar2 = cpu_error_regs.afar2;
1362 #endif /* SERRANO */
1363 } else {
1364 t_afar = clop->clo_data.chd_afar;
1365 t_afsr = clop->clo_data.chd_afsr;
1366 t_afsr_ext = clop->clo_data.chd_afsr_ext;
1367 #if defined(SERRANO)
1368 ch_flt.afar2 = clop->clo_data.chd_afar2;
1369 #endif /* SERRANO */
1373 * In order to simplify code, we maintain this afsr_errs
1374 * variable which holds the aggregate of AFSR and AFSR_EXT
1375 * sticky bits.
1377 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1378 (t_afsr & C_AFSR_ALL_ERRS);
1379 pr_reason[0] = '\0';
1381 /* Setup the async fault structure */
1382 aflt = (struct async_flt *)&ch_flt;
1383 aflt->flt_id = gethrtime_waitfree();
1384 ch_flt.afsr_ext = t_afsr_ext;
1385 ch_flt.afsr_errs = t_afsr_errs;
1386 aflt->flt_stat = t_afsr;
1387 aflt->flt_addr = t_afar;
1388 aflt->flt_bus_id = getprocessorid();
1389 aflt->flt_inst = CPU->cpu_id;
1390 aflt->flt_pc = tpc;
1391 aflt->flt_prot = AFLT_PROT_NONE;
1392 aflt->flt_class = CPU_FAULT;
1393 aflt->flt_priv = priv;
1394 aflt->flt_tl = tl;
1395 aflt->flt_status = ECC_F_TRAP;
1396 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1399 * XXXX - Phenomenal hack to get around Solaris not getting all the
1400 * cmn_err messages out to the console. The situation is a UCU (in
1401 * priv mode) which causes a WDU which causes a UE (on the retry).
1402 * The messages for the UCU and WDU are enqueued and then pulled off
1403 * the async queue via softint and syslogd starts to process them
1404 * but doesn't get them to the console. The UE causes a panic, but
1405 * since the UCU/WDU messages are already in transit, those aren't
1406 * on the async queue. The hack is to check if we have a matching
1407 * WDU event for the UCU, and if it matches, we're more than likely
1408 * going to panic with a UE, unless we're under protection. So, we
1409 * check to see if we got a matching WDU event and if we're under
1410 * protection.
1412 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1413 * looks like this:
1414 * UCU->WDU->UE
1415 * For Panther, it could look like either of these:
1416 * UCU---->WDU->L3_WDU->UE
1417 * L3_UCU->WDU->L3_WDU->UE
1419 if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1420 aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1421 curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1422 get_cpu_error_state(&cpu_error_regs);
1423 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
1424 aflt->flt_panic |=
1425 ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1426 (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
1427 (cpu_error_regs.afar == t_afar));
1428 aflt->flt_panic |= ((clop == NULL) &&
1429 (t_afsr_errs & C_AFSR_WDU) &&
1430 (t_afsr_errs & C_AFSR_L3_WDU));
1431 } else {
1432 aflt->flt_panic |=
1433 ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1434 (cpu_error_regs.afar == t_afar));
1435 aflt->flt_panic |= ((clop == NULL) &&
1436 (t_afsr_errs & C_AFSR_WDU));
1441 * Queue events on the async event queue, one event per error bit.
1442 * If no events are queued or no Fast ECC events are on in the AFSR,
1443 * queue an event to complain.
1445 if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1446 ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1447 ch_flt.flt_type = CPU_INV_AFSR;
1448 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1449 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1450 aflt->flt_panic);
1454 * Zero out + invalidate CPU logout.
1456 if (clop) {
1457 bzero(clop, sizeof (ch_cpu_logout_t));
1458 clop->clo_data.chd_afar = LOGOUT_INVALID;
1462 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1463 * or disrupting errors have happened. We do this because if a
1464 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1465 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that
1466 * CEEN works differently on Cheetah than on Spitfire. Also, we enable
1467 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1468 * deferred or disrupting error happening between checking the AFSR and
1469 * enabling NCEEN/CEEN.
1471 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1472 * taken.
1474 set_error_enable(get_error_enable() | (nceen | ceen));
1475 if (clear_errors(&ch_flt)) {
1476 aflt->flt_panic |= ((ch_flt.afsr_errs &
1477 (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1478 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1479 NULL);
1483 * Panic here if aflt->flt_panic has been set. Enqueued errors will
1484 * be logged as part of the panic flow.
1486 if (aflt->flt_panic)
1487 fm_panic("%sError(s)", pr_reason);
1490 * Flushing the Ecache here gets the part of the trap handler that
1491 * is run at TL=1 out of the Ecache.
1493 cpu_flush_ecache();
1497 * This is called via sys_trap from pil15_interrupt code if the
1498 * corresponding entry in ch_err_tl1_pending is set. Checks the
1499 * various ch_err_tl1_data structures for valid entries based on the bit
1500 * settings in the ch_err_tl1_flags entry of the structure.
1502 /*ARGSUSED*/
1503 void
1504 cpu_tl1_error(struct regs *rp, int panic)
1506 ch_err_tl1_data_t *cl1p, cl1;
1507 int i, ncl1ps;
1508 uint64_t me_flags;
1509 uint64_t ceen, nceen;
1511 if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1512 cl1p = &ch_err_tl1_data;
1513 ncl1ps = 1;
1514 } else if (CPU_PRIVATE(CPU) != NULL) {
1515 cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1516 ncl1ps = CH_ERR_TL1_TLMAX;
1517 } else {
1518 ncl1ps = 0;
1521 for (i = 0; i < ncl1ps; i++, cl1p++) {
1522 if (cl1p->ch_err_tl1_flags == 0)
1523 continue;
1526 * Grab a copy of the logout data and invalidate
1527 * the logout area.
1529 cl1 = *cl1p;
1530 bzero(cl1p, sizeof (ch_err_tl1_data_t));
1531 cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1532 me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1535 * Log "first error" in ch_err_tl1_data.
1537 if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1538 ceen = get_error_enable() & EN_REG_CEEN;
1539 nceen = get_error_enable() & EN_REG_NCEEN;
1540 cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1541 1, ceen, nceen, &cl1.ch_err_tl1_logout);
1543 #if defined(CPU_IMP_L1_CACHE_PARITY)
1544 if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1545 cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1546 (caddr_t)cl1.ch_err_tl1_tpc);
1548 #endif /* CPU_IMP_L1_CACHE_PARITY */
1551 * Log "multiple events" in ch_err_tl1_data. Note that
1552 * we don't read and clear the AFSR/AFAR in the TL>0 code
1553 * if the structure is busy, we just do the cache flushing
1554 * we have to do and then do the retry. So the AFSR/AFAR
1555 * at this point *should* have some relevant info. If there
1556 * are no valid errors in the AFSR, we'll assume they've
1557 * already been picked up and logged. For I$/D$ parity,
1558 * we just log an event with an "Unknown" (NULL) TPC.
1560 if (me_flags & CH_ERR_FECC) {
1561 ch_cpu_errors_t cpu_error_regs;
1562 uint64_t t_afsr_errs;
1565 * Get the error registers and see if there's
1566 * a pending error. If not, don't bother
1567 * generating an "Invalid AFSR" error event.
1569 get_cpu_error_state(&cpu_error_regs);
1570 t_afsr_errs = (cpu_error_regs.afsr_ext &
1571 C_AFSR_EXT_ALL_ERRS) |
1572 (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1573 if (t_afsr_errs != 0) {
1574 ceen = get_error_enable() & EN_REG_CEEN;
1575 nceen = get_error_enable() & EN_REG_NCEEN;
1576 cpu_log_fast_ecc_error(NULL, 1,
1577 1, ceen, nceen, NULL);
1580 #if defined(CPU_IMP_L1_CACHE_PARITY)
1581 if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1582 cpu_parity_error(rp, me_flags, NULL);
1584 #endif /* CPU_IMP_L1_CACHE_PARITY */
1589 * Called from Fast ECC TL>0 handler in case of fatal error.
1590 * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1591 * but if we don't, we'll panic with something reasonable.
1593 /*ARGSUSED*/
1594 void
1595 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1597 cpu_tl1_error(rp, 1);
1599 * Should never return, but just in case.
1601 fm_panic("Unsurvivable ECC Error at TL>0");
1605 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1606 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1607 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1608 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1610 * Cheetah+ also handles (No additional processing required):
1611 * DUE, DTO, DBERR (NCEEN controlled)
1612 * THCE (CEEN and ET_ECC_en controlled)
1613 * TUE (ET_ECC_en controlled)
1615 * Panther further adds:
1616 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
1617 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
1618 * TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled)
1619 * L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled)
1620 * THCE (CEEN and L2_tag_ECC_en controlled)
1621 * L3_THCE (CEEN and ET_ECC_en controlled)
1623 * Note that the p_clo_flags input is only valid in cases where the
1624 * cpu_private struct is not yet initialized (since that is the only
1625 * time that information cannot be obtained from the logout struct.)
1627 /*ARGSUSED*/
1628 void
1629 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1631 struct async_flt *aflt;
1632 ch_async_flt_t ch_flt;
1633 char pr_reason[MAX_REASON_STRING];
1634 ch_cpu_logout_t *clop;
1635 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1636 ch_cpu_errors_t cpu_error_regs;
1638 bzero(&ch_flt, sizeof (ch_async_flt_t));
1640 * Get the CPU log out info. If we can't find our CPU private
1641 * pointer, then we will have to make due without any detailed
1642 * logout information.
1644 if (CPU_PRIVATE(CPU) == NULL) {
1645 clop = NULL;
1646 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1647 get_cpu_error_state(&cpu_error_regs);
1648 set_cpu_error_state(&cpu_error_regs);
1649 t_afar = cpu_error_regs.afar;
1650 t_afsr = cpu_error_regs.afsr;
1651 t_afsr_ext = cpu_error_regs.afsr_ext;
1652 #if defined(SERRANO)
1653 ch_flt.afar2 = cpu_error_regs.afar2;
1654 #endif /* SERRANO */
1655 } else {
1656 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1657 t_afar = clop->clo_data.chd_afar;
1658 t_afsr = clop->clo_data.chd_afsr;
1659 t_afsr_ext = clop->clo_data.chd_afsr_ext;
1660 #if defined(SERRANO)
1661 ch_flt.afar2 = clop->clo_data.chd_afar2;
1662 #endif /* SERRANO */
1666 * In order to simplify code, we maintain this afsr_errs
1667 * variable which holds the aggregate of AFSR and AFSR_EXT
1668 * sticky bits.
1670 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1671 (t_afsr & C_AFSR_ALL_ERRS);
1673 pr_reason[0] = '\0';
1674 /* Setup the async fault structure */
1675 aflt = (struct async_flt *)&ch_flt;
1676 ch_flt.afsr_ext = t_afsr_ext;
1677 ch_flt.afsr_errs = t_afsr_errs;
1678 aflt->flt_stat = t_afsr;
1679 aflt->flt_addr = t_afar;
1680 aflt->flt_pc = (caddr_t)rp->r_pc;
1681 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1682 aflt->flt_tl = 0;
1683 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1686 * If this trap is a result of one of the errors not masked
1687 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1688 * indicate that a timeout is to be set later.
1690 if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1691 !aflt->flt_panic)
1692 ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1693 else
1694 ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1697 * log the CE and clean up
1699 cpu_log_and_clear_ce(&ch_flt);
1702 * We re-enable CEEN (if required) and check if any disrupting errors
1703 * have happened. We do this because if a disrupting error had occurred
1704 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1705 * Note that CEEN works differently on Cheetah than on Spitfire. Also,
1706 * we enable CEEN *before* checking the AFSR to avoid the small window
1707 * of a error happening between checking the AFSR and enabling CEEN.
1709 if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1710 set_error_enable(get_error_enable() | EN_REG_CEEN);
1711 if (clear_errors(&ch_flt)) {
1712 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1713 NULL);
1717 * Panic here if aflt->flt_panic has been set. Enqueued errors will
1718 * be logged as part of the panic flow.
1720 if (aflt->flt_panic)
1721 fm_panic("%sError(s)", pr_reason);
1725 * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1726 * L3_EDU:BLD, TO, and BERR events.
1727 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1729 * Cheetah+: No additional errors handled.
1731 * Note that the p_clo_flags input is only valid in cases where the
1732 * cpu_private struct is not yet initialized (since that is the only
1733 * time that information cannot be obtained from the logout struct.)
1735 /*ARGSUSED*/
1736 void
1737 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1739 ushort_t ttype, tl;
1740 ch_async_flt_t ch_flt;
1741 struct async_flt *aflt;
1742 int trampolined = 0;
1743 char pr_reason[MAX_REASON_STRING];
1744 ch_cpu_logout_t *clop;
1745 uint64_t ceen, clo_flags;
1746 uint64_t log_afsr;
1747 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1748 ch_cpu_errors_t cpu_error_regs;
1749 int expected = DDI_FM_ERR_UNEXPECTED;
1750 ddi_acc_hdl_t *hp;
1753 * We need to look at p_flag to determine if the thread detected an
1754 * error while dumping core. We can't grab p_lock here, but it's ok
1755 * because we just need a consistent snapshot and we know that everyone
1756 * else will store a consistent set of bits while holding p_lock. We
1757 * don't have to worry about a race because SDOCORE is set once prior
1758 * to doing i/o from the process's address space and is never cleared.
1760 uint_t pflag = ttoproc(curthread)->p_flag;
1762 bzero(&ch_flt, sizeof (ch_async_flt_t));
1764 * Get the CPU log out info. If we can't find our CPU private
1765 * pointer then we will have to make due without any detailed
1766 * logout information.
1768 if (CPU_PRIVATE(CPU) == NULL) {
1769 clop = NULL;
1770 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1771 get_cpu_error_state(&cpu_error_regs);
1772 set_cpu_error_state(&cpu_error_regs);
1773 t_afar = cpu_error_regs.afar;
1774 t_afsr = cpu_error_regs.afsr;
1775 t_afsr_ext = cpu_error_regs.afsr_ext;
1776 #if defined(SERRANO)
1777 ch_flt.afar2 = cpu_error_regs.afar2;
1778 #endif /* SERRANO */
1779 clo_flags = p_clo_flags;
1780 } else {
1781 clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1782 t_afar = clop->clo_data.chd_afar;
1783 t_afsr = clop->clo_data.chd_afsr;
1784 t_afsr_ext = clop->clo_data.chd_afsr_ext;
1785 #if defined(SERRANO)
1786 ch_flt.afar2 = clop->clo_data.chd_afar2;
1787 #endif /* SERRANO */
1788 clo_flags = clop->clo_flags;
1792 * In order to simplify code, we maintain this afsr_errs
1793 * variable which holds the aggregate of AFSR and AFSR_EXT
1794 * sticky bits.
1796 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1797 (t_afsr & C_AFSR_ALL_ERRS);
1798 pr_reason[0] = '\0';
1801 * Grab information encoded into our clo_flags field.
1803 ceen = clo_flags & EN_REG_CEEN;
1804 tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1805 ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1808 * handle the specific error
1810 aflt = (struct async_flt *)&ch_flt;
1811 aflt->flt_id = gethrtime_waitfree();
1812 aflt->flt_bus_id = getprocessorid();
1813 aflt->flt_inst = CPU->cpu_id;
1814 ch_flt.afsr_ext = t_afsr_ext;
1815 ch_flt.afsr_errs = t_afsr_errs;
1816 aflt->flt_stat = t_afsr;
1817 aflt->flt_addr = t_afar;
1818 aflt->flt_pc = (caddr_t)rp->r_pc;
1819 aflt->flt_prot = AFLT_PROT_NONE;
1820 aflt->flt_class = CPU_FAULT;
1821 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1822 aflt->flt_tl = (uchar_t)tl;
1823 aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1824 C_AFSR_PANIC(t_afsr_errs));
1825 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1826 aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1829 * If the trap occurred in privileged mode at TL=0, we need to check to
1830 * see if we were executing in the kernel under on_trap() or t_lofault
1831 * protection. If so, modify the saved registers so that we return
1832 * from the trap to the appropriate trampoline routine.
1834 if (aflt->flt_priv && tl == 0) {
1835 if (curthread->t_ontrap != NULL) {
1836 on_trap_data_t *otp = curthread->t_ontrap;
1838 if (otp->ot_prot & OT_DATA_EC) {
1839 aflt->flt_prot = AFLT_PROT_EC;
1840 otp->ot_trap |= OT_DATA_EC;
1841 rp->r_pc = otp->ot_trampoline;
1842 rp->r_npc = rp->r_pc + 4;
1843 trampolined = 1;
1846 if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1847 (otp->ot_prot & OT_DATA_ACCESS)) {
1848 aflt->flt_prot = AFLT_PROT_ACCESS;
1849 otp->ot_trap |= OT_DATA_ACCESS;
1850 rp->r_pc = otp->ot_trampoline;
1851 rp->r_npc = rp->r_pc + 4;
1852 trampolined = 1;
1854 * for peeks and caut_gets errors are expected
1856 hp = (ddi_acc_hdl_t *)otp->ot_handle;
1857 if (!hp)
1858 expected = DDI_FM_ERR_PEEK;
1859 else if (hp->ah_acc.devacc_attr_access ==
1860 DDI_CAUTIOUS_ACC)
1861 expected = DDI_FM_ERR_EXPECTED;
1864 } else if (curthread->t_lofault) {
1865 aflt->flt_prot = AFLT_PROT_COPY;
1866 rp->r_g1 = EFAULT;
1867 rp->r_pc = curthread->t_lofault;
1868 rp->r_npc = rp->r_pc + 4;
1869 trampolined = 1;
1874 * If we're in user mode or we're doing a protected copy, we either
1875 * want the ASTON code below to send a signal to the user process
1876 * or we want to panic if aft_panic is set.
1878 * If we're in privileged mode and we're not doing a copy, then we
1879 * need to check if we've trampolined. If we haven't trampolined,
1880 * we should panic.
1882 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1883 if (t_afsr_errs &
1884 ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1885 ~(C_AFSR_BERR | C_AFSR_TO)))
1886 aflt->flt_panic |= aft_panic;
1887 } else if (!trampolined) {
1888 aflt->flt_panic = 1;
1892 * If we've trampolined due to a privileged TO or BERR, or if an
1893 * unprivileged TO or BERR occurred, we don't want to enqueue an
1894 * event for that TO or BERR. Queue all other events (if any) besides
1895 * the TO/BERR. Since we may not be enqueing any events, we need to
1896 * ignore the number of events queued. If we haven't trampolined due
1897 * to a TO or BERR, just enqueue events normally.
1899 log_afsr = t_afsr_errs;
1900 if (trampolined) {
1901 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1902 } else if (!aflt->flt_priv) {
1904 * User mode, suppress messages if
1905 * cpu_berr_to_verbose is not set.
1907 if (!cpu_berr_to_verbose)
1908 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1912 * Log any errors that occurred
1914 if (((log_afsr &
1915 ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1916 cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1917 (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1918 ch_flt.flt_type = CPU_INV_AFSR;
1919 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1920 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1921 aflt->flt_panic);
1925 * Zero out + invalidate CPU logout.
1927 if (clop) {
1928 bzero(clop, sizeof (ch_cpu_logout_t));
1929 clop->clo_data.chd_afar = LOGOUT_INVALID;
1932 #if defined(JALAPENO) || defined(SERRANO)
1934 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1935 * IO errors that may have resulted in this trap.
1937 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1938 cpu_run_bus_error_handlers(aflt, expected);
1942 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1943 * line from the Ecache. We also need to query the bus nexus for
1944 * fatal errors. Attempts to do diagnostic read on caches may
1945 * introduce more errors (especially when the module is bad).
1947 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1949 * Ask our bus nexus friends if they have any fatal errors. If
1950 * so, they will log appropriate error messages.
1952 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1953 aflt->flt_panic = 1;
1956 * We got a UE or RUE and are panicking, save the fault PA in
1957 * a known location so that the platform specific panic code
1958 * can check for copyback errors.
1960 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1961 panic_aflt = *aflt;
1966 * Flush Ecache line or entire Ecache
1968 if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1969 cpu_error_ecache_flush(&ch_flt);
1970 #else /* JALAPENO || SERRANO */
1972 * UE/BERR/TO: Call our bus nexus friends to check for
1973 * IO errors that may have resulted in this trap.
1975 if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1976 cpu_run_bus_error_handlers(aflt, expected);
1980 * UE: If the UE is in memory, we need to flush the bad
1981 * line from the Ecache. We also need to query the bus nexus for
1982 * fatal errors. Attempts to do diagnostic read on caches may
1983 * introduce more errors (especially when the module is bad).
1985 if (t_afsr & C_AFSR_UE) {
1987 * Ask our legacy bus nexus friends if they have any fatal
1988 * errors. If so, they will log appropriate error messages.
1990 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1991 aflt->flt_panic = 1;
1994 * We got a UE and are panicking, save the fault PA in a known
1995 * location so that the platform specific panic code can check
1996 * for copyback errors.
1998 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1999 panic_aflt = *aflt;
2004 * Flush Ecache line or entire Ecache
2006 if (t_afsr_errs &
2007 (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2008 cpu_error_ecache_flush(&ch_flt);
2009 #endif /* JALAPENO || SERRANO */
2012 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2013 * or disrupting errors have happened. We do this because if a
2014 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2015 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that
2016 * CEEN works differently on Cheetah than on Spitfire. Also, we enable
2017 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2018 * deferred or disrupting error happening between checking the AFSR and
2019 * enabling NCEEN/CEEN.
2021 * Note: CEEN reenabled only if it was on when trap taken.
2023 set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2024 if (clear_errors(&ch_flt)) {
2026 * Check for secondary errors, and avoid panicking if we
2027 * have them
2029 if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2030 t_afar) == 0) {
2031 aflt->flt_panic |= ((ch_flt.afsr_errs &
2032 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2034 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2035 NULL);
2039 * Panic here if aflt->flt_panic has been set. Enqueued errors will
2040 * be logged as part of the panic flow.
2042 if (aflt->flt_panic)
2043 fm_panic("%sError(s)", pr_reason);
2046 * If we queued an error and we are going to return from the trap and
2047 * the error was in user mode or inside of a copy routine, set AST flag
2048 * so the queue will be drained before returning to user mode. The
2049 * AST processing will also act on our failure policy.
2051 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2052 int pcb_flag = 0;
2054 if (t_afsr_errs &
2055 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2056 ~(C_AFSR_BERR | C_AFSR_TO)))
2057 pcb_flag |= ASYNC_HWERR;
2059 if (t_afsr & C_AFSR_BERR)
2060 pcb_flag |= ASYNC_BERR;
2062 if (t_afsr & C_AFSR_TO)
2063 pcb_flag |= ASYNC_BTO;
2065 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2066 aston(curthread);
2070 #if defined(CPU_IMP_L1_CACHE_PARITY)
2072 * Handling of data and instruction parity errors (traps 0x71, 0x72).
2074 * For Panther, P$ data parity errors during floating point load hits
2075 * are also detected (reported as TT 0x71) and handled by this trap
2076 * handler.
2078 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2079 * is available.
2081 /*ARGSUSED*/
2082 void
2083 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2085 ch_async_flt_t ch_flt;
2086 struct async_flt *aflt;
2087 uchar_t tl = ((flags & CH_ERR_TL) != 0);
2088 uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2089 uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2090 char *error_class;
2091 int index, way, word;
2092 ch_dc_data_t tmp_dcp;
2093 int dc_set_size = dcache_size / CH_DCACHE_NWAY;
2094 uint64_t parity_bits, pbits;
2095 /* The parity bit array corresponds to the result of summing two bits */
2096 static int parity_bits_popc[] = { 0, 1, 1, 0 };
2099 * Log the error.
2100 * For icache parity errors the fault address is the trap PC.
2101 * For dcache/pcache parity errors the instruction would have to
2102 * be decoded to determine the address and that isn't possible
2103 * at high PIL.
2105 bzero(&ch_flt, sizeof (ch_async_flt_t));
2106 aflt = (struct async_flt *)&ch_flt;
2107 aflt->flt_id = gethrtime_waitfree();
2108 aflt->flt_bus_id = getprocessorid();
2109 aflt->flt_inst = CPU->cpu_id;
2110 aflt->flt_pc = tpc;
2111 aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2112 aflt->flt_prot = AFLT_PROT_NONE;
2113 aflt->flt_class = CPU_FAULT;
2114 aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0;
2115 aflt->flt_tl = tl;
2116 aflt->flt_panic = panic;
2117 aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2118 ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2120 if (iparity) {
2121 cpu_icache_parity_info(&ch_flt);
2122 if (ch_flt.parity_data.ipe.cpl_off != -1)
2123 error_class = FM_EREPORT_CPU_USIII_IDSPE;
2124 else if (ch_flt.parity_data.ipe.cpl_way != -1)
2125 error_class = FM_EREPORT_CPU_USIII_ITSPE;
2126 else
2127 error_class = FM_EREPORT_CPU_USIII_IPE;
2128 aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2129 } else {
2130 cpu_dcache_parity_info(&ch_flt);
2131 if (ch_flt.parity_data.dpe.cpl_off != -1) {
2133 * If not at TL 0 and running on a Jalapeno processor,
2134 * then process as a true ddspe. A true
2135 * ddspe error can only occur if the way == 0
2137 way = ch_flt.parity_data.dpe.cpl_way;
2138 if ((tl == 0) && (way != 0) &&
2139 IS_JALAPENO(cpunodes[CPU->cpu_id].implementation)) {
2140 for (index = 0; index < dc_set_size;
2141 index += dcache_linesize) {
2142 get_dcache_dtag(index + way *
2143 dc_set_size,
2144 (uint64_t *)&tmp_dcp);
2146 * Check data array for even parity.
2147 * The 8 parity bits are grouped into
2148 * 4 pairs each of which covers a 64-bit
2149 * word. The endianness is reversed
2150 * -- the low-order parity bits cover
2151 * the high-order data words.
2153 parity_bits = tmp_dcp.dc_utag >> 8;
2154 for (word = 0; word < 4; word++) {
2155 pbits = (parity_bits >>
2156 (6 - word * 2)) & 3;
2157 if (((popc64(
2158 tmp_dcp.dc_data[word]) +
2159 parity_bits_popc[pbits]) &
2160 1) && (tmp_dcp.dc_tag &
2161 VA13)) {
2162 /* cleanup */
2163 correct_dcache_parity(
2164 dcache_size,
2165 dcache_linesize);
2166 if (cache_boot_state &
2167 DCU_DC) {
2168 flush_dcache();
2171 set_dcu(get_dcu() |
2172 cache_boot_state);
2173 return;
2177 } /* (tl == 0) && (way != 0) && IS JALAPENO */
2178 error_class = FM_EREPORT_CPU_USIII_DDSPE;
2179 } else if (ch_flt.parity_data.dpe.cpl_way != -1)
2180 error_class = FM_EREPORT_CPU_USIII_DTSPE;
2181 else
2182 error_class = FM_EREPORT_CPU_USIII_DPE;
2183 aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2185 * For panther we also need to check the P$ for parity errors.
2187 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2188 cpu_pcache_parity_info(&ch_flt);
2189 if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2190 error_class = FM_EREPORT_CPU_USIII_PDSPE;
2191 aflt->flt_payload =
2192 FM_EREPORT_PAYLOAD_PCACHE_PE;
2197 cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2198 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2200 if (iparity) {
2202 * Invalidate entire I$.
2203 * This is required due to the use of diagnostic ASI
2204 * accesses that may result in a loss of I$ coherency.
2206 if (cache_boot_state & DCU_IC) {
2207 flush_icache();
2210 * According to section P.3.1 of the Panther PRM, we
2211 * need to do a little more for recovery on those
2212 * CPUs after encountering an I$ parity error.
2214 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2215 flush_ipb();
2216 correct_dcache_parity(dcache_size,
2217 dcache_linesize);
2218 flush_pcache();
2220 } else {
2222 * Since the valid bit is ignored when checking parity the
2223 * D$ data and tag must also be corrected. Set D$ data bits
2224 * to zero and set utag to 0, 1, 2, 3.
2226 correct_dcache_parity(dcache_size, dcache_linesize);
2229 * According to section P.3.3 of the Panther PRM, we
2230 * need to do a little more for recovery on those
2231 * CPUs after encountering a D$ or P$ parity error.
2233 * As far as clearing P$ parity errors, it is enough to
2234 * simply invalidate all entries in the P$ since P$ parity
2235 * error traps are only generated for floating point load
2236 * hits.
2238 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2239 flush_icache();
2240 flush_ipb();
2241 flush_pcache();
2246 * Invalidate entire D$ if it was enabled.
2247 * This is done to avoid stale data in the D$ which might
2248 * occur with the D$ disabled and the trap handler doing
2249 * stores affecting lines already in the D$.
2251 if (cache_boot_state & DCU_DC) {
2252 flush_dcache();
2256 * Restore caches to their bootup state.
2258 set_dcu(get_dcu() | cache_boot_state);
2261 * Panic here if aflt->flt_panic has been set. Enqueued errors will
2262 * be logged as part of the panic flow.
2264 if (aflt->flt_panic)
2265 fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2268 * If this error occurred at TL>0 then flush the E$ here to reduce
2269 * the chance of getting an unrecoverable Fast ECC error. This
2270 * flush will evict the part of the parity trap handler that is run
2271 * at TL>1.
2273 if (tl) {
2274 cpu_flush_ecache();
2279 * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2280 * to indicate which portions of the captured data should be in the ereport.
2282 void
2283 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2285 int way = ch_flt->parity_data.ipe.cpl_way;
2286 int offset = ch_flt->parity_data.ipe.cpl_off;
2287 int tag_index;
2288 struct async_flt *aflt = (struct async_flt *)ch_flt;
2291 if ((offset != -1) || (way != -1)) {
2293 * Parity error in I$ tag or data
2295 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2296 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2297 ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2298 PN_ICIDX_TO_WAY(tag_index);
2299 else
2300 ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2301 CH_ICIDX_TO_WAY(tag_index);
2302 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2303 IC_LOGFLAG_MAGIC;
2304 } else {
2306 * Parity error was not identified.
2307 * Log tags and data for all ways.
2309 for (way = 0; way < CH_ICACHE_NWAY; way++) {
2310 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2311 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2312 ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2313 PN_ICIDX_TO_WAY(tag_index);
2314 else
2315 ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2316 CH_ICIDX_TO_WAY(tag_index);
2317 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2318 IC_LOGFLAG_MAGIC;
2324 * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2325 * to indicate which portions of the captured data should be in the ereport.
2327 void
2328 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2330 int way = ch_flt->parity_data.dpe.cpl_way;
2331 int offset = ch_flt->parity_data.dpe.cpl_off;
2332 int tag_index;
2334 if (offset != -1) {
2336 * Parity error in D$ or P$ data array.
2338 * First check to see whether the parity error is in D$ or P$
2339 * since P$ data parity errors are reported in Panther using
2340 * the same trap.
2342 if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2343 tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2344 ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2345 CH_PCIDX_TO_WAY(tag_index);
2346 ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2347 PC_LOGFLAG_MAGIC;
2348 } else {
2349 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2350 ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2351 CH_DCIDX_TO_WAY(tag_index);
2352 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2353 DC_LOGFLAG_MAGIC;
2355 } else if (way != -1) {
2357 * Parity error in D$ tag.
2359 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2360 ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2361 CH_DCIDX_TO_WAY(tag_index);
2362 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2363 DC_LOGFLAG_MAGIC;
2366 #endif /* CPU_IMP_L1_CACHE_PARITY */
2369 * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2370 * post-process CPU events that are dequeued. As such, it can be invoked
2371 * from softint context, from AST processing in the trap() flow, or from the
2372 * panic flow. We decode the CPU-specific data, and take appropriate actions.
2373 * Historically this entry point was used to log the actual cmn_err(9F) text;
2374 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2375 * With FMA this function now also returns a flag which indicates to the
2376 * caller whether the ereport should be posted (1) or suppressed (0).
2378 static int
2379 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2381 ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2382 struct async_flt *aflt = (struct async_flt *)flt;
2383 uint64_t errors;
2384 extern void memscrub_induced_error(void);
2386 switch (ch_flt->flt_type) {
2387 case CPU_INV_AFSR:
2389 * If it is a disrupting trap and the AFSR is zero, then
2390 * the event has probably already been noted. Do not post
2391 * an ereport.
2393 if ((aflt->flt_status & ECC_C_TRAP) &&
2394 (!(aflt->flt_stat & C_AFSR_MASK)))
2395 return (0);
2396 else
2397 return (1);
2398 case CPU_TO:
2399 case CPU_BERR:
2400 case CPU_FATAL:
2401 case CPU_FPUERR:
2402 return (1);
2404 case CPU_UE_ECACHE_RETIRE:
2405 cpu_log_err(aflt);
2406 cpu_page_retire(ch_flt);
2407 return (1);
2410 * Cases where we may want to suppress logging or perform
2411 * extended diagnostics.
2413 case CPU_CE:
2414 case CPU_EMC:
2416 * We want to skip logging and further classification
2417 * only if ALL the following conditions are true:
2419 * 1. There is only one error
2420 * 2. That error is a correctable memory error
2421 * 3. The error is caused by the memory scrubber (in
2422 * which case the error will have occurred under
2423 * on_trap protection)
2424 * 4. The error is on a retired page
2426 * Note: AFLT_PROT_EC is used places other than the memory
2427 * scrubber. However, none of those errors should occur
2428 * on a retired page.
2430 if ((ch_flt->afsr_errs &
2431 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2432 aflt->flt_prot == AFLT_PROT_EC) {
2434 if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2435 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2438 * Since we're skipping logging, we'll need
2439 * to schedule the re-enabling of CEEN
2441 (void) timeout(cpu_delayed_check_ce_errors,
2442 (void *)(uintptr_t)aflt->flt_inst,
2443 drv_usectohz((clock_t)cpu_ceen_delay_secs
2444 * MICROSEC));
2448 * Inform memscrubber - scrubbing induced
2449 * CE on a retired page.
2451 memscrub_induced_error();
2452 return (0);
2457 * Perform/schedule further classification actions, but
2458 * only if the page is healthy (we don't want bad
2459 * pages inducing too much diagnostic activity). If we could
2460 * not find a page pointer then we also skip this. If
2461 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2462 * to copy and recirculate the event (for further diagnostics)
2463 * and we should not proceed to log it here.
2465 * This must be the last step here before the cpu_log_err()
2466 * below - if an event recirculates cpu_ce_log_err() will
2467 * not call the current function but just proceed directly
2468 * to cpu_ereport_post after the cpu_log_err() avoided below.
2470 * Note: Check cpu_impl_async_log_err if changing this
2472 if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2473 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2474 CE_XDIAG_SKIP_NOPP);
2475 } else {
2476 if (errors != PR_OK) {
2477 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2478 CE_XDIAG_SKIP_PAGEDET);
2479 } else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2480 offsetof(ch_async_flt_t, cmn_asyncflt))) {
2481 return (0);
2484 /*FALLTHRU*/
2487 * Cases where we just want to report the error and continue.
2489 case CPU_CE_ECACHE:
2490 case CPU_UE_ECACHE:
2491 case CPU_IV:
2492 case CPU_ORPH:
2493 cpu_log_err(aflt);
2494 return (1);
2497 * Cases where we want to fall through to handle panicking.
2499 case CPU_UE:
2501 * We want to skip logging in the same conditions as the
2502 * CE case. In addition, we want to make sure we're not
2503 * panicking.
2505 if (!panicstr && (ch_flt->afsr_errs &
2506 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2507 aflt->flt_prot == AFLT_PROT_EC) {
2508 if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2509 /* Zero the address to clear the error */
2510 softcall(ecc_page_zero, (void *)aflt->flt_addr);
2512 * Inform memscrubber - scrubbing induced
2513 * UE on a retired page.
2515 memscrub_induced_error();
2516 return (0);
2519 cpu_log_err(aflt);
2520 break;
2522 default:
2524 * If the us3_common.c code doesn't know the flt_type, it may
2525 * be an implementation-specific code. Call into the impldep
2526 * backend to find out what to do: if it tells us to continue,
2527 * break and handle as if falling through from a UE; if not,
2528 * the impldep backend has handled the error and we're done.
2530 switch (cpu_impl_async_log_err(flt, eqep)) {
2531 case CH_ASYNC_LOG_DONE:
2532 return (1);
2533 case CH_ASYNC_LOG_RECIRC:
2534 return (0);
2535 case CH_ASYNC_LOG_CONTINUE:
2536 break; /* continue on to handle UE-like error */
2537 default:
2538 cmn_err(CE_WARN, "discarding error 0x%p with "
2539 "invalid fault type (0x%x)",
2540 (void *)aflt, ch_flt->flt_type);
2541 return (0);
2545 /* ... fall through from the UE case */
2547 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2548 if (!panicstr) {
2549 cpu_page_retire(ch_flt);
2550 } else {
2552 * Clear UEs on panic so that we don't
2553 * get haunted by them during panic or
2554 * after reboot
2556 cpu_clearphys(aflt);
2557 (void) clear_errors(NULL);
2561 return (1);
2565 * Retire the bad page that may contain the flushed error.
2567 void
2568 cpu_page_retire(ch_async_flt_t *ch_flt)
2570 struct async_flt *aflt = (struct async_flt *)ch_flt;
2571 (void) page_retire(aflt->flt_addr, PR_UE);
2575 * Return true if the error specified in the AFSR indicates
2576 * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2577 * for Panther, none for Jalapeno/Serrano).
2579 /* ARGSUSED */
2580 static int
2581 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2583 #if defined(JALAPENO) || defined(SERRANO)
2584 return (0);
2585 #elif defined(CHEETAH_PLUS)
2586 if (IS_PANTHER(cpunodes[cpuid].implementation))
2587 return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2588 return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2589 #else /* CHEETAH_PLUS */
2590 return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2591 #endif
2595 * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2596 * generic event post-processing for correctable and uncorrectable memory,
2597 * E$, and MTag errors. Historically this entry point was used to log bits of
2598 * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2599 * converted into an ereport. In addition, it transmits the error to any
2600 * platform-specific service-processor FRU logging routines, if available.
2602 void
2603 cpu_log_err(struct async_flt *aflt)
2605 char unum[UNUM_NAMLEN];
2606 int synd_status, synd_code, afar_status;
2607 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2609 if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2610 aflt->flt_status |= ECC_ECACHE;
2611 else
2612 aflt->flt_status &= ~ECC_ECACHE;
2614 * Determine syndrome status.
2616 synd_status = afsr_to_synd_status(aflt->flt_inst,
2617 ch_flt->afsr_errs, ch_flt->flt_bit);
2620 * Determine afar status.
2622 if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2623 afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2624 ch_flt->flt_bit);
2625 else
2626 afar_status = AFLT_STAT_INVALID;
2628 synd_code = synd_to_synd_code(synd_status,
2629 aflt->flt_synd, ch_flt->flt_bit);
2632 * If afar status is not invalid do a unum lookup.
2634 if (afar_status != AFLT_STAT_INVALID) {
2635 (void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2636 } else {
2637 unum[0] = '\0';
2641 * Do not send the fruid message (plat_ecc_error_data_t)
2642 * to the SC if it can handle the enhanced error information
2643 * (plat_ecc_error2_data_t) or when the tunable
2644 * ecc_log_fruid_enable is set to 0.
2647 if (&plat_ecc_capability_sc_get &&
2648 plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2649 if (&plat_log_fruid_error)
2650 plat_log_fruid_error(synd_code, aflt, unum,
2651 ch_flt->flt_bit);
2654 if (aflt->flt_func != NULL)
2655 aflt->flt_func(aflt, unum);
2657 if (afar_status != AFLT_STAT_INVALID)
2658 cpu_log_diag_info(ch_flt);
2661 * If we have a CEEN error , we do not reenable CEEN until after
2662 * we exit the trap handler. Otherwise, another error may
2663 * occur causing the handler to be entered recursively.
2664 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2665 * to try and ensure that the CPU makes progress in the face
2666 * of a CE storm.
2668 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2669 (void) timeout(cpu_delayed_check_ce_errors,
2670 (void *)(uintptr_t)aflt->flt_inst,
2671 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2676 * Invoked by error_init() early in startup and therefore before
2677 * startup_errorq() is called to drain any error Q -
2679 * startup()
2680 * startup_end()
2681 * error_init()
2682 * cpu_error_init()
2683 * errorq_init()
2684 * errorq_drain()
2685 * start_other_cpus()
2687 * The purpose of this routine is to create error-related taskqs. Taskqs
2688 * are used for this purpose because cpu_lock can't be grabbed from interrupt
2689 * context.
2691 void
2692 cpu_error_init(int items)
2695 * Create taskq(s) to reenable CE
2697 ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2698 items, items, TASKQ_PREPOPULATE);
2701 void
2702 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2704 char unum[UNUM_NAMLEN];
2705 int len;
2707 switch (aflt->flt_class) {
2708 case CPU_FAULT:
2709 cpu_ereport_init(aflt);
2710 if (cpu_async_log_err(aflt, eqep))
2711 cpu_ereport_post(aflt);
2712 break;
2714 case BUS_FAULT:
2715 if (aflt->flt_func != NULL) {
2716 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2717 unum, UNUM_NAMLEN, &len);
2718 aflt->flt_func(aflt, unum);
2720 break;
2722 case RECIRC_CPU_FAULT:
2723 aflt->flt_class = CPU_FAULT;
2724 cpu_log_err(aflt);
2725 cpu_ereport_post(aflt);
2726 break;
2728 case RECIRC_BUS_FAULT:
2729 ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2730 /*FALLTHRU*/
2731 default:
2732 cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2733 "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2734 return;
2739 * Scrub and classify a CE. This function must not modify the
2740 * fault structure passed to it but instead should return the classification
2741 * information.
2744 static uchar_t
2745 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2747 uchar_t disp = CE_XDIAG_EXTALG;
2748 on_trap_data_t otd;
2749 uint64_t orig_err;
2750 ch_cpu_logout_t *clop;
2753 * Clear CEEN. CPU CE TL > 0 trap handling will already have done
2754 * this, but our other callers have not. Disable preemption to
2755 * avoid CPU migration so that we restore CEEN on the correct
2756 * cpu later.
2758 * CEEN is cleared so that further CEs that our instruction and
2759 * data footprint induce do not cause use to either creep down
2760 * kernel stack to the point of overflow, or do so much CE
2761 * notification as to make little real forward progress.
2763 * NCEEN must not be cleared. However it is possible that
2764 * our accesses to the flt_addr may provoke a bus error or timeout
2765 * if the offending address has just been unconfigured as part of
2766 * a DR action. So we must operate under on_trap protection.
2768 kpreempt_disable();
2769 orig_err = get_error_enable();
2770 if (orig_err & EN_REG_CEEN)
2771 set_error_enable(orig_err & ~EN_REG_CEEN);
2774 * Our classification algorithm includes the line state before
2775 * the scrub; we'd like this captured after the detection and
2776 * before the algorithm below - the earlier the better.
2778 * If we've come from a cpu CE trap then this info already exists
2779 * in the cpu logout area.
2781 * For a CE detected by memscrub for which there was no trap
2782 * (running with CEEN off) cpu_log_and_clear_ce has called
2783 * cpu_ce_delayed_ec_logout to capture some cache data, and
2784 * marked the fault structure as incomplete as a flag to later
2785 * logging code.
2787 * If called directly from an IO detected CE there has been
2788 * no line data capture. In this case we logout to the cpu logout
2789 * area - that's appropriate since it's the cpu cache data we need
2790 * for classification. We thus borrow the cpu logout area for a
2791 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2792 * this time (we will invalidate it again below).
2794 * If called from the partner check xcall handler then this cpu
2795 * (the partner) has not necessarily experienced a CE at this
2796 * address. But we want to capture line state before its scrub
2797 * attempt since we use that in our classification.
2799 if (logout_tried == B_FALSE) {
2800 if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2801 disp |= CE_XDIAG_NOLOGOUT;
2805 * Scrub memory, then check AFSR for errors. The AFAR we scrub may
2806 * no longer be valid (if DR'd since the initial event) so we
2807 * perform this scrub under on_trap protection. If this access is
2808 * ok then further accesses below will also be ok - DR cannot
2809 * proceed while this thread is active (preemption is disabled);
2810 * to be safe we'll nonetheless use on_trap again below.
2812 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2813 cpu_scrubphys(ecc);
2814 } else {
2815 no_trap();
2816 if (orig_err & EN_REG_CEEN)
2817 set_error_enable(orig_err);
2818 kpreempt_enable();
2819 return (disp);
2821 no_trap();
2824 * Did the casx read of the scrub log a CE that matches the AFAR?
2825 * Note that it's quite possible that the read sourced the data from
2826 * another cpu.
2828 if (clear_ecc(ecc))
2829 disp |= CE_XDIAG_CE1;
2832 * Read the data again. This time the read is very likely to
2833 * come from memory since the scrub induced a writeback to memory.
2835 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2836 (void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2837 } else {
2838 no_trap();
2839 if (orig_err & EN_REG_CEEN)
2840 set_error_enable(orig_err);
2841 kpreempt_enable();
2842 return (disp);
2844 no_trap();
2846 /* Did that read induce a CE that matches the AFAR? */
2847 if (clear_ecc(ecc))
2848 disp |= CE_XDIAG_CE2;
2851 * Look at the logout information and record whether we found the
2852 * line in l2/l3 cache. For Panther we are interested in whether
2853 * we found it in either cache (it won't reside in both but
2854 * it is possible to read it that way given the moving target).
2856 clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2857 if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2858 clop->clo_data.chd_afar != LOGOUT_INVALID) {
2859 int hit, level;
2860 int state;
2861 int totalsize;
2862 ch_ec_data_t *ecp;
2865 * If hit is nonzero then a match was found and hit will
2866 * be one greater than the index which hit. For Panther we
2867 * also need to pay attention to level to see which of l2$ or
2868 * l3$ it hit in.
2870 hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2871 0, &level);
2873 if (hit) {
2874 --hit;
2875 disp |= CE_XDIAG_AFARMATCH;
2877 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2878 if (level == 2)
2879 ecp = &clop->clo_data.chd_l2_data[hit];
2880 else
2881 ecp = &clop->clo_data.chd_ec_data[hit];
2882 } else {
2883 ASSERT(level == 2);
2884 ecp = &clop->clo_data.chd_ec_data[hit];
2886 totalsize = cpunodes[CPU->cpu_id].ecache_size;
2887 state = cpu_ectag_pa_to_subblk_state(totalsize,
2888 ecc->flt_addr, ecp->ec_tag);
2891 * Cheetah variants use different state encodings -
2892 * the CH_ECSTATE_* defines vary depending on the
2893 * module we're compiled for. Translate into our
2894 * one true version. Conflate Owner-Shared state
2895 * of SSM mode with Owner as victimisation of such
2896 * lines may cause a writeback.
2898 switch (state) {
2899 case CH_ECSTATE_MOD:
2900 disp |= EC_STATE_M;
2901 break;
2903 case CH_ECSTATE_OWN:
2904 case CH_ECSTATE_OWS:
2905 disp |= EC_STATE_O;
2906 break;
2908 case CH_ECSTATE_EXL:
2909 disp |= EC_STATE_E;
2910 break;
2912 case CH_ECSTATE_SHR:
2913 disp |= EC_STATE_S;
2914 break;
2916 default:
2917 disp |= EC_STATE_I;
2918 break;
2923 * If we initiated the delayed logout then we are responsible
2924 * for invalidating the logout area.
2926 if (logout_tried == B_FALSE) {
2927 bzero(clop, sizeof (ch_cpu_logout_t));
2928 clop->clo_data.chd_afar = LOGOUT_INVALID;
2933 * Re-enable CEEN if we turned it off.
2935 if (orig_err & EN_REG_CEEN)
2936 set_error_enable(orig_err);
2937 kpreempt_enable();
2939 return (disp);
2943 * Scrub a correctable memory error and collect data for classification
2944 * of CE type. This function is called in the detection path, ie tl0 handling
2945 * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2947 void
2948 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2951 * Cheetah CE classification does not set any bits in flt_status.
2952 * Instead we will record classification datapoints in flt_disp.
2954 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2957 * To check if the error detected by IO is persistent, sticky or
2958 * intermittent. This is noticed by clear_ecc().
2960 if (ecc->flt_status & ECC_IOBUS)
2961 ecc->flt_stat = C_AFSR_MEMORY;
2964 * Record information from this first part of the algorithm in
2965 * flt_disp.
2967 ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2971 * Select a partner to perform a further CE classification check from.
2972 * Must be called with kernel preemption disabled (to stop the cpu list
2973 * from changing). The detecting cpu we are partnering has cpuid
2974 * aflt->flt_inst; we might not be running on the detecting cpu.
2976 * Restrict choice to active cpus in the same cpu partition as ourselves in
2977 * an effort to stop bad cpus in one partition causing other partitions to
2978 * perform excessive diagnostic activity. Actually since the errorq drain
2979 * is run from a softint most of the time and that is a global mechanism
2980 * this isolation is only partial. Return NULL if we fail to find a
2981 * suitable partner.
2983 * We prefer a partner that is in a different latency group to ourselves as
2984 * we will share fewer datapaths. If such a partner is unavailable then
2985 * choose one in the same lgroup but prefer a different chip and only allow
2986 * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and
2987 * flags includes PTNR_SELFOK then permit selection of the original detector.
2989 * We keep a cache of the last partner selected for a cpu, and we'll try to
2990 * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2991 * have passed since that selection was made. This provides the benefit
2992 * of the point-of-view of different partners over time but without
2993 * requiring frequent cpu list traversals.
2996 #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */
2997 #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */
2999 static cpu_t *
3000 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
3002 cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
3003 hrtime_t lasttime, thistime;
3005 ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
3007 dtcr = cpu[aflt->flt_inst];
3010 * Short-circuit for the following cases:
3011 * . the dtcr is not flagged active
3012 * . there is just one cpu present
3013 * . the detector has disappeared
3014 * . we were given a bad flt_inst cpuid; this should not happen
3015 * (eg PCI code now fills flt_inst) but if it does it is no
3016 * reason to panic.
3017 * . there is just one cpu left online in the cpu partition
3019 * If we return NULL after this point then we do not update the
3020 * chpr_ceptnr_seltime which will cause us to perform a full lookup
3021 * again next time; this is the case where the only other cpu online
3022 * in the detector's partition is on the same chip as the detector
3023 * and since CEEN re-enable is throttled even that case should not
3024 * hurt performance.
3026 if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
3027 return (NULL);
3029 if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
3030 if (flags & PTNR_SELFOK) {
3031 *typep = CE_XDIAG_PTNR_SELF;
3032 return (dtcr);
3033 } else {
3034 return (NULL);
3038 thistime = gethrtime();
3039 lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
3042 * Select a starting point.
3044 if (!lasttime) {
3046 * We've never selected a partner for this detector before.
3047 * Start the scan at the next online cpu in the same cpu
3048 * partition.
3050 sp = dtcr->cpu_next_part;
3051 } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
3053 * Our last selection has not aged yet. If this partner:
3054 * . is still a valid cpu,
3055 * . is still in the same partition as the detector
3056 * . is still marked active
3057 * . satisfies the 'flags' argument criteria
3058 * then select it again without updating the timestamp.
3060 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3061 if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3062 !cpu_flagged_active(sp->cpu_flags) ||
3063 (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3064 (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3065 !(flags & PTNR_SIBLINGOK))) {
3066 sp = dtcr->cpu_next_part;
3067 } else {
3068 if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3069 *typep = CE_XDIAG_PTNR_REMOTE;
3070 } else if (sp == dtcr) {
3071 *typep = CE_XDIAG_PTNR_SELF;
3072 } else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3073 *typep = CE_XDIAG_PTNR_SIBLING;
3074 } else {
3075 *typep = CE_XDIAG_PTNR_LOCAL;
3077 return (sp);
3079 } else {
3081 * Our last selection has aged. If it is nonetheless still a
3082 * valid cpu then start the scan at the next cpu in the
3083 * partition after our last partner. If the last selection
3084 * is no longer a valid cpu then go with our default. In
3085 * this way we slowly cycle through possible partners to
3086 * obtain multiple viewpoints over time.
3088 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3089 if (sp == NULL) {
3090 sp = dtcr->cpu_next_part;
3091 } else {
3092 sp = sp->cpu_next_part; /* may be dtcr */
3093 if (sp->cpu_part != dtcr->cpu_part)
3094 sp = dtcr;
3099 * We have a proposed starting point for our search, but if this
3100 * cpu is offline then its cpu_next_part will point to itself
3101 * so we can't use that to iterate over cpus in this partition in
3102 * the loop below. We still want to avoid iterating over cpus not
3103 * in our partition, so in the case that our starting point is offline
3104 * we will repoint it to be the detector itself; and if the detector
3105 * happens to be offline we'll return NULL from the following loop.
3107 if (!cpu_flagged_active(sp->cpu_flags)) {
3108 sp = dtcr;
3111 ptnr = sp;
3112 locptnr = NULL;
3113 sibptnr = NULL;
3114 do {
3115 if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3116 continue;
3117 if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3118 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3119 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3120 *typep = CE_XDIAG_PTNR_REMOTE;
3121 return (ptnr);
3123 if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3124 if (sibptnr == NULL)
3125 sibptnr = ptnr;
3126 continue;
3128 if (locptnr == NULL)
3129 locptnr = ptnr;
3130 } while ((ptnr = ptnr->cpu_next_part) != sp);
3133 * A foreign partner has already been returned if one was available.
3135 * If locptnr is not NULL it is a cpu in the same lgroup as the
3136 * detector, is active, and is not a sibling of the detector.
3138 * If sibptnr is not NULL it is a sibling of the detector, and is
3139 * active.
3141 * If we have to resort to using the detector itself we have already
3142 * checked that it is active.
3144 if (locptnr) {
3145 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3146 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3147 *typep = CE_XDIAG_PTNR_LOCAL;
3148 return (locptnr);
3149 } else if (sibptnr && flags & PTNR_SIBLINGOK) {
3150 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3151 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3152 *typep = CE_XDIAG_PTNR_SIBLING;
3153 return (sibptnr);
3154 } else if (flags & PTNR_SELFOK) {
3155 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3156 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3157 *typep = CE_XDIAG_PTNR_SELF;
3158 return (dtcr);
3161 return (NULL);
3165 * Cross call handler that is requested to run on the designated partner of
3166 * a cpu that experienced a possibly sticky or possibly persistnet CE.
3168 static void
3169 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3171 *dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3175 * The associated errorqs are never destroyed so we do not need to deal with
3176 * them disappearing before this timeout fires. If the affected memory
3177 * has been DR'd out since the original event the scrub algrithm will catch
3178 * any errors and return null disposition info. If the original detecting
3179 * cpu has been DR'd out then ereport detector info will not be able to
3180 * lookup CPU type; with a small timeout this is unlikely.
3182 static void
3183 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3185 struct async_flt *aflt = cbarg->lkycb_aflt;
3186 uchar_t disp;
3187 cpu_t *cp;
3188 int ptnrtype;
3190 kpreempt_disable();
3191 if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3192 &ptnrtype)) {
3193 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3194 (uint64_t)&disp);
3195 CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3196 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3197 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3198 } else {
3199 ce_xdiag_lkydrops++;
3200 if (ncpus > 1)
3201 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3202 CE_XDIAG_SKIP_NOPTNR);
3204 kpreempt_enable();
3206 errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3207 kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3211 * Called from errorq drain code when processing a CE error, both from
3212 * CPU and PCI drain functions. Decide what further classification actions,
3213 * if any, we will perform. Perform immediate actions now, and schedule
3214 * delayed actions as required. Note that we are no longer necessarily running
3215 * on the detecting cpu, and that the async_flt structure will not persist on
3216 * return from this function.
3218 * Calls to this function should aim to be self-throtlling in some way. With
3219 * the delayed re-enable of CEEN the absolute rate of calls should not
3220 * be excessive. Callers should also avoid performing in-depth classification
3221 * for events in pages that are already known to be suspect.
3223 * We return nonzero to indicate that the event has been copied and
3224 * recirculated for further testing. The caller should not log the event
3225 * in this case - it will be logged when further test results are available.
3227 * Our possible contexts are that of errorq_drain: below lock level or from
3228 * panic context. We can assume that the cpu we are running on is online.
3232 #ifdef DEBUG
3233 static int ce_xdiag_forceaction;
3234 #endif
3237 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3238 errorq_elem_t *eqep, size_t afltoffset)
3240 ce_dispact_t dispact, action;
3241 cpu_t *cp;
3242 uchar_t dtcrinfo, disp;
3243 int ptnrtype;
3245 if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3246 ce_xdiag_drops++;
3247 return (0);
3248 } else if (!aflt->flt_in_memory) {
3249 ce_xdiag_drops++;
3250 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3251 return (0);
3254 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3257 * Some correctable events are not scrubbed/classified, such as those
3258 * noticed at the tail of cpu_deferred_error. So if there is no
3259 * initial detector classification go no further.
3261 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3262 ce_xdiag_drops++;
3263 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3264 return (0);
3267 dispact = CE_DISPACT(ce_disp_table,
3268 CE_XDIAG_AFARMATCHED(dtcrinfo),
3269 CE_XDIAG_STATE(dtcrinfo),
3270 CE_XDIAG_CE1SEEN(dtcrinfo),
3271 CE_XDIAG_CE2SEEN(dtcrinfo));
3274 action = CE_ACT(dispact); /* bad lookup caught below */
3275 #ifdef DEBUG
3276 if (ce_xdiag_forceaction != 0)
3277 action = ce_xdiag_forceaction;
3278 #endif
3280 switch (action) {
3281 case CE_ACT_LKYCHK: {
3282 caddr_t ndata;
3283 errorq_elem_t *neqep;
3284 struct async_flt *ecc;
3285 ce_lkychk_cb_t *cbargp;
3287 if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3288 ce_xdiag_lkydrops++;
3289 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3290 CE_XDIAG_SKIP_DUPFAIL);
3291 break;
3293 ecc = (struct async_flt *)(ndata + afltoffset);
3295 ASSERT(ecc->flt_class == CPU_FAULT ||
3296 ecc->flt_class == BUS_FAULT);
3297 ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3298 RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3300 cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3301 cbargp->lkycb_aflt = ecc;
3302 cbargp->lkycb_eqp = eqp;
3303 cbargp->lkycb_eqep = neqep;
3305 (void) timeout((void (*)(void *))ce_lkychk_cb,
3306 (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3307 return (1);
3310 case CE_ACT_PTNRCHK:
3311 kpreempt_disable(); /* stop cpu list changing */
3312 if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3313 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3314 (uint64_t)aflt, (uint64_t)&disp);
3315 CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3316 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3317 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3318 } else if (ncpus > 1) {
3319 ce_xdiag_ptnrdrops++;
3320 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3321 CE_XDIAG_SKIP_NOPTNR);
3322 } else {
3323 ce_xdiag_ptnrdrops++;
3324 CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3325 CE_XDIAG_SKIP_UNIPROC);
3327 kpreempt_enable();
3328 break;
3330 case CE_ACT_DONE:
3331 break;
3333 case CE_ACT(CE_DISP_BAD):
3334 default:
3335 #ifdef DEBUG
3336 cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3337 #endif
3338 ce_xdiag_bad++;
3339 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3340 break;
3343 return (0);
3347 * We route all errors through a single switch statement.
3349 void
3350 cpu_ue_log_err(struct async_flt *aflt)
3352 switch (aflt->flt_class) {
3353 case CPU_FAULT:
3354 cpu_ereport_init(aflt);
3355 if (cpu_async_log_err(aflt, NULL))
3356 cpu_ereport_post(aflt);
3357 break;
3359 case BUS_FAULT:
3360 bus_async_log_err(aflt);
3361 break;
3363 default:
3364 cmn_err(CE_WARN, "discarding async error %p with invalid "
3365 "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3366 return;
3371 * Routine for panic hook callback from panic_idle().
3373 void
3374 cpu_async_panic_callb(void)
3376 ch_async_flt_t ch_flt;
3377 struct async_flt *aflt;
3378 ch_cpu_errors_t cpu_error_regs;
3379 uint64_t afsr_errs;
3381 get_cpu_error_state(&cpu_error_regs);
3383 afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3384 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3386 if (afsr_errs) {
3388 bzero(&ch_flt, sizeof (ch_async_flt_t));
3389 aflt = (struct async_flt *)&ch_flt;
3390 aflt->flt_id = gethrtime_waitfree();
3391 aflt->flt_bus_id = getprocessorid();
3392 aflt->flt_inst = CPU->cpu_id;
3393 aflt->flt_stat = cpu_error_regs.afsr;
3394 aflt->flt_addr = cpu_error_regs.afar;
3395 aflt->flt_prot = AFLT_PROT_NONE;
3396 aflt->flt_class = CPU_FAULT;
3397 aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3398 aflt->flt_panic = 1;
3399 ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3400 ch_flt.afsr_errs = afsr_errs;
3401 #if defined(SERRANO)
3402 ch_flt.afar2 = cpu_error_regs.afar2;
3403 #endif /* SERRANO */
3404 (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3409 * Routine to convert a syndrome into a syndrome code.
3411 static int
3412 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3414 if (synd_status == AFLT_STAT_INVALID)
3415 return (-1);
3418 * Use the syndrome to index the appropriate syndrome table,
3419 * to get the code indicating which bit(s) is(are) bad.
3421 if (afsr_bit &
3422 (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3423 if (afsr_bit & C_AFSR_MSYND_ERRS) {
3424 #if defined(JALAPENO) || defined(SERRANO)
3425 if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3426 return (-1);
3427 else
3428 return (BPAR0 + synd);
3429 #else /* JALAPENO || SERRANO */
3430 if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3431 return (-1);
3432 else
3433 return (mtag_syndrome_tab[synd]);
3434 #endif /* JALAPENO || SERRANO */
3435 } else {
3436 if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3437 return (-1);
3438 else
3439 return (ecc_syndrome_tab[synd]);
3441 } else {
3442 return (-1);
3447 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3449 if (&plat_get_mem_sid)
3450 return (plat_get_mem_sid(unum, buf, buflen, lenp));
3451 else
3452 return (ENOTSUP);
3456 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3458 if (&plat_get_mem_offset)
3459 return (plat_get_mem_offset(flt_addr, offp));
3460 else
3461 return (ENOTSUP);
3465 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3467 if (&plat_get_mem_addr)
3468 return (plat_get_mem_addr(unum, sid, offset, addrp));
3469 else
3470 return (ENOTSUP);
3474 * Routine to return a string identifying the physical name
3475 * associated with a memory/cache error.
3478 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3479 uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3480 ushort_t flt_status, char *buf, int buflen, int *lenp)
3482 int synd_code;
3483 int ret;
3486 * An AFSR of -1 defaults to a memory syndrome.
3488 if (flt_stat == (uint64_t)-1)
3489 flt_stat = C_AFSR_CE;
3491 synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3494 * Syndrome code must be either a single-bit error code
3495 * (0...143) or -1 for unum lookup.
3497 if (synd_code < 0 || synd_code >= M2)
3498 synd_code = -1;
3499 if (&plat_get_mem_unum) {
3500 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3501 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3502 buf[0] = '\0';
3503 *lenp = 0;
3506 return (ret);
3509 return (ENOTSUP);
3513 * Wrapper for cpu_get_mem_unum() routine that takes an
3514 * async_flt struct rather than explicit arguments.
3517 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3518 char *buf, int buflen, int *lenp)
3521 * If we come thru here for an IO bus error aflt->flt_stat will
3522 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3523 * so it will interpret this as a memory error.
3525 return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3526 (aflt->flt_class == BUS_FAULT) ?
3527 (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3528 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3529 aflt->flt_status, buf, buflen, lenp));
3533 * Return unum string given synd_code and async_flt into
3534 * the buf with size UNUM_NAMLEN
3536 static int
3537 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3539 int ret, len;
3542 * Syndrome code must be either a single-bit error code
3543 * (0...143) or -1 for unum lookup.
3545 if (synd_code < 0 || synd_code >= M2)
3546 synd_code = -1;
3547 if (&plat_get_mem_unum) {
3548 if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3549 aflt->flt_bus_id, aflt->flt_in_memory,
3550 aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3551 buf[0] = '\0';
3553 return (ret);
3556 buf[0] = '\0';
3557 return (ENOTSUP);
3561 * This routine is a more generic interface to cpu_get_mem_unum()
3562 * that may be used by other modules (e.g. the 'mm' driver, through
3563 * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3564 * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3567 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3568 char *buf, int buflen, int *lenp)
3570 int synd_status, flt_in_memory, ret;
3571 ushort_t flt_status = 0;
3572 char unum[UNUM_NAMLEN];
3573 uint64_t t_afsr_errs;
3576 * Check for an invalid address.
3578 if (afar == (uint64_t)-1)
3579 return (ENXIO);
3581 if (synd == (uint64_t)-1)
3582 synd_status = AFLT_STAT_INVALID;
3583 else
3584 synd_status = AFLT_STAT_VALID;
3586 flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3587 pf_is_memory(afar >> MMU_PAGESHIFT);
3590 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3592 if (*afsr == (uint64_t)-1)
3593 t_afsr_errs = C_AFSR_CE;
3594 else {
3595 t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3596 #if defined(CHEETAH_PLUS)
3597 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3598 t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3599 #endif /* CHEETAH_PLUS */
3603 * Turn on ECC_ECACHE if error type is E$ Data.
3605 if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3606 flt_status |= ECC_ECACHE;
3608 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3609 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3610 if (ret != 0)
3611 return (ret);
3613 if (*lenp >= buflen)
3614 return (ENAMETOOLONG);
3616 (void) strncpy(buf, unum, buflen);
3618 return (0);
3622 * Routine to return memory information associated
3623 * with a physical address and syndrome.
3626 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3627 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3628 int *segsp, int *banksp, int *mcidp)
3630 int synd_status, synd_code;
3632 if (afar == (uint64_t)-1)
3633 return (ENXIO);
3635 if (synd == (uint64_t)-1)
3636 synd_status = AFLT_STAT_INVALID;
3637 else
3638 synd_status = AFLT_STAT_VALID;
3640 synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3642 if (p2get_mem_info != NULL)
3643 return ((p2get_mem_info)(synd_code, afar,
3644 mem_sizep, seg_sizep, bank_sizep,
3645 segsp, banksp, mcidp));
3646 else
3647 return (ENOTSUP);
3651 * Routine to return a string identifying the physical
3652 * name associated with a cpuid.
3655 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3657 int ret;
3658 char unum[UNUM_NAMLEN];
3660 if (&plat_get_cpu_unum) {
3661 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3662 != 0)
3663 return (ret);
3664 } else {
3665 return (ENOTSUP);
3668 if (*lenp >= buflen)
3669 return (ENAMETOOLONG);
3671 (void) strncpy(buf, unum, buflen);
3673 return (0);
3677 * This routine exports the name buffer size.
3679 size_t
3680 cpu_get_name_bufsize()
3682 return (UNUM_NAMLEN);
/*
 * Historical function, apparently not used.  Intentionally a no-op.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
/*
 * Historical function only called for SBus errors in debugging.
 * Intentionally a no-op.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3702 * Clear the AFSR sticky bits. The routine returns a non-zero value if
3703 * any of the AFSR's sticky errors are detected. If a non-null pointer to
3704 * an async fault structure argument is passed in, the captured error state
3705 * (AFSR, AFAR) info will be returned in the structure.
3708 clear_errors(ch_async_flt_t *ch_flt)
3710 struct async_flt *aflt = (struct async_flt *)ch_flt;
3711 ch_cpu_errors_t cpu_error_regs;
3713 get_cpu_error_state(&cpu_error_regs);
3715 if (ch_flt != NULL) {
3716 aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3717 aflt->flt_addr = cpu_error_regs.afar;
3718 ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3719 ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3720 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3721 #if defined(SERRANO)
3722 ch_flt->afar2 = cpu_error_regs.afar2;
3723 #endif /* SERRANO */
3726 set_cpu_error_state(&cpu_error_regs);
3728 return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3729 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3733 * Clear any AFSR error bits, and check for persistence.
3735 * It would be desirable to also insist that syndrome match. PCI handling
3736 * has already filled flt_synd. For errors trapped by CPU we only fill
3737 * flt_synd when we queue the event, so we do not have a valid flt_synd
3738 * during initial classification (it is valid if we're called as part of
3739 * subsequent low-pil additional classification attempts). We could try
3740 * to determine which syndrome to use: we know we're only called for
3741 * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3742 * would be esynd/none and esynd/msynd, respectively. If that is
3743 * implemented then what do we do in the case that we do experience an
3744 * error on the same afar but with different syndrome? At the very least
3745 * we should count such occurences. Anyway, for now, we'll leave it as
3746 * it has been for ages.
3748 static int
3749 clear_ecc(struct async_flt *aflt)
3751 ch_cpu_errors_t cpu_error_regs;
3754 * Snapshot the AFSR and AFAR and clear any errors
3756 get_cpu_error_state(&cpu_error_regs);
3757 set_cpu_error_state(&cpu_error_regs);
3760 * If any of the same memory access error bits are still on and
3761 * the AFAR matches, return that the error is persistent.
3763 return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3764 cpu_error_regs.afar == aflt->flt_addr);
3768 * Turn off all cpu error detection, normally only used for panics.
3770 void
3771 cpu_disable_errors(void)
3773 xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3776 * With error detection now turned off, check the other cpus
3777 * logout areas for any unlogged errors.
3779 if (enable_check_other_cpus_logout) {
3780 cpu_check_other_cpus_logout();
3782 * Make a second pass over the logout areas, in case
3783 * there is a failing CPU in an error-trap loop which
3784 * will write to the logout area once it is emptied.
3786 cpu_check_other_cpus_logout();
3791 * Enable errors.
3793 void
3794 cpu_enable_errors(void)
3796 xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3800 * Flush the entire ecache using displacement flush by reading through a
3801 * physical address range twice as large as the Ecache.
3803 void
3804 cpu_flush_ecache(void)
3806 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3807 cpunodes[CPU->cpu_id].ecache_linesize);
3811 * Return CPU E$ set size - E$ size divided by the associativity.
3812 * We use this function in places where the CPU_PRIVATE ptr may not be
3813 * initialized yet. Note that for send_mondo and in the Ecache scrubber,
3814 * we're guaranteed that CPU_PRIVATE is initialized. Also, cpunodes is set
3815 * up before the kernel switches from OBP's to the kernel's trap table, so
3816 * we don't have to worry about cpunodes being unitialized.
3819 cpu_ecache_set_size(struct cpu *cp)
3821 if (CPU_PRIVATE(cp))
3822 return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3824 return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3828 * Flush Ecache line.
3829 * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3830 * Uses normal displacement flush for Cheetah.
3832 static void
3833 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3835 struct async_flt *aflt = (struct async_flt *)ch_flt;
3836 int ec_set_size = cpu_ecache_set_size(CPU);
3838 ecache_flush_line(aflt->flt_addr, ec_set_size);
3842 * Scrub physical address.
3843 * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3844 * Ecache or direct-mapped Ecache.
3846 static void
3847 cpu_scrubphys(struct async_flt *aflt)
3849 int ec_set_size = cpu_ecache_set_size(CPU);
3851 scrubphys(aflt->flt_addr, ec_set_size);
3855 * Clear physical address.
3856 * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3857 * Ecache or direct-mapped Ecache.
3859 void
3860 cpu_clearphys(struct async_flt *aflt)
3862 int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3863 int ec_set_size = cpu_ecache_set_size(CPU);
3866 clearphys(aflt->flt_addr, ec_set_size, lsize);
#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Check for a matching valid line in all the sets.
 * If found, return set# + 1.  Otherwise return 0.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_ec_data_t *ways = &ch_flt->flt_diag_data.chd_ec_data[0];
	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
	int ec_set_size = cpu_ecache_set_size(CPU);
	int nway = cpu_ecache_nway();
	int way;

	for (way = 0; way < nway; way++) {
		ch_ec_data_t *ecp = &ways[way];

		/* Invalid lines never match. */
		if (cpu_ectag_line_invalid(totalsize, ecp->ec_tag))
			continue;

		if ((aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
			return (way + 1);
	}

	return (0);
}
#endif	/* CPU_IMP_ECACHE_ASSOC */
3895 * Check whether a line in the given logout info matches the specified
3896 * fault address. If reqval is set then the line must not be Invalid.
3897 * Returns 0 on failure; on success (way + 1) is returned an *level is
3898 * set to 2 for l2$ or 3 for l3$.
3900 static int
3901 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3903 ch_diag_data_t *cdp = data;
3904 ch_ec_data_t *ecp;
3905 int totalsize, ec_set_size;
3906 int i, ways;
3907 int match = 0;
3908 int tagvalid;
3909 uint64_t addr, tagpa;
3910 int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3913 * Check the l2$ logout data
3915 if (ispanther) {
3916 ecp = &cdp->chd_l2_data[0];
3917 ec_set_size = PN_L2_SET_SIZE;
3918 ways = PN_L2_NWAYS;
3919 } else {
3920 ecp = &cdp->chd_ec_data[0];
3921 ec_set_size = cpu_ecache_set_size(CPU);
3922 ways = cpu_ecache_nway();
3923 totalsize = cpunodes[CPU->cpu_id].ecache_size;
3925 /* remove low order PA bits from fault address not used in PA tag */
3926 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3927 for (i = 0; i < ways; i++, ecp++) {
3928 if (ispanther) {
3929 tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3930 tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3931 } else {
3932 tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3933 tagvalid = !cpu_ectag_line_invalid(totalsize,
3934 ecp->ec_tag);
3936 if (tagpa == addr && (!reqval || tagvalid)) {
3937 match = i + 1;
3938 *level = 2;
3939 break;
3943 if (match || !ispanther)
3944 return (match);
3946 /* For Panther we also check the l3$ */
3947 ecp = &cdp->chd_ec_data[0];
3948 ec_set_size = PN_L3_SET_SIZE;
3949 ways = PN_L3_NWAYS;
3950 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3952 for (i = 0; i < ways; i++, ecp++) {
3953 if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3954 !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3955 match = i + 1;
3956 *level = 3;
3957 break;
3961 return (match);
3964 #if defined(CPU_IMP_L1_CACHE_PARITY)
3966 * Record information related to the source of an Dcache Parity Error.
3968 static void
3969 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3971 int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3972 int index;
3975 * Since instruction decode cannot be done at high PIL
3976 * just examine the entire Dcache to locate the error.
3978 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3979 ch_flt->parity_data.dpe.cpl_way = -1;
3980 ch_flt->parity_data.dpe.cpl_off = -1;
3982 for (index = 0; index < dc_set_size; index += dcache_linesize)
3983 cpu_dcache_parity_check(ch_flt, index);
3987 * Check all ways of the Dcache at a specified index for good parity.
3989 static void
3990 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3992 int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3993 uint64_t parity_bits, pbits, data_word;
3994 static int parity_bits_popc[] = { 0, 1, 1, 0 };
3995 int way, word, data_byte;
3996 ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3997 ch_dc_data_t tmp_dcp;
3999 for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
4001 * Perform diagnostic read.
4003 get_dcache_dtag(index + way * dc_set_size,
4004 (uint64_t *)&tmp_dcp);
4007 * Check tag for even parity.
4008 * Sum of 1 bits (including parity bit) should be even.
4010 if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
4012 * If this is the first error log detailed information
4013 * about it and check the snoop tag. Otherwise just
4014 * record the fact that we found another error.
4016 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4017 ch_flt->parity_data.dpe.cpl_way = way;
4018 ch_flt->parity_data.dpe.cpl_cache =
4019 CPU_DC_PARITY;
4020 ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
4022 if (popc64(tmp_dcp.dc_sntag &
4023 CHP_DCSNTAG_PARMASK) & 1) {
4024 ch_flt->parity_data.dpe.cpl_tag |=
4025 CHP_DC_SNTAG;
4026 ch_flt->parity_data.dpe.cpl_lcnt++;
4029 bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
4032 ch_flt->parity_data.dpe.cpl_lcnt++;
4035 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4037 * Panther has more parity bits than the other
4038 * processors for covering dcache data and so each
4039 * byte of data in each word has its own parity bit.
4041 parity_bits = tmp_dcp.dc_pn_data_parity;
4042 for (word = 0; word < 4; word++) {
4043 data_word = tmp_dcp.dc_data[word];
4044 pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
4045 for (data_byte = 0; data_byte < 8;
4046 data_byte++) {
4047 if (((popc64(data_word &
4048 PN_DC_DATA_PARITY_MASK)) & 1) ^
4049 (pbits & 1)) {
4050 cpu_record_dc_data_parity(
4051 ch_flt, dcp, &tmp_dcp, way,
4052 word);
4054 pbits >>= 1;
4055 data_word >>= 8;
4057 parity_bits >>= 8;
4059 } else {
4061 * Check data array for even parity.
4062 * The 8 parity bits are grouped into 4 pairs each
4063 * of which covers a 64-bit word. The endianness is
4064 * reversed -- the low-order parity bits cover the
4065 * high-order data words.
4067 parity_bits = tmp_dcp.dc_utag >> 8;
4068 for (word = 0; word < 4; word++) {
4069 pbits = (parity_bits >> (6 - word * 2)) & 3;
4070 if ((popc64(tmp_dcp.dc_data[word]) +
4071 parity_bits_popc[pbits]) & 1) {
4072 cpu_record_dc_data_parity(ch_flt, dcp,
4073 &tmp_dcp, way, word);
4080 static void
4081 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4082 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4085 * If this is the first error log detailed information about it.
4086 * Otherwise just record the fact that we found another error.
4088 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4089 ch_flt->parity_data.dpe.cpl_way = way;
4090 ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4091 ch_flt->parity_data.dpe.cpl_off = word * 8;
4092 bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4094 ch_flt->parity_data.dpe.cpl_lcnt++;
4098 * Record information related to the source of an Icache Parity Error.
4100 * Called with the Icache disabled so any diagnostic accesses are safe.
4102 static void
4103 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4105 int ic_set_size;
4106 int ic_linesize;
4107 int index;
4109 if (CPU_PRIVATE(CPU)) {
4110 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4111 CH_ICACHE_NWAY;
4112 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4113 } else {
4114 ic_set_size = icache_size / CH_ICACHE_NWAY;
4115 ic_linesize = icache_linesize;
4118 ch_flt->parity_data.ipe.cpl_way = -1;
4119 ch_flt->parity_data.ipe.cpl_off = -1;
4121 for (index = 0; index < ic_set_size; index += ic_linesize)
4122 cpu_icache_parity_check(ch_flt, index);
4126 * Check all ways of the Icache at a specified index for good parity.
4128 static void
4129 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4131 uint64_t parmask, pn_inst_parity;
4132 int ic_set_size;
4133 int ic_linesize;
4134 int flt_index, way, instr, num_instr;
4135 struct async_flt *aflt = (struct async_flt *)ch_flt;
4136 ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4137 ch_ic_data_t tmp_icp;
4139 if (CPU_PRIVATE(CPU)) {
4140 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4141 CH_ICACHE_NWAY;
4142 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4143 } else {
4144 ic_set_size = icache_size / CH_ICACHE_NWAY;
4145 ic_linesize = icache_linesize;
4149 * Panther has twice as many instructions per icache line and the
4150 * instruction parity bit is in a different location.
4152 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4153 num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4154 pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4155 } else {
4156 num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4157 pn_inst_parity = 0;
4161 * Index at which we expect to find the parity error.
4163 flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4165 for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4167 * Diagnostic reads expect address argument in ASI format.
4169 get_icache_dtag(2 * (index + way * ic_set_size),
4170 (uint64_t *)&tmp_icp);
4173 * If this is the index in which we expect to find the
4174 * error log detailed information about each of the ways.
4175 * This information will be displayed later if we can't
4176 * determine the exact way in which the error is located.
4178 if (flt_index == index)
4179 bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4182 * Check tag for even parity.
4183 * Sum of 1 bits (including parity bit) should be even.
4185 if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4187 * If this way is the one in which we expected
4188 * to find the error record the way and check the
4189 * snoop tag. Otherwise just record the fact we
4190 * found another error.
4192 if (flt_index == index) {
4193 ch_flt->parity_data.ipe.cpl_way = way;
4194 ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4196 if (popc64(tmp_icp.ic_sntag &
4197 CHP_ICSNTAG_PARMASK) & 1) {
4198 ch_flt->parity_data.ipe.cpl_tag |=
4199 CHP_IC_SNTAG;
4200 ch_flt->parity_data.ipe.cpl_lcnt++;
4204 ch_flt->parity_data.ipe.cpl_lcnt++;
4205 continue;
4209 * Check instruction data for even parity.
4210 * Bits participating in parity differ for PC-relative
4211 * versus non-PC-relative instructions.
4213 for (instr = 0; instr < num_instr; instr++) {
4214 parmask = (tmp_icp.ic_data[instr] &
4215 CH_ICDATA_PRED_ISPCREL) ?
4216 (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4217 (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4218 if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4220 * If this way is the one in which we expected
4221 * to find the error record the way and offset.
4222 * Otherwise just log the fact we found another
4223 * error.
4225 if (flt_index == index) {
4226 ch_flt->parity_data.ipe.cpl_way = way;
4227 ch_flt->parity_data.ipe.cpl_off =
4228 instr * 4;
4230 ch_flt->parity_data.ipe.cpl_lcnt++;
4231 continue;
4238 * Record information related to the source of an Pcache Parity Error.
4240 static void
4241 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4243 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4244 int index;
4247 * Since instruction decode cannot be done at high PIL just
4248 * examine the entire Pcache to check for any parity errors.
4250 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4251 ch_flt->parity_data.dpe.cpl_way = -1;
4252 ch_flt->parity_data.dpe.cpl_off = -1;
4254 for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4255 cpu_pcache_parity_check(ch_flt, index);
4259 * Check all ways of the Pcache at a specified index for good parity.
4261 static void
4262 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4264 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4265 int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4266 int way, word, pbit, parity_bits;
4267 ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4268 ch_pc_data_t tmp_pcp;
4270 for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4272 * Perform diagnostic read.
4274 get_pcache_dtag(index + way * pc_set_size,
4275 (uint64_t *)&tmp_pcp);
4277 * Check data array for odd parity. There are 8 parity
4278 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4279 * of those bits covers exactly 8 bytes of the data
4280 * array:
4282 * parity bit P$ data bytes covered
4283 * ---------- ---------------------
4284 * 50 63:56
4285 * 51 55:48
4286 * 52 47:40
4287 * 53 39:32
4288 * 54 31:24
4289 * 55 23:16
4290 * 56 15:8
4291 * 57 7:0
4293 parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4294 for (word = 0; word < pc_data_words; word++) {
4295 pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4296 if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4298 * If this is the first error log detailed
4299 * information about it. Otherwise just record
4300 * the fact that we found another error.
4302 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4303 ch_flt->parity_data.dpe.cpl_way = way;
4304 ch_flt->parity_data.dpe.cpl_cache =
4305 CPU_PC_PARITY;
4306 ch_flt->parity_data.dpe.cpl_off =
4307 word * sizeof (uint64_t);
4308 bcopy(&tmp_pcp, pcp,
4309 sizeof (ch_pc_data_t));
4311 ch_flt->parity_data.dpe.cpl_lcnt++;
4319 * Add L1 Data cache data to the ereport payload.
4321 static void
4322 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4324 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4325 ch_dc_data_t *dcp;
4326 ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4327 uint_t nelem;
4328 int i, ways_to_check, ways_logged = 0;
4331 * If this is an D$ fault then there may be multiple
4332 * ways captured in the ch_parity_log_t structure.
4333 * Otherwise, there will be at most one way captured
4334 * in the ch_diag_data_t struct.
4335 * Check each way to see if it should be encoded.
4337 if (ch_flt->flt_type == CPU_DC_PARITY)
4338 ways_to_check = CH_DCACHE_NWAY;
4339 else
4340 ways_to_check = 1;
4341 for (i = 0; i < ways_to_check; i++) {
4342 if (ch_flt->flt_type == CPU_DC_PARITY)
4343 dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4344 else
4345 dcp = &ch_flt->flt_diag_data.chd_dc_data;
4346 if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4347 bcopy(dcp, &dcdata[ways_logged],
4348 sizeof (ch_dc_data_t));
4349 ways_logged++;
4354 * Add the dcache data to the payload.
4356 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4357 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4358 if (ways_logged != 0) {
4359 nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4360 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4361 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4366 * Add L1 Instruction cache data to the ereport payload.
4368 static void
4369 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4371 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4372 ch_ic_data_t *icp;
4373 ch_ic_data_t icdata[CH_ICACHE_NWAY];
4374 uint_t nelem;
4375 int i, ways_to_check, ways_logged = 0;
4378 * If this is an I$ fault then there may be multiple
4379 * ways captured in the ch_parity_log_t structure.
4380 * Otherwise, there will be at most one way captured
4381 * in the ch_diag_data_t struct.
4382 * Check each way to see if it should be encoded.
4384 if (ch_flt->flt_type == CPU_IC_PARITY)
4385 ways_to_check = CH_ICACHE_NWAY;
4386 else
4387 ways_to_check = 1;
4388 for (i = 0; i < ways_to_check; i++) {
4389 if (ch_flt->flt_type == CPU_IC_PARITY)
4390 icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4391 else
4392 icp = &ch_flt->flt_diag_data.chd_ic_data;
4393 if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4394 bcopy(icp, &icdata[ways_logged],
4395 sizeof (ch_ic_data_t));
4396 ways_logged++;
4401 * Add the icache data to the payload.
4403 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4404 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4405 if (ways_logged != 0) {
4406 nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4407 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4408 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4412 #endif /* CPU_IMP_L1_CACHE_PARITY */
4415 * Add ecache data to payload.
4417 static void
4418 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4420 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4421 ch_ec_data_t *ecp;
4422 ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4423 uint_t nelem;
4424 int i, ways_logged = 0;
4427 * Check each way to see if it should be encoded
4428 * and concatinate it into a temporary buffer.
4430 for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4431 ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4432 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4433 bcopy(ecp, &ecdata[ways_logged],
4434 sizeof (ch_ec_data_t));
4435 ways_logged++;
4440 * Panther CPUs have an additional level of cache and so
4441 * what we just collected was the L3 (ecache) and not the
4442 * L2 cache.
4444 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4446 * Add the L3 (ecache) data to the payload.
4448 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4449 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4450 if (ways_logged != 0) {
4451 nelem = sizeof (ch_ec_data_t) /
4452 sizeof (uint64_t) * ways_logged;
4453 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4454 DATA_TYPE_UINT64_ARRAY, nelem,
4455 (uint64_t *)ecdata, NULL);
4459 * Now collect the L2 cache.
4461 ways_logged = 0;
4462 for (i = 0; i < PN_L2_NWAYS; i++) {
4463 ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4464 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4465 bcopy(ecp, &ecdata[ways_logged],
4466 sizeof (ch_ec_data_t));
4467 ways_logged++;
4473 * Add the L2 cache data to the payload.
4475 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4476 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4477 if (ways_logged != 0) {
4478 nelem = sizeof (ch_ec_data_t) /
4479 sizeof (uint64_t) * ways_logged;
4480 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4481 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL);
4486 * Initialize cpu scheme for specified cpu.
4488 static void
4489 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4491 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4492 uint8_t mask;
4494 mask = cpunodes[cpuid].version;
4495 (void) snprintf(sbuf, sizeof (sbuf), "%llX",
4496 (u_longlong_t)cpunodes[cpuid].device_id);
4497 (void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4498 cpuid, &mask, (const char *)sbuf);
4502 * Returns ereport resource type.
4504 static int
4505 cpu_error_to_resource_type(struct async_flt *aflt)
4507 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4509 switch (ch_flt->flt_type) {
4511 case CPU_CE_ECACHE:
4512 case CPU_UE_ECACHE:
4513 case CPU_UE_ECACHE_RETIRE:
4514 case CPU_ORPH:
4516 * If AFSR error bit indicates L2$ Data for Cheetah,
4517 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4518 * E$ Data type, otherwise, return CPU type.
4520 if (cpu_error_is_ecache_data(aflt->flt_inst,
4521 ch_flt->flt_bit))
4522 return (ERRTYPE_ECACHE_DATA);
4523 return (ERRTYPE_CPU);
4525 case CPU_CE:
4526 case CPU_UE:
4527 case CPU_EMC:
4528 case CPU_DUE:
4529 case CPU_RCE:
4530 case CPU_RUE:
4531 case CPU_FRC:
4532 case CPU_FRU:
4533 return (ERRTYPE_MEMORY);
4535 case CPU_IC_PARITY:
4536 case CPU_DC_PARITY:
4537 case CPU_FPUERR:
4538 case CPU_PC_PARITY:
4539 case CPU_ITLB_PARITY:
4540 case CPU_DTLB_PARITY:
4541 return (ERRTYPE_CPU);
4543 return (ERRTYPE_UNKNOWN);
4547 * Encode the data saved in the ch_async_flt_t struct into
4548 * the FM ereport payload.
4550 static void
4551 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4552 nvlist_t *resource, int *afar_status, int *synd_status)
4554 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4555 *synd_status = AFLT_STAT_INVALID;
4556 *afar_status = AFLT_STAT_INVALID;
4558 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4559 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4560 DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4563 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4564 IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4565 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4566 DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4569 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4570 *afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4571 ch_flt->flt_bit);
4572 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4573 DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4576 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4577 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4578 DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4581 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4582 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4583 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4586 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4587 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4588 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4591 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4592 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4593 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4596 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4597 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4598 DATA_TYPE_BOOLEAN_VALUE,
4599 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4602 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4603 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4604 DATA_TYPE_BOOLEAN_VALUE,
4605 (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4608 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4609 *synd_status = afsr_to_synd_status(aflt->flt_inst,
4610 ch_flt->afsr_errs, ch_flt->flt_bit);
4611 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4612 DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4615 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4616 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4617 DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4620 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4621 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4622 DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4625 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4626 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4627 DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4630 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4631 cpu_payload_add_ecache(aflt, payload);
4633 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4634 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4635 DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4638 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4639 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4640 DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4643 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4644 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4645 DATA_TYPE_UINT32_ARRAY, 16,
4646 (uint32_t *)&ch_flt->flt_fpdata, NULL);
4649 #if defined(CPU_IMP_L1_CACHE_PARITY)
4650 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4651 cpu_payload_add_dcache(aflt, payload);
4652 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4653 cpu_payload_add_icache(aflt, payload);
4654 #endif /* CPU_IMP_L1_CACHE_PARITY */
4656 #if defined(CHEETAH_PLUS)
4657 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4658 cpu_payload_add_pcache(aflt, payload);
4659 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4660 cpu_payload_add_tlb(aflt, payload);
4661 #endif /* CHEETAH_PLUS */
4663 * Create the FMRI that goes into the payload
4664 * and contains the unum info if necessary.
4666 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4667 char unum[UNUM_NAMLEN] = "";
4668 char sid[DIMM_SERIAL_ID_LEN] = "";
4669 int len, ret, rtype, synd_code;
4670 uint64_t offset = (uint64_t)-1;
4672 rtype = cpu_error_to_resource_type(aflt);
4673 switch (rtype) {
4675 case ERRTYPE_MEMORY:
4676 case ERRTYPE_ECACHE_DATA:
4679 * Memory errors, do unum lookup
4681 if (*afar_status == AFLT_STAT_INVALID)
4682 break;
4684 if (rtype == ERRTYPE_ECACHE_DATA)
4685 aflt->flt_status |= ECC_ECACHE;
4686 else
4687 aflt->flt_status &= ~ECC_ECACHE;
4689 synd_code = synd_to_synd_code(*synd_status,
4690 aflt->flt_synd, ch_flt->flt_bit);
4692 if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4693 break;
4695 ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4696 &len);
4698 if (ret == 0) {
4699 (void) cpu_get_mem_offset(aflt->flt_addr,
4700 &offset);
4703 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4704 NULL, unum, (ret == 0) ? sid : NULL, offset);
4705 fm_payload_set(payload,
4706 FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4707 DATA_TYPE_NVLIST, resource, NULL);
4708 break;
4710 case ERRTYPE_CPU:
4712 * On-board processor array error, add cpu resource.
4714 cpu_fmri_cpu_set(resource, aflt->flt_inst);
4715 fm_payload_set(payload,
4716 FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4717 DATA_TYPE_NVLIST, resource, NULL);
4718 break;
4724 * Initialize the way info if necessary.
4726 void
4727 cpu_ereport_init(struct async_flt *aflt)
4729 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4730 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4731 ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4732 int i;
4735 * Initialize the info in the CPU logout structure.
4736 * The I$/D$ way information is not initialized here
4737 * since it is captured in the logout assembly code.
4739 for (i = 0; i < CHD_EC_DATA_SETS; i++)
4740 (ecp + i)->ec_way = i;
4742 for (i = 0; i < PN_L2_NWAYS; i++)
4743 (l2p + i)->ec_way = i;
4747 * Returns whether fault address is valid for this error bit and
4748 * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4751 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4753 struct async_flt *aflt = (struct async_flt *)ch_flt;
4755 return ((t_afsr_bit & C_AFSR_MEMORY) &&
4756 afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4757 AFLT_STAT_VALID &&
4758 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4762 * Returns whether fault address is valid based on the error bit for the
4763 * one event being queued and whether the address is "in memory".
4765 static int
4766 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4768 struct async_flt *aflt = (struct async_flt *)ch_flt;
4769 int afar_status;
4770 uint64_t afsr_errs, afsr_ow, *ow_bits;
4772 if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4773 !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4774 return (0);
4776 afsr_errs = ch_flt->afsr_errs;
4777 afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4779 switch (afar_status) {
4780 case AFLT_STAT_VALID:
4781 return (1);
4783 case AFLT_STAT_AMBIGUOUS:
4785 * Status is ambiguous since another error bit (or bits)
4786 * of equal priority to the specified bit on in the afsr,
4787 * so check those bits. Return 1 only if the bits on in the
4788 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4789 * Otherwise not all the equal priority bits are for memory
4790 * errors, so return 0.
4792 ow_bits = afar_overwrite;
4793 while ((afsr_ow = *ow_bits++) != 0) {
4795 * Get other bits that are on in t_afsr_bit's priority
4796 * class to check for Memory Error bits only.
4798 if (afsr_ow & t_afsr_bit) {
4799 if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4800 return (0);
4801 else
4802 return (1);
4805 /*FALLTHRU*/
4807 default:
4808 return (0);
4812 static void
4813 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4815 struct async_flt *aflt = (struct async_flt *)ch_flt;
4816 ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4817 ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4818 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4819 #if defined(CPU_IMP_ECACHE_ASSOC)
4820 int i, nway;
4821 #endif /* CPU_IMP_ECACHE_ASSOC */
4824 * Check if the CPU log out captured was valid.
4826 if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4827 ch_flt->flt_data_incomplete)
4828 return;
4830 #if defined(CPU_IMP_ECACHE_ASSOC)
4831 nway = cpu_ecache_nway();
4832 i = cpu_ecache_line_valid(ch_flt);
4833 if (i == 0 || i > nway) {
4834 for (i = 0; i < nway; i++)
4835 ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4836 } else
4837 ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4838 #else /* CPU_IMP_ECACHE_ASSOC */
4839 ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4840 #endif /* CPU_IMP_ECACHE_ASSOC */
4842 #if defined(CHEETAH_PLUS)
4843 pn_cpu_log_diag_l2_info(ch_flt);
4844 #endif /* CHEETAH_PLUS */
4846 if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4847 dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4848 dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4851 if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4852 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4853 icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4854 else
4855 icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4856 icp->ic_logflag = IC_LOGFLAG_MAGIC;
/*
 * Cheetah ECC calculation.
 *
 * We only need to do the calculation on the data bits and can ignore check
 * bit and Mtag bit terms in the calculation.
 *
 * One row per ECC bit: each row holds the masks applied to the low and high
 * 64-bit halves of the 128-bit chunk.
 */
static uint64_t ch_ecc_table[9][2] = {
	/*
	 * low order 64-bits   high-order 64-bits
	 */
	{ 0x46bffffeccd1177f, 0x488800022100014c },
	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
	{ 0x7f5511421b113809, 0x901c88d84288aafe },
	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
	{ 0xf552181014448344, 0x7ff8f4443e411911 },
	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
	{ 0x3280008440001112, 0xfee88b337ffffd62 },
};
/*
 * 64-bit population count, use well-known popcnt trick.
 * We could use the UltraSPARC V9 POPC instruction, but some
 * CPUs including Cheetahplus and Jaguar do not support that
 * instruction.
 *
 * Returns the number of 1 bits in val (0..64).
 */
int
popc64(uint64_t val)
{
	int cnt;

	/* val &= val - 1 clears the lowest set bit on each pass */
	for (cnt = 0; val != 0; val &= val - 1)
		cnt++;
	return (cnt);
}
4898 * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4899 * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4900 * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4901 * instead of doing all the xor's.
4903 uint32_t
4904 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4906 int bitno, s;
4907 int synd = 0;
4909 for (bitno = 0; bitno < 9; bitno++) {
4910 s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4911 popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4912 synd |= (s << bitno);
4914 return (synd);
4919 * Queue one event based on ecc_type_to_info entry. If the event has an AFT1
4920 * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4921 * the UE event queue. Otherwise it is dispatched to the CE event queue.
4923 static void
4924 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4925 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4927 struct async_flt *aflt = (struct async_flt *)ch_flt;
4929 if (reason &&
4930 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4931 (void) strcat(reason, eccp->ec_reason);
4934 ch_flt->flt_bit = eccp->ec_afsr_bit;
4935 ch_flt->flt_type = eccp->ec_flt_type;
4936 if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4937 ch_flt->flt_diag_data = *cdp;
4938 else
4939 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4940 aflt->flt_in_memory =
4941 cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4943 if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4944 aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4945 else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4946 aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4947 else
4948 aflt->flt_synd = 0;
4950 aflt->flt_payload = eccp->ec_err_payload;
4952 if (aflt->flt_panic || (eccp->ec_afsr_bit &
4953 (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4954 cpu_errorq_dispatch(eccp->ec_err_class,
4955 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4956 aflt->flt_panic);
4957 else
4958 cpu_errorq_dispatch(eccp->ec_err_class,
4959 (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4960 aflt->flt_panic);
4964 * Queue events on async event queue one event per error bit. First we
4965 * queue the events that we "expect" for the given trap, then we queue events
4966 * that we may not expect. Return number of events queued.
4969 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4970 ch_cpu_logout_t *clop)
4972 struct async_flt *aflt = (struct async_flt *)ch_flt;
4973 ecc_type_to_info_t *eccp;
4974 int nevents = 0;
4975 uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4976 #if defined(CHEETAH_PLUS)
4977 uint64_t orig_t_afsr_errs;
4978 #endif
4979 uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4980 uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4981 ch_diag_data_t *cdp = NULL;
4983 t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4985 #if defined(CHEETAH_PLUS)
4986 orig_t_afsr_errs = t_afsr_errs;
4989 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4991 if (clop != NULL) {
4993 * Set the AFSR and AFAR fields to the shadow registers. The
4994 * flt_addr and flt_stat fields will be reset to the primaries
4995 * below, but the sdw_addr and sdw_stat will stay as the
4996 * secondaries.
4998 cdp = &clop->clo_sdw_data;
4999 aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
5000 aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
5001 ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
5002 ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5003 (cdp->chd_afsr & C_AFSR_ALL_ERRS);
5006 * If the primary and shadow AFSR differ, tag the shadow as
5007 * the first fault.
5009 if ((primary_afar != cdp->chd_afar) ||
5010 (primary_afsr_errs != ch_flt->afsr_errs)) {
5011 aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
5015 * Check AFSR bits as well as AFSR_EXT bits in order of
5016 * the AFAR overwrite priority. Our stored AFSR_EXT value
5017 * is expected to be zero for those CPUs which do not have
5018 * an AFSR_EXT register.
5020 for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
5021 if ((eccp->ec_afsr_bit &
5022 (ch_flt->afsr_errs & t_afsr_errs)) &&
5023 ((eccp->ec_flags & aflt->flt_status) != 0)) {
5024 cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5025 cdp = NULL;
5026 t_afsr_errs &= ~eccp->ec_afsr_bit;
5027 nevents++;
5032 * If the ME bit is on in the primary AFSR turn all the
5033 * error bits on again that may set the ME bit to make
5034 * sure we see the ME AFSR error logs.
5036 if ((primary_afsr & C_AFSR_ME) != 0)
5037 t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
5039 #endif /* CHEETAH_PLUS */
5041 if (clop != NULL)
5042 cdp = &clop->clo_data;
5045 * Queue expected errors, error bit and fault type must match
5046 * in the ecc_type_to_info table.
5048 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5049 eccp++) {
5050 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
5051 (eccp->ec_flags & aflt->flt_status) != 0) {
5052 #if defined(SERRANO)
5054 * For FRC/FRU errors on Serrano the afar2 captures
5055 * the address and the associated data is
5056 * in the shadow logout area.
5058 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
5059 if (clop != NULL)
5060 cdp = &clop->clo_sdw_data;
5061 aflt->flt_addr = ch_flt->afar2;
5062 } else {
5063 if (clop != NULL)
5064 cdp = &clop->clo_data;
5065 aflt->flt_addr = primary_afar;
5067 #else /* SERRANO */
5068 aflt->flt_addr = primary_afar;
5069 #endif /* SERRANO */
5070 aflt->flt_stat = primary_afsr;
5071 ch_flt->afsr_ext = primary_afsr_ext;
5072 ch_flt->afsr_errs = primary_afsr_errs;
5073 cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5074 cdp = NULL;
5075 t_afsr_errs &= ~eccp->ec_afsr_bit;
5076 nevents++;
5081 * Queue unexpected errors, error bit only match.
5083 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5084 eccp++) {
5085 if (eccp->ec_afsr_bit & t_afsr_errs) {
5086 #if defined(SERRANO)
5088 * For FRC/FRU errors on Serrano the afar2 captures
5089 * the address and the associated data is
5090 * in the shadow logout area.
5092 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
5093 if (clop != NULL)
5094 cdp = &clop->clo_sdw_data;
5095 aflt->flt_addr = ch_flt->afar2;
5096 } else {
5097 if (clop != NULL)
5098 cdp = &clop->clo_data;
5099 aflt->flt_addr = primary_afar;
5101 #else /* SERRANO */
5102 aflt->flt_addr = primary_afar;
5103 #endif /* SERRANO */
5104 aflt->flt_stat = primary_afsr;
5105 ch_flt->afsr_ext = primary_afsr_ext;
5106 ch_flt->afsr_errs = primary_afsr_errs;
5107 cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5108 cdp = NULL;
5109 t_afsr_errs &= ~eccp->ec_afsr_bit;
5110 nevents++;
5113 return (nevents);
5117 * Return trap type number.
5119 uint8_t
5120 flt_to_trap_type(struct async_flt *aflt)
5122 if (aflt->flt_status & ECC_I_TRAP)
5123 return (TRAP_TYPE_ECC_I);
5124 if (aflt->flt_status & ECC_D_TRAP)
5125 return (TRAP_TYPE_ECC_D);
5126 if (aflt->flt_status & ECC_F_TRAP)
5127 return (TRAP_TYPE_ECC_F);
5128 if (aflt->flt_status & ECC_C_TRAP)
5129 return (TRAP_TYPE_ECC_C);
5130 if (aflt->flt_status & ECC_DP_TRAP)
5131 return (TRAP_TYPE_ECC_DP);
5132 if (aflt->flt_status & ECC_IP_TRAP)
5133 return (TRAP_TYPE_ECC_IP);
5134 if (aflt->flt_status & ECC_ITLB_TRAP)
5135 return (TRAP_TYPE_ECC_ITLB);
5136 if (aflt->flt_status & ECC_DTLB_TRAP)
5137 return (TRAP_TYPE_ECC_DTLB);
5138 return (TRAP_TYPE_UNKNOWN);
5142 * Decide an error type based on detector and leaky/partner tests.
5143 * The following array is used for quick translation - it must
5144 * stay in sync with ce_dispact_t.
5147 static char *cetypes[] = {
5148 CE_DISP_DESC_U,
5149 CE_DISP_DESC_I,
5150 CE_DISP_DESC_PP,
5151 CE_DISP_DESC_P,
5152 CE_DISP_DESC_L,
5153 CE_DISP_DESC_PS,
5154 CE_DISP_DESC_S
5157 char *
5158 flt_to_error_type(struct async_flt *aflt)
5160 ce_dispact_t dispact, disp;
5161 uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5164 * The memory payload bundle is shared by some events that do
5165 * not perform any classification. For those flt_disp will be
5166 * 0 and we will return "unknown".
5168 if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5169 return (cetypes[CE_DISP_UNKNOWN]);
5171 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5174 * It is also possible that no scrub/classification was performed
5175 * by the detector, for instance where a disrupting error logged
5176 * in the AFSR while CEEN was off in cpu_deferred_error.
5178 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5179 return (cetypes[CE_DISP_UNKNOWN]);
5182 * Lookup type in initial classification/action table
5184 dispact = CE_DISPACT(ce_disp_table,
5185 CE_XDIAG_AFARMATCHED(dtcrinfo),
5186 CE_XDIAG_STATE(dtcrinfo),
5187 CE_XDIAG_CE1SEEN(dtcrinfo),
5188 CE_XDIAG_CE2SEEN(dtcrinfo));
5191 * A bad lookup is not something to panic production systems for.
5193 ASSERT(dispact != CE_DISP_BAD);
5194 if (dispact == CE_DISP_BAD)
5195 return (cetypes[CE_DISP_UNKNOWN]);
5197 disp = CE_DISP(dispact);
5199 switch (disp) {
5200 case CE_DISP_UNKNOWN:
5201 case CE_DISP_INTERMITTENT:
5202 break;
5204 case CE_DISP_POSS_PERS:
5206 * "Possible persistent" errors to which we have applied a valid
5207 * leaky test can be separated into "persistent" or "leaky".
5209 lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5210 if (CE_XDIAG_TESTVALID(lkyinfo)) {
5211 if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5212 CE_XDIAG_CE2SEEN(lkyinfo))
5213 disp = CE_DISP_LEAKY;
5214 else
5215 disp = CE_DISP_PERS;
5217 break;
5219 case CE_DISP_POSS_STICKY:
5221 * Promote "possible sticky" results that have been
5222 * confirmed by a partner test to "sticky". Unconfirmed
5223 * "possible sticky" events are left at that status - we do not
5224 * guess at any bad reader/writer etc status here.
5226 ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5227 if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5228 CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5229 disp = CE_DISP_STICKY;
5232 * Promote "possible sticky" results on a uniprocessor
5233 * to "sticky"
5235 if (disp == CE_DISP_POSS_STICKY &&
5236 CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5237 disp = CE_DISP_STICKY;
5238 break;
5240 default:
5241 disp = CE_DISP_UNKNOWN;
5242 break;
5245 return (cetypes[disp]);
5249 * Given the entire afsr, the specific bit to check and a prioritized list of
5250 * error bits, determine the validity of the various overwrite priority
5251 * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5252 * different overwrite priorities.
5254 * Given a specific afsr error bit and the entire afsr, there are three cases:
5255 * INVALID: The specified bit is lower overwrite priority than some other
5256 * error bit which is on in the afsr (or IVU/IVC).
5257 * VALID: The specified bit is higher priority than all other error bits
5258 * which are on in the afsr.
5259 * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5260 * bit is on in the afsr.
5263 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5265 uint64_t afsr_ow;
5267 while ((afsr_ow = *ow_bits++) != 0) {
5269 * If bit is in the priority class, check to see if another
5270 * bit in the same class is on => ambiguous. Otherwise,
5271 * the value is valid. If the bit is not on at this priority
5272 * class, but a higher priority bit is on, then the value is
5273 * invalid.
5275 if (afsr_ow & afsr_bit) {
5277 * If equal pri bit is on, ambiguous.
5279 if (afsr & (afsr_ow & ~afsr_bit))
5280 return (AFLT_STAT_AMBIGUOUS);
5281 return (AFLT_STAT_VALID);
5282 } else if (afsr & afsr_ow)
5283 break;
5287 * We didn't find a match or a higher priority bit was on. Not
5288 * finding a match handles the case of invalid AFAR for IVC, IVU.
5290 return (AFLT_STAT_INVALID);
5293 static int
5294 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5296 #if defined(SERRANO)
5297 if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5298 return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5299 else
5300 #endif /* SERRANO */
5301 return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5304 static int
5305 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5307 return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5310 static int
5311 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5313 return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5316 static int
5317 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5319 #if defined(CHEETAH_PLUS)
5321 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5322 * policy for Cheetah+ and separate for Panther CPUs.
5324 if (afsr_bit & C_AFSR_MSYND_ERRS) {
5325 if (IS_PANTHER(cpunodes[cpuid].implementation))
5326 return (afsr_to_msynd_status(afsr, afsr_bit));
5327 else
5328 return (afsr_to_esynd_status(afsr, afsr_bit));
5329 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5330 if (IS_PANTHER(cpunodes[cpuid].implementation))
5331 return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5332 else
5333 return (afsr_to_esynd_status(afsr, afsr_bit));
5334 #else /* CHEETAH_PLUS */
5335 if (afsr_bit & C_AFSR_MSYND_ERRS) {
5336 return (afsr_to_msynd_status(afsr, afsr_bit));
5337 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5338 return (afsr_to_esynd_status(afsr, afsr_bit));
5339 #endif /* CHEETAH_PLUS */
5340 } else {
5341 return (AFLT_STAT_INVALID);
5346 * Slave CPU stick synchronization.
5348 void
5349 sticksync_slave(void)
5351 int i;
5352 int tries = 0;
5353 int64_t tskew;
5354 int64_t av_tskew;
5356 kpreempt_disable();
5357 /* wait for the master side */
5358 while (stick_sync_cmd != SLAVE_START)
5361 * Synchronization should only take a few tries at most. But in the
5362 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5363 * without it's stick synchronized wouldn't be a good citizen.
5365 while (slave_done == 0) {
5367 * Time skew calculation.
5369 av_tskew = tskew = 0;
5371 for (i = 0; i < stick_iter; i++) {
5372 /* make location hot */
5373 timestamp[EV_A_START] = 0;
5374 stick_timestamp(&timestamp[EV_A_START]);
5376 /* tell the master we're ready */
5377 stick_sync_cmd = MASTER_START;
5379 /* and wait */
5380 while (stick_sync_cmd != SLAVE_CONT)
5382 /* Event B end */
5383 stick_timestamp(&timestamp[EV_B_END]);
5385 /* calculate time skew */
5386 tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5387 - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5388 / 2;
5390 /* keep running count */
5391 av_tskew += tskew;
5392 } /* for */
5395 * Adjust stick for time skew if not within the max allowed;
5396 * otherwise we're all done.
5398 if (stick_iter != 0)
5399 av_tskew = av_tskew/stick_iter;
5400 if (ABS(av_tskew) > stick_tsk) {
5402 * If the skew is 1 (the slave's STICK register
5403 * is 1 STICK ahead of the master's), stick_adj
5404 * could fail to adjust the slave's STICK register
5405 * if the STICK read on the slave happens to
5406 * align with the increment of the STICK.
5407 * Therefore, we increment the skew to 2.
5409 if (av_tskew == 1)
5410 av_tskew++;
5411 stick_adj(-av_tskew);
5412 } else
5413 slave_done = 1;
5414 #ifdef DEBUG
5415 if (tries < DSYNC_ATTEMPTS)
5416 stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5417 av_tskew;
5418 ++tries;
5419 #endif /* DEBUG */
5421 } /* while */
5423 /* allow the master to finish */
5424 stick_sync_cmd = EVENT_NULL;
5425 kpreempt_enable();
5429 * Master CPU side of stick synchronization.
5430 * - timestamp end of Event A
5431 * - timestamp beginning of Event B
5433 void
5434 sticksync_master(void)
5436 int i;
5438 kpreempt_disable();
5439 /* tell the slave we've started */
5440 slave_done = 0;
5441 stick_sync_cmd = SLAVE_START;
5443 while (slave_done == 0) {
5444 for (i = 0; i < stick_iter; i++) {
5445 /* wait for the slave */
5446 while (stick_sync_cmd != MASTER_START)
5448 /* Event A end */
5449 stick_timestamp(&timestamp[EV_A_END]);
5451 /* make location hot */
5452 timestamp[EV_B_START] = 0;
5453 stick_timestamp(&timestamp[EV_B_START]);
5455 /* tell the slave to continue */
5456 stick_sync_cmd = SLAVE_CONT;
5457 } /* for */
5459 /* wait while slave calculates time skew */
5460 while (stick_sync_cmd == SLAVE_CONT)
5462 } /* while */
5463 kpreempt_enable();
/*
 * Intentionally empty on this cpu module.
 *
 * Cheetah/Cheetah+ raise a disrupting error for copybacks, so the Spitfire
 * hack of xcall'ing all the cpus to ask them to check is not needed.  Also,
 * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
 * panic idle.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
5477 struct kmem_cache *ch_private_cache;
5480 * Cpu private unitialization. Uninitialize the Ecache scrubber and
5481 * deallocate the scrubber data structures and cpu_private data structure.
5483 void
5484 cpu_uninit_private(struct cpu *cp)
5486 cheetah_private_t *chprp = CPU_PRIVATE(cp);
5488 ASSERT(chprp);
5489 cpu_uninit_ecache_scrub_dr(cp);
5490 CPU_PRIVATE(cp) = NULL;
5491 ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5492 kmem_cache_free(ch_private_cache, chprp);
5493 cmp_delete_cpu(cp->cpu_id);
5498 * Cheetah Cache Scrubbing
5500 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5501 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5502 * protected by either parity or ECC.
5504 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5505 * cache per second). Due to the specifics of how the I$ control
5506 * logic works with respect to the ASI used to scrub I$ lines, the entire
5507 * I$ is scanned at once.
5511 * Tuneables to enable and disable the scrubbing of the caches, and to tune
5512 * scrubbing behavior. These may be changed via /etc/system or using mdb
5513 * on a running system.
5515 int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */
5518 * The following are the PIL levels that the softints/cross traps will fire at.
5520 uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */
5521 uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */
5522 uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */
5524 #if defined(JALAPENO)
5527 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5528 * on Jalapeno.
5530 int ecache_scrub_enable = 0;
5532 #else /* JALAPENO */
5535 * With all other cpu types, E$ scrubbing is on by default
5537 int ecache_scrub_enable = 1;
5539 #endif /* JALAPENO */
5542 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5545 * The I$ scrubber tends to cause latency problems for real-time SW, so it
5546 * is disabled by default on non-Cheetah systems
5548 int icache_scrub_enable = 0;
5551 * Tuneables specifying the scrub calls per second and the scan rate
5552 * for each cache
5554 * The cyclic times are set during boot based on the following values.
5555 * Changing these values in mdb after this time will have no effect. If
5556 * a different value is desired, it must be set in /etc/system before a
5557 * reboot.
5559 int ecache_calls_a_sec = 1;
5560 int dcache_calls_a_sec = 2;
5561 int icache_calls_a_sec = 2;
5563 int ecache_scan_rate_idle = 1;
5564 int ecache_scan_rate_busy = 1;
5565 int dcache_scan_rate_idle = 1;
5566 int dcache_scan_rate_busy = 1;
5567 int icache_scan_rate_idle = 1;
5568 int icache_scan_rate_busy = 1;
5570 #else /* CHEETAH_PLUS || JALAPENO || SERRANO */
5572 int icache_scrub_enable = 1; /* I$ scrubbing is on by default */
5574 int ecache_calls_a_sec = 100; /* E$ scrub calls per seconds */
5575 int dcache_calls_a_sec = 100; /* D$ scrub calls per seconds */
5576 int icache_calls_a_sec = 100; /* I$ scrub calls per seconds */
5578 int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */
5579 int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */
5580 int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */
5581 int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */
5582 int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */
5583 int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */
5585 #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */
5588 * In order to scrub on offline cpus, a cross trap is sent. The handler will
5589 * increment the outstanding request counter and schedule a softint to run
5590 * the scrubber.
5592 extern xcfunc_t cache_scrubreq_tl1;
5595 * These are the softint functions for each cache scrubber
5597 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5598 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5599 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5602 * The cache scrub info table contains cache specific information
5603 * and allows for some of the scrub code to be table driven, reducing
5604 * duplication of cache similar code.
5606 * This table keeps a copy of the value in the calls per second variable
5607 * (?cache_calls_a_sec). This makes it much more difficult for someone
5608 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5609 * mdb in a misguided attempt to disable the scrubber).
5611 struct scrub_info {
5612 int *csi_enable; /* scrubber enable flag */
5613 int csi_freq; /* scrubber calls per second */
5614 int csi_index; /* index to chsm_outstanding[] */
5615 uint64_t csi_inum; /* scrubber interrupt number */
5616 cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */
5617 cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */
5618 char csi_name[3]; /* cache name for this scrub entry */
5619 } cache_scrub_info[] = {
5620 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5621 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5622 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5626 * If scrubbing is enabled, increment the outstanding request counter. If it
5627 * is 1 (meaning there were no previous requests outstanding), call
5628 * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5629 * a self trap.
5631 static void
5632 do_scrub(struct scrub_info *csi)
5634 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5635 int index = csi->csi_index;
5636 uint32_t *outstanding = &csmp->chsm_outstanding[index];
5638 if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5639 if (atomic_inc_32_nv(outstanding) == 1) {
5640 xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5641 csi->csi_inum, 0);
5647 * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5648 * cross-trap the offline cpus.
5650 static void
5651 do_scrub_offline(struct scrub_info *csi)
5653 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5655 if (CPUSET_ISNULL(cpu_offline_set)) {
5657 * No offline cpus - nothing to do
5659 return;
5662 if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5663 xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5664 csi->csi_index);
5669 * This is the initial setup for the scrubber cyclics - it sets the
5670 * interrupt level, frequency, and function to call.
5672 /*ARGSUSED*/
5673 static void
5674 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5675 cyc_time_t *when)
5677 struct scrub_info *csi = (struct scrub_info *)arg;
5679 ASSERT(csi != NULL);
5680 hdlr->cyh_func = (cyc_func_t)do_scrub;
5681 hdlr->cyh_level = CY_LOW_LEVEL;
5682 hdlr->cyh_arg = arg;
5684 when->cyt_when = 0; /* Start immediately */
5685 when->cyt_interval = NANOSEC / csi->csi_freq;
5689 * Initialization for cache scrubbing.
5690 * This routine is called AFTER all cpus have had cpu_init_private called
5691 * to initialize their private data areas.
5693 void
5694 cpu_init_cache_scrub(void)
5696 int i;
5697 struct scrub_info *csi;
5698 cyc_omni_handler_t omni_hdlr;
5699 cyc_handler_t offline_hdlr;
5700 cyc_time_t when;
5703 * save away the maximum number of lines for the D$
5705 dcache_nlines = dcache_size / dcache_linesize;
5708 * register the softints for the cache scrubbing
5710 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5711 add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5712 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5713 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5715 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5716 add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5717 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5718 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5720 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5721 add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5722 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5723 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5726 * start the scrubbing for all the caches
5728 mutex_enter(&cpu_lock);
5729 for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5731 csi = &cache_scrub_info[i];
5733 if (!(*csi->csi_enable))
5734 continue;
5737 * force the following to be true:
5738 * 1 <= calls_a_sec <= hz
5740 if (csi->csi_freq > hz) {
5741 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5742 "(%d); resetting to hz (%d)", csi->csi_name,
5743 csi->csi_freq, hz);
5744 csi->csi_freq = hz;
5745 } else if (csi->csi_freq < 1) {
5746 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5747 "(%d); resetting to 1", csi->csi_name,
5748 csi->csi_freq);
5749 csi->csi_freq = 1;
5752 omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5753 omni_hdlr.cyo_offline = NULL;
5754 omni_hdlr.cyo_arg = (void *)csi;
5756 offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5757 offline_hdlr.cyh_arg = (void *)csi;
5758 offline_hdlr.cyh_level = CY_LOW_LEVEL;
5760 when.cyt_when = 0; /* Start immediately */
5761 when.cyt_interval = NANOSEC / csi->csi_freq;
5763 csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5764 csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5766 register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5767 mutex_exit(&cpu_lock);
5771 * Indicate that the specified cpu is idle.
5773 void
5774 cpu_idle_ecache_scrub(struct cpu *cp)
5776 if (CPU_PRIVATE(cp) != NULL) {
5777 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5778 csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5783 * Indicate that the specified cpu is busy.
5785 void
5786 cpu_busy_ecache_scrub(struct cpu *cp)
5788 if (CPU_PRIVATE(cp) != NULL) {
5789 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5790 csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5795 * Initialization for cache scrubbing for the specified cpu.
5797 void
5798 cpu_init_ecache_scrub_dr(struct cpu *cp)
5800 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5801 int cpuid = cp->cpu_id;
5803 /* initialize the number of lines in the caches */
5804 csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5805 cpunodes[cpuid].ecache_linesize;
5806 csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5807 CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5810 * do_scrub() and do_scrub_offline() check both the global
5811 * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers
5812 * check this value before scrubbing. Currently, we use it to
5813 * disable the E$ scrubber on multi-core cpus or while running at
5814 * slowed speed. For now, just turn everything on and allow
5815 * cpu_init_private() to change it if necessary.
5817 csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5818 csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5819 csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5821 cpu_busy_ecache_scrub(cp);
5825 * Un-initialization for cache scrubbing for the specified cpu.
5827 static void
5828 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5830 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5833 * un-initialize bookkeeping for cache scrubbing
5835 bzero(csmp, sizeof (ch_scrub_misc_t));
5837 cpu_idle_ecache_scrub(cp);
5841 * Called periodically on each CPU to scrub the D$.
5843 static void
5844 scrub_dcache(int how_many)
5846 int i;
5847 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5848 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5851 * scrub the desired number of lines
5853 for (i = 0; i < how_many; i++) {
5855 * scrub a D$ line
5857 dcache_inval_line(index);
5860 * calculate the next D$ line to scrub, assumes
5861 * that dcache_nlines is a power of 2
5863 index = (index + 1) & (dcache_nlines - 1);
5867 * set the scrub index for the next visit
5869 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5873 * Handler for D$ scrub inum softint. Call scrub_dcache until
5874 * we decrement the outstanding request count to zero.
5876 /*ARGSUSED*/
5877 static uint_t
5878 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5880 int i;
5881 int how_many;
5882 int outstanding;
5883 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5884 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5885 struct scrub_info *csi = (struct scrub_info *)arg1;
5886 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5887 dcache_scan_rate_idle : dcache_scan_rate_busy;
5890 * The scan rates are expressed in units of tenths of a
5891 * percent. A scan rate of 1000 (100%) means the whole
5892 * cache is scanned every second.
5894 how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5896 do {
5897 outstanding = *countp;
5898 for (i = 0; i < outstanding; i++) {
5899 scrub_dcache(how_many);
5901 } while (atomic_add_32_nv(countp, -outstanding));
5903 return (DDI_INTR_CLAIMED);
5907 * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5908 * by invalidating lines. Due to the characteristics of the ASI which
5909 * is used to invalidate an I$ line, the entire I$ must be invalidated
5910 * vs. an individual I$ line.
5912 static void
5913 scrub_icache(int how_many)
5915 int i;
5916 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5917 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5918 int icache_nlines = csmp->chsm_icache_nlines;
5921 * scrub the desired number of lines
5923 for (i = 0; i < how_many; i++) {
5925 * since the entire I$ must be scrubbed at once,
5926 * wait until the index wraps to zero to invalidate
5927 * the entire I$
5929 if (index == 0) {
5930 icache_inval_all();
5934 * calculate the next I$ line to scrub, assumes
5935 * that chsm_icache_nlines is a power of 2
5937 index = (index + 1) & (icache_nlines - 1);
5941 * set the scrub index for the next visit
5943 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5947 * Handler for I$ scrub inum softint. Call scrub_icache until
5948 * we decrement the outstanding request count to zero.
5950 /*ARGSUSED*/
5951 static uint_t
5952 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5954 int i;
5955 int how_many;
5956 int outstanding;
5957 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5958 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5959 struct scrub_info *csi = (struct scrub_info *)arg1;
5960 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5961 icache_scan_rate_idle : icache_scan_rate_busy;
5962 int icache_nlines = csmp->chsm_icache_nlines;
5965 * The scan rates are expressed in units of tenths of a
5966 * percent. A scan rate of 1000 (100%) means the whole
5967 * cache is scanned every second.
5969 how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5971 do {
5972 outstanding = *countp;
5973 for (i = 0; i < outstanding; i++) {
5974 scrub_icache(how_many);
5976 } while (atomic_add_32_nv(countp, -outstanding));
5978 return (DDI_INTR_CLAIMED);
5982 * Called periodically on each CPU to scrub the E$.
5984 static void
5985 scrub_ecache(int how_many)
5987 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5988 int i;
5989 int cpuid = CPU->cpu_id;
5990 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5991 int nlines = csmp->chsm_ecache_nlines;
5992 int linesize = cpunodes[cpuid].ecache_linesize;
5993 int ec_set_size = cpu_ecache_set_size(CPU);
5996 * scrub the desired number of lines
5998 for (i = 0; i < how_many; i++) {
6000 * scrub the E$ line
6002 ecache_flush_line(ecache_flushaddr + (index * linesize),
6003 ec_set_size);
6006 * calculate the next E$ line to scrub based on twice
6007 * the number of E$ lines (to displace lines containing
6008 * flush area data), assumes that the number of lines
6009 * is a power of 2
6011 index = (index + 1) & ((nlines << 1) - 1);
6015 * set the ecache scrub index for the next visit
6017 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
6021 * Handler for E$ scrub inum softint. Call the E$ scrubber until
6022 * we decrement the outstanding request count to zero.
6024 * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
6025 * become negative after the atomic_add_32_nv(). This is not a problem, as
6026 * the next trip around the loop won't scrub anything, and the next add will
6027 * reset the count back to zero.
6029 /*ARGSUSED*/
6030 static uint_t
6031 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
6033 int i;
6034 int how_many;
6035 int outstanding;
6036 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
6037 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
6038 struct scrub_info *csi = (struct scrub_info *)arg1;
6039 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
6040 ecache_scan_rate_idle : ecache_scan_rate_busy;
6041 int ecache_nlines = csmp->chsm_ecache_nlines;
6044 * The scan rates are expressed in units of tenths of a
6045 * percent. A scan rate of 1000 (100%) means the whole
6046 * cache is scanned every second.
6048 how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
6050 do {
6051 outstanding = *countp;
6052 for (i = 0; i < outstanding; i++) {
6053 scrub_ecache(how_many);
6055 } while (atomic_add_32_nv(countp, -outstanding));
6057 return (DDI_INTR_CLAIMED);
6061 * Timeout function to reenable CE
6063 static void
6064 cpu_delayed_check_ce_errors(void *arg)
6066 if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6067 TQ_NOSLEEP)) {
6068 (void) timeout(cpu_delayed_check_ce_errors, arg,
6069 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6074 * CE Deferred Re-enable after trap.
6076 * When the CPU gets a disrupting trap for any of the errors
6077 * controlled by the CEEN bit, CEEN is disabled in the trap handler
6078 * immediately. To eliminate the possibility of multiple CEs causing
6079 * recursive stack overflow in the trap handler, we cannot
6080 * reenable CEEN while still running in the trap handler. Instead,
6081 * after a CE is logged on a CPU, we schedule a timeout function,
6082 * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6083 * seconds. This function will check whether any further CEs
6084 * have occurred on that CPU, and if none have, will reenable CEEN.
6086 * If further CEs have occurred while CEEN is disabled, another
6087 * timeout will be scheduled. This is to ensure that the CPU can
6088 * make progress in the face of CE 'storms', and that it does not
6089 * spend all its time logging CE errors.
6091 static void
6092 cpu_check_ce_errors(void *arg)
6094 int cpuid = (int)(uintptr_t)arg;
6095 cpu_t *cp;
6098 * We acquire cpu_lock.
6100 ASSERT(curthread->t_pil == 0);
6103 * verify that the cpu is still around, DR
6104 * could have got there first ...
6106 mutex_enter(&cpu_lock);
6107 cp = cpu_get(cpuid);
6108 if (cp == NULL) {
6109 mutex_exit(&cpu_lock);
6110 return;
6113 * make sure we don't migrate across CPUs
6114 * while checking our CE status.
6116 kpreempt_disable();
6119 * If we are running on the CPU that got the
6120 * CE, we can do the checks directly.
6122 if (cp->cpu_id == CPU->cpu_id) {
6123 mutex_exit(&cpu_lock);
6124 cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6125 kpreempt_enable();
6126 return;
6128 kpreempt_enable();
6131 * send an x-call to get the CPU that originally
6132 * got the CE to do the necessary checks. If we can't
6133 * send the x-call, reschedule the timeout, otherwise we
6134 * lose CEEN forever on that CPU.
6136 if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6137 xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6138 TIMEOUT_CEEN_CHECK, 0);
6139 mutex_exit(&cpu_lock);
6140 } else {
6142 * When the CPU is not accepting xcalls, or
6143 * the processor is offlined, we don't want to
6144 * incur the extra overhead of trying to schedule the
6145 * CE timeout indefinitely. However, we don't want to lose
6146 * CE checking forever.
6148 * Keep rescheduling the timeout, accepting the additional
6149 * overhead as the cost of correctness in the case where we get
6150 * a CE, disable CEEN, offline the CPU during the
6151 * the timeout interval, and then online it at some
6152 * point in the future. This is unlikely given the short
6153 * cpu_ceen_delay_secs.
6155 mutex_exit(&cpu_lock);
6156 (void) timeout(cpu_delayed_check_ce_errors,
6157 (void *)(uintptr_t)cp->cpu_id,
6158 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6163 * This routine will check whether CEs have occurred while
6164 * CEEN is disabled. Any CEs detected will be logged and, if
6165 * possible, scrubbed.
6167 * The memscrubber will also use this routine to clear any errors
6168 * caused by its scrubbing with CEEN disabled.
6170 * flag == SCRUBBER_CEEN_CHECK
6171 * called from memscrubber, just check/scrub, no reset
6172 * paddr physical addr. for start of scrub pages
6173 * vaddr virtual addr. for scrub area
6174 * psz page size of area to be scrubbed
6176 * flag == TIMEOUT_CEEN_CHECK
6177 * timeout function has triggered, reset timeout or CEEN
6179 * Note: We must not migrate cpus during this function. This can be
6180 * achieved by one of:
6181 * - invoking as target of an x-call in which case we're at XCALL_PIL
6182 * The flag value must be first xcall argument.
6183 * - disabling kernel preemption. This should be done for very short
6184 * periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6185 * scrub an extended area with cpu_check_block. The call for
6186 * TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6187 * brief for this case.
6188 * - binding to a cpu, eg with thread_affinity_set(). This is used
6189 * in the SCRUBBER_CEEN_CHECK case, but is not practical for
6190 * the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6192 void
6193 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6195 ch_cpu_errors_t cpu_error_regs;
6196 uint64_t ec_err_enable;
6197 uint64_t page_offset;
6199 /* Read AFSR */
6200 get_cpu_error_state(&cpu_error_regs);
6203 * If no CEEN errors have occurred during the timeout
6204 * interval, it is safe to re-enable CEEN and exit.
6206 if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6207 (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6208 if (flag == TIMEOUT_CEEN_CHECK &&
6209 !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6210 set_error_enable(ec_err_enable | EN_REG_CEEN);
6211 return;
6215 * Ensure that CEEN was not reenabled (maybe by DR) before
6216 * we log/clear the error.
6218 if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6219 set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6222 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6223 * timeout will be rescheduled when the error is logged.
6225 if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6226 (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6227 cpu_ce_detected(&cpu_error_regs,
6228 CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6229 else
6230 cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6233 * If the memory scrubber runs while CEEN is
6234 * disabled, (or if CEEN is disabled during the
6235 * scrub as a result of a CE being triggered by
6236 * it), the range being scrubbed will not be
6237 * completely cleaned. If there are multiple CEs
6238 * in the range at most two of these will be dealt
6239 * with, (one by the trap handler and one by the
6240 * timeout). It is also possible that none are dealt
6241 * with, (CEEN disabled and another CE occurs before
6242 * the timeout triggers). So to ensure that the
6243 * memory is actually scrubbed, we have to access each
6244 * memory location in the range and then check whether
6245 * that access causes a CE.
6247 if (flag == SCRUBBER_CEEN_CHECK && va) {
6248 if ((cpu_error_regs.afar >= pa) &&
6249 (cpu_error_regs.afar < (pa + psz))) {
6251 * Force a load from physical memory for each
6252 * 64-byte block, then check AFSR to determine
6253 * whether this access caused an error.
6255 * This is a slow way to do a scrub, but as it will
6256 * only be invoked when the memory scrubber actually
6257 * triggered a CE, it should not happen too
6258 * frequently.
6260 * cut down what we need to check as the scrubber
6261 * has verified up to AFAR, so get it's offset
6262 * into the page and start there.
6264 page_offset = (uint64_t)(cpu_error_regs.afar &
6265 (psz - 1));
6266 va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6267 psz -= (uint_t)(P2ALIGN(page_offset, 64));
6268 cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6269 psz);
6274 * Reset error enable if this CE is not masked.
6276 if ((flag == TIMEOUT_CEEN_CHECK) &&
6277 (cpu_error_regs.afsr & cpu_ce_not_deferred))
6278 set_error_enable(ec_err_enable | EN_REG_CEEN);
6283 * Attempt a cpu logout for an error that we did not trap for, such
6284 * as a CE noticed with CEEN off. It is assumed that we are still running
6285 * on the cpu that took the error and that we cannot migrate. Returns
6286 * 0 on success, otherwise nonzero.
6288 static int
6289 cpu_ce_delayed_ec_logout(uint64_t afar)
6291 ch_cpu_logout_t *clop;
6293 if (CPU_PRIVATE(CPU) == NULL)
6294 return (0);
6296 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6297 if (atomic_cas_64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6298 LOGOUT_INVALID)
6299 return (0);
6301 cpu_delayed_logout(afar, clop);
6302 return (1);
6306 * We got an error while CEEN was disabled. We
6307 * need to clean up after it and log whatever
6308 * information we have on the CE.
6310 void
6311 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6313 ch_async_flt_t ch_flt;
6314 struct async_flt *aflt;
6315 char pr_reason[MAX_REASON_STRING];
6317 bzero(&ch_flt, sizeof (ch_async_flt_t));
6318 ch_flt.flt_trapped_ce = flag;
6319 aflt = (struct async_flt *)&ch_flt;
6320 aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6321 ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6322 ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6323 (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6324 aflt->flt_addr = cpu_error_regs->afar;
6325 #if defined(SERRANO)
6326 ch_flt.afar2 = cpu_error_regs->afar2;
6327 #endif /* SERRANO */
6328 aflt->flt_pc = NULL;
6329 aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6330 aflt->flt_tl = 0;
6331 aflt->flt_panic = 0;
6332 cpu_log_and_clear_ce(&ch_flt);
6335 * check if we caused any errors during cleanup
6337 if (clear_errors(&ch_flt)) {
6338 pr_reason[0] = '\0';
6339 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6340 NULL);
6345 * Log/clear CEEN-controlled disrupting errors
6347 static void
6348 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6350 struct async_flt *aflt;
6351 uint64_t afsr, afsr_errs;
6352 ch_cpu_logout_t *clop;
6353 char pr_reason[MAX_REASON_STRING];
6354 on_trap_data_t *otp = curthread->t_ontrap;
6356 aflt = (struct async_flt *)ch_flt;
6357 afsr = aflt->flt_stat;
6358 afsr_errs = ch_flt->afsr_errs;
6359 aflt->flt_id = gethrtime_waitfree();
6360 aflt->flt_bus_id = getprocessorid();
6361 aflt->flt_inst = CPU->cpu_id;
6362 aflt->flt_prot = AFLT_PROT_NONE;
6363 aflt->flt_class = CPU_FAULT;
6364 aflt->flt_status = ECC_C_TRAP;
6366 pr_reason[0] = '\0';
6368 * Get the CPU log out info for Disrupting Trap.
6370 if (CPU_PRIVATE(CPU) == NULL) {
6371 clop = NULL;
6372 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6373 } else {
6374 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6377 if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6378 ch_cpu_errors_t cpu_error_regs;
6380 get_cpu_error_state(&cpu_error_regs);
6381 (void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6382 clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6383 clop->clo_data.chd_afar = cpu_error_regs.afar;
6384 clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6385 clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6386 clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6387 clop->clo_sdw_data.chd_afsr_ext =
6388 cpu_error_regs.shadow_afsr_ext;
6389 #if defined(SERRANO)
6390 clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6391 #endif /* SERRANO */
6392 ch_flt->flt_data_incomplete = 1;
6395 * The logging/clear code expects AFSR/AFAR to be cleared.
6396 * The trap handler does it for CEEN enabled errors
6397 * so we need to do it here.
6399 set_cpu_error_state(&cpu_error_regs);
6402 #if defined(JALAPENO) || defined(SERRANO)
6404 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6405 * For Serrano, even thou we do have the AFAR, we still do the
6406 * scrub on the RCE side since that's where the error type can
6407 * be properly classified as intermittent, persistent, etc.
6409 * CE/RCE: If error is in memory and AFAR is valid, scrub the memory.
6410 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6411 * the flt_status bits.
6413 if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6414 (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6415 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6416 cpu_ce_scrub_mem_err(aflt, B_TRUE);
6418 #else /* JALAPENO || SERRANO */
6420 * CE/EMC: If error is in memory and AFAR is valid, scrub the memory.
6421 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6422 * the flt_status bits.
6424 if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6425 if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6426 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6427 cpu_ce_scrub_mem_err(aflt, B_TRUE);
6431 #endif /* JALAPENO || SERRANO */
6434 * Update flt_prot if this error occurred under on_trap protection.
6436 if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6437 aflt->flt_prot = AFLT_PROT_EC;
6440 * Queue events on the async event queue, one event per error bit.
6442 if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6443 (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6444 ch_flt->flt_type = CPU_INV_AFSR;
6445 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6446 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6447 aflt->flt_panic);
6451 * Zero out + invalidate CPU logout.
6453 if (clop) {
6454 bzero(clop, sizeof (ch_cpu_logout_t));
6455 clop->clo_data.chd_afar = LOGOUT_INVALID;
6459 * If either a CPC, WDC or EDC error has occurred while CEEN
6460 * was disabled, we need to flush either the entire
6461 * E$ or an E$ line.
6463 #if defined(JALAPENO) || defined(SERRANO)
6464 if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6465 #else /* JALAPENO || SERRANO */
6466 if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6467 C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6468 #endif /* JALAPENO || SERRANO */
6469 cpu_error_ecache_flush(ch_flt);
6474 * depending on the error type, we determine whether we
6475 * need to flush the entire ecache or just a line.
6477 static int
6478 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6480 struct async_flt *aflt;
6481 uint64_t afsr;
6482 uint64_t afsr_errs = ch_flt->afsr_errs;
6484 aflt = (struct async_flt *)ch_flt;
6485 afsr = aflt->flt_stat;
6488 * If we got multiple errors, no point in trying
6489 * the individual cases, just flush the whole cache
6491 if (afsr & C_AFSR_ME) {
6492 return (ECACHE_FLUSH_ALL);
6496 * If either a CPC, WDC or EDC error has occurred while CEEN
6497 * was disabled, we need to flush entire E$. We can't just
6498 * flush the cache line affected as the ME bit
6499 * is not set when multiple correctable errors of the same
6500 * type occur, so we might have multiple CPC or EDC errors,
6501 * with only the first recorded.
6503 #if defined(JALAPENO) || defined(SERRANO)
6504 if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6505 #else /* JALAPENO || SERRANO */
6506 if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6507 C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6508 #endif /* JALAPENO || SERRANO */
6509 return (ECACHE_FLUSH_ALL);
6512 #if defined(JALAPENO) || defined(SERRANO)
6514 * If only UE or RUE is set, flush the Ecache line, otherwise
6515 * flush the entire Ecache.
6517 if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6518 if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6519 (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6520 return (ECACHE_FLUSH_LINE);
6521 } else {
6522 return (ECACHE_FLUSH_ALL);
6525 #else /* JALAPENO || SERRANO */
6527 * If UE only is set, flush the Ecache line, otherwise
6528 * flush the entire Ecache.
6530 if (afsr_errs & C_AFSR_UE) {
6531 if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6532 C_AFSR_UE) {
6533 return (ECACHE_FLUSH_LINE);
6534 } else {
6535 return (ECACHE_FLUSH_ALL);
6538 #endif /* JALAPENO || SERRANO */
6541 * EDU: If EDU only is set, flush the ecache line, otherwise
6542 * flush the entire Ecache.
6544 if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6545 if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6546 ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6547 return (ECACHE_FLUSH_LINE);
6548 } else {
6549 return (ECACHE_FLUSH_ALL);
6554 * BERR: If BERR only is set, flush the Ecache line, otherwise
6555 * flush the entire Ecache.
6557 if (afsr_errs & C_AFSR_BERR) {
6558 if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6559 return (ECACHE_FLUSH_LINE);
6560 } else {
6561 return (ECACHE_FLUSH_ALL);
6565 return (0);
6568 void
6569 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6571 int ecache_flush_flag =
6572 cpu_error_ecache_flush_required(ch_flt);
6575 * Flush Ecache line or entire Ecache based on above checks.
6577 if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6578 cpu_flush_ecache();
6579 else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6580 cpu_flush_ecache_line(ch_flt);
6586 * Extract the PA portion from the E$ tag.
6588 uint64_t
6589 cpu_ectag_to_pa(int setsize, uint64_t tag)
6591 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6592 return (JG_ECTAG_TO_PA(setsize, tag));
6593 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6594 return (PN_L3TAG_TO_PA(tag));
6595 else
6596 return (CH_ECTAG_TO_PA(setsize, tag));
6600 * Convert the E$ tag PA into an E$ subblock index.
6603 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6605 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6606 return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6607 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6608 /* Panther has only one subblock per line */
6609 return (0);
6610 else
6611 return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6615 * All subblocks in an E$ line must be invalid for
6616 * the line to be invalid.
6619 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6621 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6622 return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6623 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6624 return (PN_L3_LINE_INVALID(tag));
6625 else
6626 return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6630 * Extract state bits for a subblock given the tag. Note that for Panther
6631 * this works on both l2 and l3 tags.
6634 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6636 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6637 return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6638 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6639 return (tag & CH_ECSTATE_MASK);
6640 else
6641 return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
/*
 * Cpu specific initialization.  When built with CHEETAHPLUS_ERRATUM_25
 * and cheetah_sendmondo_recover is set, this calls cheetah_nudge_init();
 * otherwise it does nothing.
 */
void
cpu_mp_init(void)
{
#if defined(CHEETAHPLUS_ERRATUM_25)
	if (cheetah_sendmondo_recover)
		cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6657 void
6658 cpu_ereport_post(struct async_flt *aflt)
6660 char *cpu_type, buf[FM_MAX_CLASS];
6661 nv_alloc_t *nva = NULL;
6662 nvlist_t *ereport, *detector, *resource;
6663 errorq_elem_t *eqep;
6664 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6665 char unum[UNUM_NAMLEN];
6666 int synd_code;
6667 uint8_t msg_type;
6668 plat_ecc_ch_async_flt_t plat_ecc_ch_flt;
6670 if (aflt->flt_panic || panicstr) {
6671 eqep = errorq_reserve(ereport_errorq);
6672 if (eqep == NULL)
6673 return;
6674 ereport = errorq_elem_nvl(ereport_errorq, eqep);
6675 nva = errorq_elem_nva(ereport_errorq, eqep);
6676 } else {
6677 ereport = fm_nvlist_create(nva);
6681 * Create the scheme "cpu" FMRI.
6683 detector = fm_nvlist_create(nva);
6684 resource = fm_nvlist_create(nva);
6685 switch (cpunodes[aflt->flt_inst].implementation) {
6686 case CHEETAH_IMPL:
6687 cpu_type = FM_EREPORT_CPU_USIII;
6688 break;
6689 case CHEETAH_PLUS_IMPL:
6690 cpu_type = FM_EREPORT_CPU_USIIIplus;
6691 break;
6692 case JALAPENO_IMPL:
6693 cpu_type = FM_EREPORT_CPU_USIIIi;
6694 break;
6695 case SERRANO_IMPL:
6696 cpu_type = FM_EREPORT_CPU_USIIIiplus;
6697 break;
6698 case JAGUAR_IMPL:
6699 cpu_type = FM_EREPORT_CPU_USIV;
6700 break;
6701 case PANTHER_IMPL:
6702 cpu_type = FM_EREPORT_CPU_USIVplus;
6703 break;
6704 default:
6705 cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6706 break;
6709 cpu_fmri_cpu_set(detector, aflt->flt_inst);
6712 * Encode all the common data into the ereport.
6714 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6715 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6717 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6718 fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6719 detector, NULL);
6722 * Encode the error specific data that was saved in
6723 * the async_flt structure into the ereport.
6725 cpu_payload_add_aflt(aflt, ereport, resource,
6726 &plat_ecc_ch_flt.ecaf_afar_status,
6727 &plat_ecc_ch_flt.ecaf_synd_status);
6729 if (aflt->flt_panic || panicstr) {
6730 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6731 } else {
6732 (void) fm_ereport_post(ereport, EVCH_TRYHARD);
6733 fm_nvlist_destroy(ereport, FM_NVA_FREE);
6734 fm_nvlist_destroy(detector, FM_NVA_FREE);
6735 fm_nvlist_destroy(resource, FM_NVA_FREE);
6738 * Send the enhanced error information (plat_ecc_error2_data_t)
6739 * to the SC olny if it can process it.
6742 if (&plat_ecc_capability_sc_get &&
6743 plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6744 msg_type = cpu_flt_bit_to_plat_error(aflt);
6745 if (msg_type != PLAT_ECC_ERROR2_NONE) {
6747 * If afar status is not invalid do a unum lookup.
6749 if (plat_ecc_ch_flt.ecaf_afar_status !=
6750 AFLT_STAT_INVALID) {
6751 synd_code = synd_to_synd_code(
6752 plat_ecc_ch_flt.ecaf_synd_status,
6753 aflt->flt_synd, ch_flt->flt_bit);
6754 (void) cpu_get_mem_unum_synd(synd_code,
6755 aflt, unum);
6756 } else {
6757 unum[0] = '\0';
6759 plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6760 plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6761 plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6762 plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6763 ch_flt->flt_sdw_afsr_ext;
6765 if (&plat_log_fruid_error2)
6766 plat_log_fruid_error2(msg_type, unum, aflt,
6767 &plat_ecc_ch_flt);
6772 void
6773 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6775 int status;
6776 ddi_fm_error_t de;
6778 bzero(&de, sizeof (ddi_fm_error_t));
6780 de.fme_version = DDI_FME_VERSION;
6781 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6782 FM_ENA_FMT1);
6783 de.fme_flag = expected;
6784 de.fme_bus_specific = (void *)aflt->flt_addr;
6785 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6786 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6787 aflt->flt_panic = 1;
6790 void
6791 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6792 errorq_t *eqp, uint_t flag)
6794 struct async_flt *aflt = (struct async_flt *)payload;
6796 aflt->flt_erpt_class = error_class;
6797 errorq_dispatch(eqp, payload, payload_sz, flag);
/*
 * This routine may be called by the IO module, but it intentionally does
 * nothing in this cpu module.  The SERD algorithm is handled by the
 * cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6810 void
6811 adjust_hw_copy_limits(int ecache_size)
6814 * Set hw copy limits.
6816 * /etc/system will be parsed later and can override one or more
6817 * of these settings.
6819 * At this time, ecache size seems only mildly relevant.
6820 * We seem to run into issues with the d-cache and stalls
6821 * we see on misses.
6823 * Cycle measurement indicates that 2 byte aligned copies fare
6824 * little better than doing things with VIS at around 512 bytes.
6825 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6826 * aligned is faster whenever the source and destination data
6827 * in cache and the total size is less than 2 Kbytes. The 2K
6828 * limit seems to be driven by the 2K write cache.
6829 * When more than 2K of copies are done in non-VIS mode, stores
6830 * backup in the write cache. In VIS mode, the write cache is
6831 * bypassed, allowing faster cache-line writes aligned on cache
6832 * boundaries.
6834 * In addition, in non-VIS mode, there is no prefetching, so
6835 * for larger copies, the advantage of prefetching to avoid even
6836 * occasional cache misses is enough to justify using the VIS code.
6838 * During testing, it was discovered that netbench ran 3% slower
6839 * when hw_copy_limit_8 was 2K or larger. Apparently for server
6840 * applications, data is only used once (copied to the output
6841 * buffer, then copied by the network device off the system). Using
6842 * the VIS copy saves more L2 cache state. Network copies are
6843 * around 1.3K to 1.5K in size for historical reasons.
6845 * Therefore, a limit of 1K bytes will be used for the 8 byte
6846 * aligned copy even for large caches and 8 MB ecache. The
6847 * infrastructure to allow different limits for different sized
6848 * caches is kept to allow further tuning in later releases.
6851 if (min_ecache_size == 0 && use_hw_bcopy) {
6853 * First time through - should be before /etc/system
6854 * is read.
6855 * Could skip the checks for zero but this lets us
6856 * preserve any debugger rewrites.
6858 if (hw_copy_limit_1 == 0) {
6859 hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6860 priv_hcl_1 = hw_copy_limit_1;
6862 if (hw_copy_limit_2 == 0) {
6863 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6864 priv_hcl_2 = hw_copy_limit_2;
6866 if (hw_copy_limit_4 == 0) {
6867 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6868 priv_hcl_4 = hw_copy_limit_4;
6870 if (hw_copy_limit_8 == 0) {
6871 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6872 priv_hcl_8 = hw_copy_limit_8;
6874 min_ecache_size = ecache_size;
6875 } else {
6877 * MP initialization. Called *after* /etc/system has
6878 * been parsed. One CPU has already been initialized.
6879 * Need to cater for /etc/system having scragged one
6880 * of our values.
6882 if (ecache_size == min_ecache_size) {
6884 * Same size ecache. We do nothing unless we
6885 * have a pessimistic ecache setting. In that
6886 * case we become more optimistic (if the cache is
6887 * large enough).
6889 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6891 * Need to adjust hw_copy_limit* from our
6892 * pessimistic uniprocessor value to a more
6893 * optimistic UP value *iff* it hasn't been
6894 * reset.
6896 if ((ecache_size > 1048576) &&
6897 (priv_hcl_8 == hw_copy_limit_8)) {
6898 if (ecache_size <= 2097152)
6899 hw_copy_limit_8 = 4 *
6900 VIS_COPY_THRESHOLD;
6901 else if (ecache_size <= 4194304)
6902 hw_copy_limit_8 = 4 *
6903 VIS_COPY_THRESHOLD;
6904 else
6905 hw_copy_limit_8 = 4 *
6906 VIS_COPY_THRESHOLD;
6907 priv_hcl_8 = hw_copy_limit_8;
6910 } else if (ecache_size < min_ecache_size) {
6912 * A different ecache size. Can this even happen?
6914 if (priv_hcl_8 == hw_copy_limit_8) {
6916 * The previous value that we set
6917 * is unchanged (i.e., it hasn't been
6918 * scragged by /etc/system). Rewrite it.
6920 if (ecache_size <= 1048576)
6921 hw_copy_limit_8 = 8 *
6922 VIS_COPY_THRESHOLD;
6923 else if (ecache_size <= 2097152)
6924 hw_copy_limit_8 = 8 *
6925 VIS_COPY_THRESHOLD;
6926 else if (ecache_size <= 4194304)
6927 hw_copy_limit_8 = 8 *
6928 VIS_COPY_THRESHOLD;
6929 else
6930 hw_copy_limit_8 = 10 *
6931 VIS_COPY_THRESHOLD;
6932 priv_hcl_8 = hw_copy_limit_8;
6933 min_ecache_size = ecache_size;
6940 * Called from illegal instruction trap handler to see if we can attribute
6941 * the trap to a fpras check.
6944 fpras_chktrap(struct regs *rp)
6946 int op;
6947 struct fpras_chkfngrp *cgp;
6948 uintptr_t tpc = (uintptr_t)rp->r_pc;
6950 if (fpras_chkfngrps == NULL)
6951 return (0);
6953 cgp = &fpras_chkfngrps[CPU->cpu_id];
6954 for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6955 if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6956 tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6957 break;
6959 if (op == FPRAS_NCOPYOPS)
6960 return (0);
6963 * This is an fpRAS failure caught through an illegal
6964 * instruction - trampoline.
6966 rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6967 rp->r_npc = rp->r_pc + 4;
6968 return (1);
6972 * fpras_failure is called when a fpras check detects a bad calculation
6973 * result or an illegal instruction trap is attributed to an fpras
6974 * check. In all cases we are still bound to CPU.
6977 fpras_failure(int op, int how)
6979 int use_hw_bcopy_orig, use_hw_bzero_orig;
6980 uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6981 ch_async_flt_t ch_flt;
6982 struct async_flt *aflt = (struct async_flt *)&ch_flt;
6983 struct fpras_chkfn *sfp, *cfp;
6984 uint32_t *sip, *cip;
6985 int i;
6988 * We're running on a sick CPU. Avoid further FPU use at least for
6989 * the time in which we dispatch an ereport and (if applicable) panic.
6991 use_hw_bcopy_orig = use_hw_bcopy;
6992 use_hw_bzero_orig = use_hw_bzero;
6993 hcl1_orig = hw_copy_limit_1;
6994 hcl2_orig = hw_copy_limit_2;
6995 hcl4_orig = hw_copy_limit_4;
6996 hcl8_orig = hw_copy_limit_8;
6997 use_hw_bcopy = use_hw_bzero = 0;
6998 hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6999 hw_copy_limit_8 = 0;
7001 bzero(&ch_flt, sizeof (ch_async_flt_t));
7002 aflt->flt_id = gethrtime_waitfree();
7003 aflt->flt_class = CPU_FAULT;
7004 aflt->flt_inst = CPU->cpu_id;
7005 aflt->flt_status = (how << 8) | op;
7006 aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
7007 ch_flt.flt_type = CPU_FPUERR;
7010 * We must panic if the copy operation had no lofault protection -
7011 * ie, don't panic for copyin, copyout, kcopy and bcopy called
7012 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
7014 aflt->flt_panic = (curthread->t_lofault == NULL);
7017 * XOR the source instruction block with the copied instruction
7018 * block - this will show us which bit(s) are corrupted.
7020 sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
7021 cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
7022 if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
7023 sip = &sfp->fpras_blk0[0];
7024 cip = &cfp->fpras_blk0[0];
7025 } else {
7026 sip = &sfp->fpras_blk1[0];
7027 cip = &cfp->fpras_blk1[0];
7029 for (i = 0; i < 16; ++i, ++sip, ++cip)
7030 ch_flt.flt_fpdata[i] = *sip ^ *cip;
7032 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
7033 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
7035 if (aflt->flt_panic)
7036 fm_panic("FPU failure on CPU %d", CPU->cpu_id);
7039 * We get here for copyin/copyout and kcopy or bcopy where the
7040 * caller has used on_fault. We will flag the error so that
7041 * the process may be killed The trap_async_hwerr mechanism will
7042 * take appropriate further action (such as a reboot, contract
7043 * notification etc). Since we may be continuing we will
7044 * restore the global hardware copy acceleration switches.
7046 * When we return from this function to the copy function we want to
7047 * avoid potentially bad data being used, ie we want the affected
7048 * copy function to return an error. The caller should therefore
7049 * invoke its lofault handler (which always exists for these functions)
7050 * which will return the appropriate error.
7052 ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
7053 aston(curthread);
7055 use_hw_bcopy = use_hw_bcopy_orig;
7056 use_hw_bzero = use_hw_bzero_orig;
7057 hw_copy_limit_1 = hcl1_orig;
7058 hw_copy_limit_2 = hcl2_orig;
7059 hw_copy_limit_4 = hcl4_orig;
7060 hw_copy_limit_8 = hcl8_orig;
7062 return (1);
7065 #define VIS_BLOCKSIZE 64
7068 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7070 int ret, watched;
7072 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7073 ret = dtrace_blksuword32(addr, data, 0);
7074 if (watched)
7075 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7077 return (ret);
7081 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7082 * faulted cpu into that state). Cross-trap to the faulted cpu to clear
7083 * CEEN from the EER to disable traps for further disrupting error types
7084 * on that cpu. We could cross-call instead, but that has a larger
7085 * instruction and data footprint than cross-trapping, and the cpu is known
7086 * to be faulted.
7089 void
7090 cpu_faulted_enter(struct cpu *cp)
7092 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7096 * Called when a cpu leaves the CPU_FAULTED state to return to one of
7097 * offline, spare, or online (by the cpu requesting this state change).
7098 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7099 * disrupting error bits that have accumulated without trapping, then
7100 * we cross-trap to re-enable CEEN controlled traps.
7102 void
7103 cpu_faulted_exit(struct cpu *cp)
7105 ch_cpu_errors_t cpu_error_regs;
7107 cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7108 if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7109 cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7110 xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7111 (uint64_t)&cpu_error_regs, 0);
7113 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7117 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7118 * the errors in the original AFSR, 0 otherwise.
7120 * For all procs if the initial error was a BERR or TO, then it is possible
7121 * that we may have caused a secondary BERR or TO in the process of logging the
7122 * inital error via cpu_run_bus_error_handlers(). If this is the case then
7123 * if the request was protected then a panic is still not necessary, if not
7124 * protected then aft_panic is already set - so either way there's no need
7125 * to set aft_panic for the secondary error.
7127 * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7128 * a store merge, then the error handling code will call cpu_deferred_error().
7129 * When clear_errors() is called, it will determine that secondary errors have
7130 * occurred - in particular, the store merge also caused a EDU and WDU that
7131 * weren't discovered until this point.
7133 * We do three checks to verify that we are in this case. If we pass all three
7134 * checks, we return 1 to indicate that we should not panic. If any unexpected
7135 * errors occur, we return 0.
7137 * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7138 * handled in cpu_disrupting_errors(). Since this function is not even called
7139 * in the case we are interested in, we just return 0 for these processors.
7141 /*ARGSUSED*/
7142 static int
7143 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7144 uint64_t t_afar)
7146 #if defined(CHEETAH_PLUS)
7147 #else /* CHEETAH_PLUS */
7148 struct async_flt *aflt = (struct async_flt *)ch_flt;
7149 #endif /* CHEETAH_PLUS */
7152 * Was the original error a BERR or TO and only a BERR or TO
7153 * (multiple errors are also OK)
7155 if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7157 * Is the new error a BERR or TO and only a BERR or TO
7158 * (multiple errors are also OK)
7160 if ((ch_flt->afsr_errs &
7161 ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7162 return (1);
7165 #if defined(CHEETAH_PLUS)
7166 return (0);
7167 #else /* CHEETAH_PLUS */
7169 * Now look for secondary effects of a UE on cheetah/jalapeno
7171 * Check the original error was a UE, and only a UE. Note that
7172 * the ME bit will cause us to fail this check.
7174 if (t_afsr_errs != C_AFSR_UE)
7175 return (0);
7178 * Check the secondary errors were exclusively an EDU and/or WDU.
7180 if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7181 return (0);
7184 * Check the AFAR of the original error and secondary errors
7185 * match to the 64-byte boundary
7187 if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7188 return (0);
7191 * We've passed all the checks, so it's a secondary error!
7193 return (1);
7194 #endif /* CHEETAH_PLUS */
7198 * Translate the flt_bit or flt_type into an error type. First, flt_bit
7199 * is checked for any valid errors. If found, the error type is
7200 * returned. If not found, the flt_type is checked for L1$ parity errors.
7202 /*ARGSUSED*/
7203 static uint8_t
7204 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7206 #if defined(JALAPENO)
7208 * Currently, logging errors to the SC is not supported on Jalapeno
7210 return (PLAT_ECC_ERROR2_NONE);
7211 #else
7212 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7214 switch (ch_flt->flt_bit) {
7215 case C_AFSR_CE:
7216 return (PLAT_ECC_ERROR2_CE);
7217 case C_AFSR_UCC:
7218 case C_AFSR_EDC:
7219 case C_AFSR_WDC:
7220 case C_AFSR_CPC:
7221 return (PLAT_ECC_ERROR2_L2_CE);
7222 case C_AFSR_EMC:
7223 return (PLAT_ECC_ERROR2_EMC);
7224 case C_AFSR_IVC:
7225 return (PLAT_ECC_ERROR2_IVC);
7226 case C_AFSR_UE:
7227 return (PLAT_ECC_ERROR2_UE);
7228 case C_AFSR_UCU:
7229 case C_AFSR_EDU:
7230 case C_AFSR_WDU:
7231 case C_AFSR_CPU:
7232 return (PLAT_ECC_ERROR2_L2_UE);
7233 case C_AFSR_IVU:
7234 return (PLAT_ECC_ERROR2_IVU);
7235 case C_AFSR_TO:
7236 return (PLAT_ECC_ERROR2_TO);
7237 case C_AFSR_BERR:
7238 return (PLAT_ECC_ERROR2_BERR);
7239 #if defined(CHEETAH_PLUS)
7240 case C_AFSR_L3_EDC:
7241 case C_AFSR_L3_UCC:
7242 case C_AFSR_L3_CPC:
7243 case C_AFSR_L3_WDC:
7244 return (PLAT_ECC_ERROR2_L3_CE);
7245 case C_AFSR_IMC:
7246 return (PLAT_ECC_ERROR2_IMC);
7247 case C_AFSR_TSCE:
7248 return (PLAT_ECC_ERROR2_L2_TSCE);
7249 case C_AFSR_THCE:
7250 return (PLAT_ECC_ERROR2_L2_THCE);
7251 case C_AFSR_L3_MECC:
7252 return (PLAT_ECC_ERROR2_L3_MECC);
7253 case C_AFSR_L3_THCE:
7254 return (PLAT_ECC_ERROR2_L3_THCE);
7255 case C_AFSR_L3_CPU:
7256 case C_AFSR_L3_EDU:
7257 case C_AFSR_L3_UCU:
7258 case C_AFSR_L3_WDU:
7259 return (PLAT_ECC_ERROR2_L3_UE);
7260 case C_AFSR_DUE:
7261 return (PLAT_ECC_ERROR2_DUE);
7262 case C_AFSR_DTO:
7263 return (PLAT_ECC_ERROR2_DTO);
7264 case C_AFSR_DBERR:
7265 return (PLAT_ECC_ERROR2_DBERR);
7266 #endif /* CHEETAH_PLUS */
7267 default:
7268 switch (ch_flt->flt_type) {
7269 #if defined(CPU_IMP_L1_CACHE_PARITY)
7270 case CPU_IC_PARITY:
7271 return (PLAT_ECC_ERROR2_IPE);
7272 case CPU_DC_PARITY:
7273 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7274 if (ch_flt->parity_data.dpe.cpl_cache ==
7275 CPU_PC_PARITY) {
7276 return (PLAT_ECC_ERROR2_PCACHE);
7279 return (PLAT_ECC_ERROR2_DPE);
7280 #endif /* CPU_IMP_L1_CACHE_PARITY */
7281 case CPU_ITLB_PARITY:
7282 return (PLAT_ECC_ERROR2_ITLB);
7283 case CPU_DTLB_PARITY:
7284 return (PLAT_ECC_ERROR2_DTLB);
7285 default:
7286 return (PLAT_ECC_ERROR2_NONE);
7289 #endif /* JALAPENO */