4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
28 * Support routines for managing per-Lxcache state.
31 #include <cmd_Lxcache.h>
40 #include <fm/fmd_api.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/cheetahregs.h>
43 #include <sys/mem_cache.h>
45 #define PN_ECSTATE_NA 5
47 * These values are our threshold values for SERDing CPU's based on
48 * the # of times we have retired a cache line for each category.
51 #define CMD_CPU_SERD_AGG_1 64
52 #define CMD_CPU_SERD_AGG_2 64
/*
 * Lookup tables indexed by a 4-bit mask of cache ways (bit i set means
 * way i is a candidate).
 */

/* Lowest way number whose bit is set in the mask; -1 when the mask is 0. */
static int8_t cmd_lowest_way[16] = {
/*	0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf */
	-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};

/* Population count (number of set bits) of the 4-bit mask. */
static int cmd_num_of_bits[16] = {
/*	0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf */
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
63 cmd_Lxcache_write(fmd_hdl_t
*hdl
, cmd_Lxcache_t
*Lxcache
)
65 fmd_buf_write(hdl
, NULL
, Lxcache
->Lxcache_bufname
, Lxcache
,
66 sizeof (cmd_Lxcache_pers_t
));
70 cmd_type_to_str(cmd_ptrsubtype_t pstype
)
73 case CMD_PTR_CPU_L2DATA
:
76 case CMD_PTR_CPU_L3DATA
:
79 case CMD_PTR_CPU_L2TAG
:
82 case CMD_PTR_CPU_L3TAG
:
92 cmd_flags_to_str(int flags
)
95 case CMD_LxCACHE_F_ACTIVE
:
97 case CMD_LxCACHE_F_FAULTING
:
99 case CMD_LxCACHE_F_RETIRED
:
101 case CMD_LxCACHE_F_UNRETIRED
:
102 return ("UNRETIRED");
103 case CMD_LxCACHE_F_RERETIRED
:
104 return ("RERETIRED");
106 return ("Unknown_flags");
111 cmd_reason_to_str(int reason
)
114 case CMD_LXSUSPECT_DATA
:
115 return ("SUSPECT_DATA");
116 case CMD_LXSUSPECT_0_TAG
:
117 return ("SUSPECT_0_TAG");
118 case CMD_LXSUSPECT_1_TAG
:
119 return ("SUSPECT_1_TAG");
120 case CMD_LXCONVICTED
:
121 return ("CONVICTED");
122 case CMD_LXFUNCTIONING
:
123 return ("FUNCTIONING");
125 return ("Unknown_reason");
130 cmd_pretty_print_Lxcache(fmd_hdl_t
*hdl
, cmd_Lxcache_t
*Lxcache
)
141 Lxcache
->Lxcache_cpu_bufname
,
142 cmd_type_to_str(Lxcache
->Lxcache_type
),
143 Lxcache
->Lxcache_index
,
144 Lxcache
->Lxcache_way
,
145 Lxcache
->Lxcache_bit
,
146 cmd_reason_to_str(Lxcache
->Lxcache_reason
),
147 cmd_flags_to_str(Lxcache
->Lxcache_flags
));
151 cmd_Lxcache_free(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
, cmd_Lxcache_t
*Lxcache
,
154 cmd_case_t
*cc
= &Lxcache
->Lxcache_case
;
156 fmd_hdl_debug(hdl
, "Entering cmd_Lxcache_free for %s destroy = %d\n",
157 Lxcache
->Lxcache_bufname
, destroy
);
159 if (cc
->cc_cp
!= NULL
)
160 cmd_case_fini(hdl
, cc
->cc_cp
, destroy
);
161 if (cc
->cc_serdnm
!= NULL
) {
162 if (fmd_serd_exists(hdl
, cc
->cc_serdnm
) && destroy
) {
163 fmd_serd_destroy(hdl
, cc
->cc_serdnm
);
164 fmd_hdl_strfree(hdl
, cc
->cc_serdnm
);
165 cc
->cc_serdnm
= NULL
;
168 if (Lxcache
->Lxcache_nvl
) {
169 nvlist_free(Lxcache
->Lxcache_nvl
);
170 Lxcache
->Lxcache_nvl
= NULL
;
173 * Clean up the SERD engine created to handle recheck of TAGS.
174 * This SERD engine was created to save the event pointer.
176 if (Lxcache
->Lxcache_serdnm
!= NULL
) {
177 if (fmd_serd_exists(hdl
, Lxcache
->Lxcache_serdnm
) && destroy
) {
178 fmd_serd_destroy(hdl
, Lxcache
->Lxcache_serdnm
);
179 fmd_hdl_strfree(hdl
, Lxcache
->Lxcache_serdnm
);
180 Lxcache
->Lxcache_serdnm
= NULL
;
183 Lxcache
->Lxcache_timeout_id
= -1;
184 Lxcache
->Lxcache_ep
= NULL
;
185 Lxcache
->Lxcache_retry_count
= 0;
187 fmd_buf_destroy(hdl
, NULL
, Lxcache
->Lxcache_bufname
);
188 cmd_fmri_fini(hdl
, &Lxcache
->Lxcache_asru
, destroy
);
189 cmd_list_delete(&cpu
->cpu_Lxcaches
, Lxcache
);
190 fmd_hdl_free(hdl
, Lxcache
, sizeof (cmd_Lxcache_t
));
194 cmd_Lxcache_destroy(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
, cmd_Lxcache_t
*Lxcache
)
196 cmd_Lxcache_free(hdl
, cpu
, Lxcache
, FMD_B_TRUE
);
200 cmd_Lxcache_lookup_by_type_index_way_bit(cmd_cpu_t
*cpu
,
201 cmd_ptrsubtype_t pstype
, int32_t index
, int8_t way
, int16_t bit
)
203 cmd_Lxcache_t
*Lxcache
;
205 for (Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
); Lxcache
!= NULL
;
206 Lxcache
= cmd_list_next(Lxcache
)) {
207 if ((Lxcache
->Lxcache_type
== pstype
) &&
208 (Lxcache
->Lxcache_index
== (uint32_t)index
) &&
209 (Lxcache
->Lxcache_way
== (uint32_t)way
) &&
210 (Lxcache
->Lxcache_bit
== (uint16_t)bit
))
218 cmd_Lxcache_create(fmd_hdl_t
*hdl
, cmd_xr_t
*xr
, cmd_cpu_t
*cpu
,
219 nvlist_t
*modasru
, cmd_ptrsubtype_t pstype
, int32_t index
,
220 int8_t way
, int16_t bit
)
222 cmd_Lxcache_t
*Lxcache
;
224 const char *pstype_name
;
225 uint8_t fmri_Lxcache_type
;
227 pstype_name
= cmd_type_to_str(pstype
);
229 "\n%s:cpu_id %d:Creating new Lxcache for index=%d way=%d bit=%d\n",
230 pstype_name
, cpu
->cpu_cpuid
, index
, way
, bit
);
232 CMD_CPU_STAT_BUMP(cpu
, Lxcache_creat
);
234 Lxcache
= fmd_hdl_zalloc(hdl
, sizeof (cmd_Lxcache_t
), FMD_SLEEP
);
235 (void) strncpy(Lxcache
->Lxcache_cpu_bufname
,
236 cpu
->cpu_bufname
, CMD_BUFNMLEN
);
237 Lxcache
->Lxcache_nodetype
= CMD_NT_LxCACHE
;
238 Lxcache
->Lxcache_version
= CMD_LxCACHE_VERSION
;
239 Lxcache
->Lxcache_type
= pstype
;
240 Lxcache
->Lxcache_index
= (uint32_t)index
;
241 Lxcache
->Lxcache_way
= (uint32_t)way
;
242 Lxcache
->Lxcache_bit
= (uint16_t)bit
;
243 Lxcache
->Lxcache_reason
= CMD_LXFUNCTIONING
;
244 Lxcache
->Lxcache_flags
= CMD_LxCACHE_F_ACTIVE
;
245 Lxcache
->Lxcache_timeout_id
= -1;
246 Lxcache
->Lxcache_retry_count
= 0;
247 Lxcache
->Lxcache_nvl
= NULL
;
248 Lxcache
->Lxcache_ep
= NULL
;
249 Lxcache
->Lxcache_serdnm
= NULL
;
250 Lxcache
->Lxcache_clcode
= 0;
252 Lxcache
->Lxcache_retired_fmri
[0] = '\0';
254 case CMD_PTR_CPU_L2DATA
:
255 fmri_Lxcache_type
= FM_FMRI_CPU_CACHE_TYPE_L2
;
257 case CMD_PTR_CPU_L3DATA
:
258 fmri_Lxcache_type
= FM_FMRI_CPU_CACHE_TYPE_L3
;
260 case CMD_PTR_CPU_L2TAG
:
261 fmri_Lxcache_type
= FM_FMRI_CPU_CACHE_TYPE_L2
;
263 case CMD_PTR_CPU_L3TAG
:
264 fmri_Lxcache_type
= FM_FMRI_CPU_CACHE_TYPE_L3
;
270 cmd_bufname(Lxcache
->Lxcache_bufname
, sizeof (Lxcache
->Lxcache_bufname
),
271 "Lxcache_%s_%d_%d_%d_%d", pstype_name
, cpu
->cpu_cpuid
,
274 "\n%s:cpu_id %d: new Lxcache name is %s\n",
275 pstype_name
, cpu
->cpu_cpuid
, Lxcache
->Lxcache_bufname
);
276 if ((errno
= nvlist_dup(modasru
, &asru
, 0)) != 0 ||
277 (errno
= nvlist_add_uint32(asru
, FM_FMRI_CPU_CACHE_INDEX
,
279 (errno
= nvlist_add_uint32(asru
, FM_FMRI_CPU_CACHE_WAY
,
280 (uint32_t)way
)) != 0 ||
281 (errno
= nvlist_add_uint16(asru
, FM_FMRI_CPU_CACHE_BIT
,
283 (errno
= nvlist_add_uint8(asru
, FM_FMRI_CPU_CACHE_TYPE
,
284 fmri_Lxcache_type
)) != 0 ||
285 (errno
= fmd_nvl_fmri_expand(hdl
, asru
)) != 0)
286 fmd_hdl_abort(hdl
, "failed to build Lxcache fmri");
287 asru
->nvl_nvflag
|= NV_UNIQUE_NAME_TYPE
;
289 cmd_fmri_init(hdl
, &Lxcache
->Lxcache_asru
, asru
,
290 "%s_asru_%d_%d_%d", pstype_name
, index
, way
, bit
);
294 cmd_list_append(&cpu
->cpu_Lxcaches
, Lxcache
);
295 cmd_Lxcache_write(hdl
, Lxcache
);
301 cmd_Lxcache_lookup_by_index_way(cmd_cpu_t
*cpu
, cmd_ptrsubtype_t pstype
,
302 int32_t index
, int8_t way
)
304 cmd_Lxcache_t
*cache
;
306 for (cache
= cmd_list_next(&cpu
->cpu_Lxcaches
); cache
!= NULL
;
307 cache
= cmd_list_next(cache
)) {
308 if ((cache
->Lxcache_index
== (uint32_t)index
) &&
309 (cache
->Lxcache_way
== (uint32_t)way
) &&
310 (cache
->Lxcache_type
== pstype
)) {
318 static cmd_Lxcache_t
*
319 Lxcache_wrapv1(fmd_hdl_t
*hdl
, cmd_Lxcache_pers_t
*pers
, size_t psz
)
321 cmd_Lxcache_t
*Lxcache
;
323 if (psz
!= sizeof (cmd_Lxcache_pers_t
)) {
324 fmd_hdl_abort(hdl
, "size of state doesn't match size of "
325 "version 1 state (%u bytes).\n",
326 sizeof (cmd_Lxcache_pers_t
));
329 Lxcache
= fmd_hdl_zalloc(hdl
, sizeof (cmd_Lxcache_t
), FMD_SLEEP
);
330 bcopy(pers
, Lxcache
, sizeof (cmd_Lxcache_pers_t
));
331 fmd_hdl_free(hdl
, pers
, psz
);
336 cmd_Lxcache_restore(fmd_hdl_t
*hdl
, fmd_case_t
*cp
, cmd_case_ptr_t
*ptr
)
338 cmd_Lxcache_t
*Lxcache
;
339 cmd_Lxcache_t
*recovered_Lxcache
;
345 * We need to first extract the cpu name by reading directly
346 * from fmd buffers in order to begin our search for Lxcache in
347 * the appropriate cpu list.
348 * After we identify the cpu list using buf name we look
349 * in cpu list for our Lxcache states.
351 fmd_hdl_debug(hdl
, "restoring Lxcache from %s\n", ptr
->ptr_name
);
353 if ((Lxcachesz
= fmd_buf_size(hdl
, NULL
, ptr
->ptr_name
)) == 0) {
354 fmd_hdl_abort(hdl
, "Lxcache referenced by case %s does "
355 "not exist in saved state\n",
356 fmd_case_uuid(hdl
, cp
));
357 } else if (Lxcachesz
!= sizeof (cmd_Lxcache_pers_t
)) {
358 fmd_hdl_abort(hdl
, "Lxcache buffer referenced by case %s "
359 "is %d bytes. Expected size is %d bytes\n",
360 fmd_case_uuid(hdl
, cp
), Lxcachesz
,
361 sizeof (cmd_Lxcache_pers_t
));
364 if ((Lxcache
= cmd_buf_read(hdl
, NULL
, ptr
->ptr_name
,
365 Lxcachesz
)) == NULL
) {
366 fmd_hdl_abort(hdl
, "failed to read Lxcache buf %s",
369 cmd_pretty_print_Lxcache(hdl
, Lxcache
);
371 fmd_hdl_debug(hdl
, "found %d in version field\n",
372 Lxcache
->Lxcache_version
);
373 cpu
= cmd_restore_cpu_only(hdl
, cp
, Lxcache
->Lxcache_cpu_bufname
);
376 "\nCould not restore cpu %s\n",
377 Lxcache
->Lxcache_cpu_bufname
);
380 recovered_Lxcache
= Lxcache
; /* save the recovered Lxcache */
382 for (Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
); Lxcache
!= NULL
;
383 Lxcache
= cmd_list_next(Lxcache
)) {
384 if (strcmp(Lxcache
->Lxcache_bufname
, ptr
->ptr_name
) == 0)
388 if (Lxcache
== NULL
) {
390 switch (recovered_Lxcache
->Lxcache_version
) {
391 case CMD_LxCACHE_VERSION_1
:
392 Lxcache
= Lxcache_wrapv1(hdl
,
393 (cmd_Lxcache_pers_t
*)recovered_Lxcache
,
397 fmd_hdl_abort(hdl
, "unknown version (found %d) "
398 "for Lxcache state referenced by case %s.\n",
399 recovered_Lxcache
->Lxcache_version
,
400 fmd_case_uuid(hdl
, cp
));
404 cmd_fmri_restore(hdl
, &Lxcache
->Lxcache_asru
);
406 * We need to cleanup the information associated with
407 * the timeout routine because these are not checkpointed
408 * and cannot be retored.
410 Lxcache
->Lxcache_timeout_id
= -1;
411 Lxcache
->Lxcache_retry_count
= 0;
412 Lxcache
->Lxcache_nvl
= NULL
;
413 Lxcache
->Lxcache_ep
= NULL
;
414 Lxcache
->Lxcache_serdnm
= NULL
;
416 cmd_list_append(&cpu
->cpu_Lxcaches
, Lxcache
);
418 serdnm
= cmd_Lxcache_serdnm_create(hdl
, cpu
->cpu_cpuid
,
419 Lxcache
->Lxcache_type
, Lxcache
->Lxcache_index
,
420 Lxcache
->Lxcache_way
, Lxcache
->Lxcache_bit
);
422 "cpu_id %d: serdname for the case is %s\n",
423 cpu
->cpu_cpuid
, serdnm
);
425 "cpu_id %d: restoring the case for index %d way %d bit %d\n",
426 cpu
->cpu_cpuid
, Lxcache
->Lxcache_index
,
427 Lxcache
->Lxcache_way
, Lxcache
->Lxcache_bit
);
428 cmd_case_restore(hdl
, &Lxcache
->Lxcache_case
, cp
, serdnm
);
435 cmd_Lxcache_validate(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
)
437 cmd_Lxcache_t
*Lxcache
, *next
;
439 for (Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
);
440 Lxcache
!= NULL
; Lxcache
= next
) {
441 next
= cmd_list_next(Lxcache
);
443 if (fmd_nvl_fmri_unusable(hdl
, Lxcache
->Lxcache_asru_nvl
)) {
444 cmd_Lxcache_destroy(hdl
, cpu
, Lxcache
);
450 cmd_Lxcache_dirty(fmd_hdl_t
*hdl
, cmd_Lxcache_t
*Lxcache
)
452 if (fmd_buf_size(hdl
, NULL
, Lxcache
->Lxcache_bufname
) !=
453 sizeof (cmd_Lxcache_pers_t
))
454 fmd_buf_destroy(hdl
, NULL
, Lxcache
->Lxcache_bufname
);
456 /* No need to rewrite the FMRIs in the Lxcache - they don't change */
457 fmd_buf_write(hdl
, NULL
,
458 Lxcache
->Lxcache_bufname
, &Lxcache
->Lxcache_pers
,
459 sizeof (cmd_Lxcache_pers_t
));
463 cmd_Lxcache_fini(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
)
465 cmd_Lxcache_t
*Lxcache
;
467 while ((Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
)) != NULL
)
468 cmd_Lxcache_free(hdl
, cpu
, Lxcache
, FMD_B_FALSE
);
472 cmd_Lxcache_serdnm_create(fmd_hdl_t
*hdl
, uint32_t cpu_id
,
473 cmd_ptrsubtype_t pstype
,
474 int32_t index
, int8_t way
, int16_t bit
)
476 const char *fmt
= "cpu_%d:%s_%d_%d_%d_serd";
477 const char *serdbase
;
481 serdbase
= cmd_type_to_str(pstype
);
482 sz
= (snprintf(NULL
, 0, fmt
, cpu_id
, serdbase
, index
, way
, bit
) + 1);
483 nm
= fmd_hdl_alloc(hdl
, sz
, FMD_SLEEP
);
484 (void) snprintf(nm
, sz
, fmt
, cpu_id
, serdbase
, index
, way
, bit
);
489 cmd_Lxcache_anonymous_serdnm_create(fmd_hdl_t
*hdl
, uint32_t cpu_id
,
490 cmd_ptrsubtype_t pstype
,
491 int32_t index
, int8_t way
, int16_t bit
)
493 const char *fmt
= "cpu_%d:%s_%d_%d_%d_anonymous_serd";
494 const char *serdbase
;
498 serdbase
= cmd_type_to_str(pstype
);
499 sz
= (snprintf(NULL
, 0, fmt
, cpu_id
, serdbase
, index
, way
, bit
) + 1);
500 nm
= fmd_hdl_alloc(hdl
, sz
, FMD_SLEEP
);
501 (void) snprintf(nm
, sz
, fmt
, cpu_id
, serdbase
, index
, way
, bit
);
506 * Count the number of SERD type 2 ways retired for a given cpu
507 * These are defined to be L3 Cache data retirements
511 cmd_Lx_index_count_type2_ways(cmd_cpu_t
*cpu
)
513 cmd_Lxcache_t
*cache
= NULL
;
514 uint32_t ret_count
= 0;
516 for (cache
= cmd_list_next(&cpu
->cpu_Lxcaches
); cache
!= NULL
;
517 cache
= cmd_list_next(cache
)) {
518 if ((cache
->Lxcache_flags
& CMD_LxCACHE_F_RETIRED
) &&
519 (cache
->Lxcache_type
== CMD_PTR_CPU_L3DATA
)) {
526 * Count the number of SERD type 1 ways retired for a given cpu
527 * These are defined to be L2 Data, tag and L3 Tag retirements
531 cmd_Lx_index_count_type1_ways(cmd_cpu_t
*cpu
)
533 cmd_Lxcache_t
*cache
= NULL
;
534 uint32_t ret_count
= 0;
536 for (cache
= cmd_list_next(&cpu
->cpu_Lxcaches
); cache
!= NULL
;
537 cache
= cmd_list_next(cache
)) {
538 if ((cache
->Lxcache_flags
& CMD_LxCACHE_F_RETIRED
) &&
539 ((cache
->Lxcache_type
== CMD_PTR_CPU_L2DATA
) ||
540 IS_TAG(cache
->Lxcache_type
))) {
548 cmd_fault_the_cpu(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
, cmd_ptrsubtype_t pstype
,
554 cp
= cmd_case_create(hdl
, &cpu
->cpu_header
, pstype
,
557 "\n%s:cpu_id %d Created case %s to retire CPU\n",
558 fltnm
, cpu
->cpu_cpuid
);
560 if ((errno
= fmd_nvl_fmri_expand(hdl
, cpu
->cpu_asru_nvl
)) != 0)
561 fmd_hdl_abort(hdl
, "failed to build CPU fmri");
563 cmd_cpu_create_faultlist(hdl
, cp
, cpu
, fltnm
, NULL
, HUNDRED_PERCENT
);
564 fmd_case_solve(hdl
, cp
);
568 cmd_retire_cpu_if_limits_exceeded(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
,
569 cmd_ptrsubtype_t pstype
, const char *fltnm
)
571 int cpu_retired_1
, cpu_retired_2
;
573 /* Retrieve the number of retired ways for each category */
575 cpu_retired_1
= cmd_Lx_index_count_type1_ways(cpu
);
576 cpu_retired_2
= cmd_Lx_index_count_type2_ways(cpu
);
578 "\n%s:CPU %d retired Type 1 way count is: %d\n",
579 fltnm
, cpu
->cpu_cpuid
, cpu_retired_1
);
580 fmd_hdl_debug(hdl
, "\n%s:CPU %d retired Type 2 way count is: %d\n",
581 fltnm
, cpu
->cpu_cpuid
, cpu_retired_2
);
583 if (((cpu_retired_1
> CMD_CPU_SERD_AGG_1
) ||
584 (cpu_retired_2
> CMD_CPU_SERD_AGG_2
)) &&
585 (cpu
->cpu_faulting
!= FMD_B_TRUE
)) {
586 cmd_fault_the_cpu(hdl
, cpu
, pstype
, fltnm
);
591 cmd_Lxcache_fault(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
, cmd_Lxcache_t
*Lxcache
,
592 const char *fltnm
, nvlist_t
*rsrc
, uint_t cert
)
597 (void) snprintf(fltmsg
, sizeof (fltmsg
), "fault.cpu.%s.%s-line",
598 cmd_cpu_type2name(hdl
, cpu
->cpu_type
), fltnm
);
600 "\n%s:cpu_id %d: fltmsg = %s\n",
601 fltnm
, cpu
->cpu_cpuid
, fltmsg
);
602 if (Lxcache
->Lxcache_flags
& CMD_LxCACHE_F_FAULTING
) {
605 Lxcache
->Lxcache_flags
|= CMD_LxCACHE_F_FAULTING
;
606 flt
= fmd_nvl_create_fault(hdl
, fltmsg
, cert
,
607 Lxcache
->Lxcache_asru
.fmri_nvl
, cpu
->cpu_fru_nvl
, rsrc
);
608 if (nvlist_add_boolean_value(flt
, FM_SUSPECT_MESSAGE
, B_FALSE
) != 0)
609 fmd_hdl_abort(hdl
, "failed to add no-message member to fault");
612 "\n%s:cpu_id %d: adding suspect list to case %s\n",
613 fltnm
, cpu
->cpu_cpuid
,
614 fmd_case_uuid(hdl
, Lxcache
->Lxcache_case
.cc_cp
));
615 fmd_case_add_suspect(hdl
, Lxcache
->Lxcache_case
.cc_cp
, flt
);
616 fmd_case_solve(hdl
, Lxcache
->Lxcache_case
.cc_cp
);
617 if (Lxcache
->Lxcache_retired_fmri
[0] == 0) {
618 if (cmd_fmri_nvl2str(hdl
, Lxcache
->Lxcache_asru
.fmri_nvl
,
619 Lxcache
->Lxcache_retired_fmri
,
620 sizeof (Lxcache
->Lxcache_retired_fmri
)) == -1)
622 "\n%s:cpu_id %d: Failed to save the"
623 " retired fmri string\n",
624 fltnm
, cpu
->cpu_cpuid
);
627 "\n%s:cpu_id %d:Saved the retired fmri string %s\n",
628 fltnm
, cpu
->cpu_cpuid
,
629 Lxcache
->Lxcache_retired_fmri
);
631 Lxcache
->Lxcache_flags
&= ~(CMD_LxCACHE_F_FAULTING
);
636 cmd_Lxcache_close(fmd_hdl_t
*hdl
, void *arg
)
639 cmd_Lxcache_t
*Lxcache
;
642 Lxcache
= (cmd_Lxcache_t
*)arg
;
643 fmd_hdl_debug(hdl
, "cmd_Lxcache_close called for %s\n",
644 Lxcache
->Lxcache_bufname
);
645 cc
= &Lxcache
->Lxcache_case
;
647 for (cpu
= cmd_list_next(&cmd
.cmd_cpus
); cpu
!= NULL
;
648 cpu
= cmd_list_next(cpu
)) {
649 if (strcmp(cpu
->cpu_bufname
,
650 Lxcache
->Lxcache_cpu_bufname
) == 0)
654 fmd_hdl_abort(hdl
, "failed to find the cpu %s for %s\n",
655 Lxcache
->Lxcache_cpu_bufname
,
656 Lxcache
->Lxcache_bufname
);
658 * We will destroy the case and serd engine.
659 * The rest will be destroyed when we retire the CPU
660 * until then we keep the Lxcache strutures alive.
662 if (cc
->cc_cp
!= NULL
) {
663 cmd_case_fini(hdl
, cc
->cc_cp
, FMD_B_TRUE
);
666 if (cc
->cc_serdnm
!= NULL
) {
667 if (fmd_serd_exists(hdl
, cc
->cc_serdnm
))
668 fmd_serd_destroy(hdl
, cc
->cc_serdnm
);
669 fmd_hdl_strfree(hdl
, cc
->cc_serdnm
);
670 cc
->cc_serdnm
= NULL
;
676 cmd_Lxcache_lookup_by_timeout_id(id_t id
)
679 cmd_Lxcache_t
*cmd_Lxcache
;
681 for (cpu
= cmd_list_next(&cmd
.cmd_cpus
); cpu
!= NULL
;
682 cpu
= cmd_list_next(cpu
)) {
683 for (cmd_Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
);
685 cmd_Lxcache
= cmd_list_next(cmd_Lxcache
)) {
686 if (cmd_Lxcache
->Lxcache_timeout_id
== id
)
687 return (cmd_Lxcache
);
694 cmd_Lxcache_gc(fmd_hdl_t
*hdl
)
698 for (cpu
= cmd_list_next(&cmd
.cmd_cpus
); cpu
!= NULL
;
699 cpu
= cmd_list_next(cpu
))
700 cmd_Lxcache_validate(hdl
, cpu
);
704 get_tagdata(cmd_cpu_t
*cpu
, cmd_ptrsubtype_t pstype
,
705 int32_t index
, uint64_t *tag_data
)
708 cache_info_t cache_info
;
710 fd
= open(mem_cache_device
, O_RDONLY
);
713 "cpu_id = %d could not open %s to read tag info.\n",
714 cpu
->cpu_cpuid
, mem_cache_device
);
715 return (CMD_EVD_BAD
);
718 case CMD_PTR_CPU_L2TAG
:
719 case CMD_PTR_CPU_L2DATA
:
720 cache_info
.cache
= L2_CACHE_TAG
;
722 case CMD_PTR_CPU_L3TAG
:
723 case CMD_PTR_CPU_L3DATA
:
724 cache_info
.cache
= L3_CACHE_TAG
;
727 cache_info
.cpu_id
= cpu
->cpu_cpuid
;
728 cache_info
.index
= index
;
729 cache_info
.datap
= tag_data
;
734 if (ioctl(fd
, MEM_CACHE_READ_ERROR_INJECTED_TAGS
, &cache_info
)
736 (void) printf("cpu_id = %d ioctl"
737 " MEM_CACHE_READ_ERROR_INJECTED_TAGS failed"
739 cpu
->cpu_cpuid
, errno
);
741 return (CMD_EVD_BAD
);
744 if (ioctl(fd
, MEM_CACHE_READ_TAGS
, &cache_info
)
746 (void) printf("cpu_id = %d ioctl"
747 " MEM_CACHE_READ_TAGS failed"
749 cpu
->cpu_cpuid
, errno
);
751 return (CMD_EVD_BAD
);
759 get_index_retired_ways(cmd_cpu_t
*cpu
, cmd_ptrsubtype_t pstype
, int32_t index
)
762 uint64_t tag_data
[PN_CACHE_NWAYS
];
764 if (get_tagdata(cpu
, pstype
, index
, tag_data
) != 0) {
768 for (i
= 0; i
< PN_CACHE_NWAYS
; i
++) {
769 if ((tag_data
[i
] & CH_ECSTATE_MASK
) ==
773 return (retired_ways
);
777 cmd_cache_way_retire(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
, cmd_Lxcache_t
*Lxcache
)
780 cache_info_t cache_info
;
783 fltnm
= cmd_type_to_str(Lxcache
->Lxcache_type
);
784 fd
= open(mem_cache_device
, O_RDWR
);
787 "fltnm:cpu_id %d open of %s failed\n",
788 fltnm
, cpu
->cpu_cpuid
, mem_cache_device
);
791 cache_info
.cpu_id
= cpu
->cpu_cpuid
;
792 cache_info
.way
= Lxcache
->Lxcache_way
;
793 cache_info
.bit
= Lxcache
->Lxcache_bit
;
794 cache_info
.index
= Lxcache
->Lxcache_index
;
796 switch (Lxcache
->Lxcache_type
) {
797 case CMD_PTR_CPU_L2TAG
:
798 cache_info
.cache
= L2_CACHE_TAG
;
800 case CMD_PTR_CPU_L2DATA
:
801 cache_info
.cache
= L2_CACHE_DATA
;
803 case CMD_PTR_CPU_L3TAG
:
804 cache_info
.cache
= L3_CACHE_TAG
;
806 case CMD_PTR_CPU_L3DATA
:
807 cache_info
.cache
= L3_CACHE_DATA
;
812 "\n%s:cpu %d: Retiring index %d, way %d bit %d\n",
813 fltnm
, cpu
->cpu_cpuid
, cache_info
.index
, cache_info
.way
,
814 (int16_t)cache_info
.bit
);
815 ret
= ioctl(fd
, MEM_CACHE_RETIRE
, &cache_info
);
819 "fltnm:cpu_id %d MEM_CACHE_RETIRE ioctl failed\n",
820 fltnm
, cpu
->cpu_cpuid
);
828 cmd_cache_way_unretire(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
, cmd_Lxcache_t
*Lxcache
)
831 cache_info_t cache_info
;
834 fltnm
= cmd_type_to_str(Lxcache
->Lxcache_type
);
835 fd
= open(mem_cache_device
, O_RDWR
);
838 "fltnm:cpu_id %d open of %s failed\n",
839 fltnm
, cpu
->cpu_cpuid
, mem_cache_device
);
842 cache_info
.cpu_id
= cpu
->cpu_cpuid
;
843 cache_info
.way
= Lxcache
->Lxcache_way
;
844 cache_info
.bit
= Lxcache
->Lxcache_bit
;
845 cache_info
.index
= Lxcache
->Lxcache_index
;
847 switch (Lxcache
->Lxcache_type
) {
848 case CMD_PTR_CPU_L2TAG
:
849 cache_info
.cache
= L2_CACHE_TAG
;
851 case CMD_PTR_CPU_L2DATA
:
852 cache_info
.cache
= L2_CACHE_DATA
;
854 case CMD_PTR_CPU_L3TAG
:
855 cache_info
.cache
= L3_CACHE_TAG
;
857 case CMD_PTR_CPU_L3DATA
:
858 cache_info
.cache
= L3_CACHE_DATA
;
863 "\n%s:cpu %d: Unretiring index %d, way %d bit %d\n",
864 fltnm
, cpu
->cpu_cpuid
, cache_info
.index
, cache_info
.way
,
865 (int16_t)cache_info
.bit
);
866 ret
= ioctl(fd
, MEM_CACHE_UNRETIRE
, &cache_info
);
870 "fltnm:cpu_id %d MEM_CACHE_UNRETIRE ioctl failed\n",
871 fltnm
, cpu
->cpu_cpuid
);
878 static cmd_Lxcache_t
*
879 cmd_Lxcache_lookup_by_type_index_way_flags(cmd_cpu_t
*cpu
,
880 cmd_ptrsubtype_t type
, int32_t index
, int8_t way
, int32_t flags
)
882 cmd_Lxcache_t
*cmd_Lxcache
;
884 for (cmd_Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
);
886 cmd_Lxcache
= cmd_list_next(cmd_Lxcache
)) {
887 if ((cmd_Lxcache
->Lxcache_index
== index
) &&
888 (cmd_Lxcache
->Lxcache_way
== way
) &&
889 (cmd_Lxcache
->Lxcache_type
== type
) &&
890 (cmd_Lxcache
->Lxcache_flags
& flags
))
891 return (cmd_Lxcache
);
897 cmd_Lxcache_get_bit_array_of_available_ways(cmd_cpu_t
*cpu
,
898 cmd_ptrsubtype_t type
, int32_t index
)
900 uint8_t bit_array_of_unavailable_ways
;
901 uint8_t bit_array_of_available_ways
;
902 cmd_ptrsubtype_t match_type
;
903 cmd_Lxcache_t
*cmd_Lxcache
;
904 uint8_t bit_array_of_retired_ways
;
908 * We scan the Lxcache structures for this CPU and collect
909 * the following 2 information.
910 * - bit_array_of_retired_ways
911 * - bit_array_of_unavailable_ways
912 * If type is Lx_TAG then unavailable_ways will not include ways that
913 * were retired due to DATA faults, because these ways can still be
914 * re-retired for TAG faults.
915 * If 3 ways have been retired then we protect the only remaining
916 * unretired way by marking it as unavailable.
918 bit_array_of_unavailable_ways
= 0;
919 bit_array_of_retired_ways
= 0;
921 case CMD_PTR_CPU_L2TAG
:
922 match_type
= CMD_PTR_CPU_L2DATA
;
924 case CMD_PTR_CPU_L2DATA
:
925 match_type
= CMD_PTR_CPU_L2TAG
;
927 case CMD_PTR_CPU_L3TAG
:
928 match_type
= CMD_PTR_CPU_L3DATA
;
930 case CMD_PTR_CPU_L3DATA
:
931 match_type
= CMD_PTR_CPU_L3TAG
;
935 for (cmd_Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
);
937 cmd_Lxcache
= cmd_list_next(cmd_Lxcache
)) {
938 if ((cmd_Lxcache
->Lxcache_index
== index
) &&
939 ((cmd_Lxcache
->Lxcache_type
== type
) ||
940 (cmd_Lxcache
->Lxcache_type
== match_type
)) &&
941 (cmd_Lxcache
->Lxcache_flags
&
942 (CMD_LxCACHE_F_RETIRED
| CMD_LxCACHE_F_RERETIRED
))) {
943 bit_array_of_retired_ways
|=
944 (1 << cmd_Lxcache
->Lxcache_way
);
946 * If we are calling this while handling TAG errors
947 * we can reretire the cachelines retired due to DATA
948 * errors. We will ignore the cachelnes that are
949 * retired due to DATA faults.
951 if ((type
== CMD_PTR_CPU_L2TAG
) &&
952 (cmd_Lxcache
->Lxcache_type
== CMD_PTR_CPU_L2DATA
))
954 if ((type
== CMD_PTR_CPU_L3TAG
) &&
955 (cmd_Lxcache
->Lxcache_type
== CMD_PTR_CPU_L3DATA
))
957 bit_array_of_unavailable_ways
|=
958 (1 << cmd_Lxcache
->Lxcache_way
);
961 if (cmd_num_of_bits
[bit_array_of_retired_ways
& 0xf] == 3) {
963 * special case: 3 ways are already retired.
964 * The Lone unretired way is set as 1, rest are set as 0.
965 * We now OR this with bit_array_of_unavailable_ways
966 * so that this unretired way will not be allocated.
968 bit_array_of_retired_ways
^= 0xf;
969 bit_array_of_retired_ways
&= 0xf;
970 bit_array_of_unavailable_ways
|= bit_array_of_retired_ways
;
972 bit_array_of_available_ways
=
973 ((bit_array_of_unavailable_ways
^ 0xf) & 0xf);
974 return (bit_array_of_available_ways
);
979 * Look for a way next to the specified way that is
980 * not in a retired state.
981 * We stop when way 3 is reached.
984 cmd_Lxcache_get_next_retirable_way(cmd_cpu_t
*cpu
,
985 int32_t index
, cmd_ptrsubtype_t pstype
, int8_t specified_way
)
987 uint8_t bit_array_of_ways
;
990 if (specified_way
== 3)
992 bit_array_of_ways
= cmd_Lxcache_get_bit_array_of_available_ways(
995 if (specified_way
== 2)
997 else if (specified_way
== 1)
1001 return (cmd_lowest_way
[bit_array_of_ways
& mask
]);
1005 cmd_Lxcache_get_lowest_retirable_way(cmd_cpu_t
*cpu
,
1006 int32_t index
, cmd_ptrsubtype_t pstype
)
1008 uint8_t bit_array_of_ways
;
1010 bit_array_of_ways
= cmd_Lxcache_get_bit_array_of_available_ways(
1013 return (cmd_lowest_way
[bit_array_of_ways
]);
1017 cmd_Lxcache_lookup_by_type_index_way_reason(cmd_cpu_t
*cpu
,
1018 cmd_ptrsubtype_t pstype
, int32_t index
, int8_t way
, int32_t reason
)
1020 cmd_Lxcache_t
*cmd_Lxcache
;
1022 for (cmd_Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
);
1023 cmd_Lxcache
!= NULL
;
1024 cmd_Lxcache
= cmd_list_next(cmd_Lxcache
)) {
1025 if ((cmd_Lxcache
->Lxcache_index
== (uint32_t)index
) &&
1026 (cmd_Lxcache
->Lxcache_way
== (uint32_t)way
) &&
1027 (cmd_Lxcache
->Lxcache_reason
& reason
) &&
1028 (cmd_Lxcache
->Lxcache_type
== pstype
)) {
1029 return (cmd_Lxcache
);
1036 cmd_Lxcache_lookup_by_type_index_bit_reason(cmd_cpu_t
*cpu
,
1037 cmd_ptrsubtype_t pstype
, int32_t index
, int16_t bit
, int32_t reason
)
1039 cmd_Lxcache_t
*cmd_Lxcache
;
1041 for (cmd_Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
);
1042 cmd_Lxcache
!= NULL
;
1043 cmd_Lxcache
= cmd_list_next(cmd_Lxcache
)) {
1044 if ((cmd_Lxcache
->Lxcache_index
== (uint32_t)index
) &&
1045 (cmd_Lxcache
->Lxcache_bit
== (uint16_t)bit
) &&
1046 (cmd_Lxcache
->Lxcache_reason
& reason
) &&
1047 (cmd_Lxcache
->Lxcache_type
== pstype
)) {
1048 return (cmd_Lxcache
);
1055 cmd_Lxcache_destroy_anonymous_serd_engines(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
,
1056 cmd_ptrsubtype_t type
, int32_t index
, int16_t bit
)
1058 cmd_Lxcache_t
*cmd_Lxcache
;
1061 for (cmd_Lxcache
= cmd_list_next(&cpu
->cpu_Lxcaches
);
1062 cmd_Lxcache
!= NULL
;
1063 cmd_Lxcache
= cmd_list_next(cmd_Lxcache
)) {
1064 if ((cmd_Lxcache
->Lxcache_type
== type
) &&
1065 (cmd_Lxcache
->Lxcache_index
== (uint32_t)index
) &&
1066 (cmd_Lxcache
->Lxcache_bit
== (uint16_t)bit
) &&
1067 (cmd_Lxcache
->Lxcache_way
== (uint32_t)CMD_ANON_WAY
)) {
1068 cc
= &cmd_Lxcache
->Lxcache_case
;
1071 if (cc
->cc_serdnm
!= NULL
) {
1072 if (fmd_serd_exists(hdl
, cc
->cc_serdnm
)) {
1074 "\n%s:cpu_id %d destroying SERD"
1076 cmd_type_to_str(type
),
1077 cpu
->cpu_cpuid
, cc
->cc_serdnm
);
1078 fmd_serd_destroy(hdl
, cc
->cc_serdnm
);
1080 fmd_hdl_strfree(hdl
, cc
->cc_serdnm
);
1081 cc
->cc_serdnm
= NULL
;
1088 cmd_fmri_nvl2str(fmd_hdl_t
*hdl
, nvlist_t
*nvl
, char *buf
, size_t buflen
)
1091 uint32_t cpuid
, way
;
1094 char *serstr
= NULL
;
1095 char missing_list
[128];
1097 missing_list
[0] = 0;
1098 if (nvlist_lookup_uint32(nvl
, FM_FMRI_CPU_ID
, &cpuid
) != 0)
1099 (void) strcat(missing_list
, FM_FMRI_CPU_ID
);
1100 if (nvlist_lookup_string(nvl
, FM_FMRI_CPU_SERIAL_ID
, &serstr
) != 0)
1101 (void) strcat(missing_list
, FM_FMRI_CPU_SERIAL_ID
);
1102 if (nvlist_lookup_uint32(nvl
, FM_FMRI_CPU_CACHE_INDEX
, &index
) != 0)
1103 (void) strcat(missing_list
, FM_FMRI_CPU_CACHE_INDEX
);
1104 if (nvlist_lookup_uint32(nvl
, FM_FMRI_CPU_CACHE_WAY
, &way
) != 0)
1105 (void) strcat(missing_list
, FM_FMRI_CPU_CACHE_WAY
);
1106 if (nvlist_lookup_uint16(nvl
, FM_FMRI_CPU_CACHE_BIT
, &bit
) != 0)
1107 (void) strcat(missing_list
, FM_FMRI_CPU_CACHE_BIT
);
1108 if (nvlist_lookup_uint8(nvl
, FM_FMRI_CPU_CACHE_TYPE
, &type
) != 0)
1109 (void) strcat(missing_list
, FM_FMRI_CPU_CACHE_TYPE
);
1111 if (strlen(missing_list
) != 0) {
1113 "\ncmd_fmri_nvl2str: missing %s in fmri\n",
1118 return (snprintf(buf
, buflen
,
1119 "cpu:///%s=%u/%s=%s/%s=%u/%s=%u/%s=%d/%s=%d",
1120 FM_FMRI_CPU_ID
, cpuid
,
1121 FM_FMRI_CPU_SERIAL_ID
, serstr
,
1122 FM_FMRI_CPU_CACHE_INDEX
, index
,
1123 FM_FMRI_CPU_CACHE_WAY
, way
,
1124 FM_FMRI_CPU_CACHE_BIT
, bit
,
1125 FM_FMRI_CPU_CACHE_TYPE
, type
));
1129 cmd_create_case_for_Lxcache(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
,
1130 cmd_Lxcache_t
*cmd_Lxcache
)
1135 if (cmd_Lxcache
->Lxcache_case
.cc_cp
!= NULL
)
1137 cmd_Lxcache
->Lxcache_case
.cc_cp
= cmd_case_create(hdl
,
1138 &cmd_Lxcache
->Lxcache_header
, CMD_PTR_LxCACHE_CASE
,
1140 fltnm
= cmd_type_to_str(cmd_Lxcache
->Lxcache_type
);
1141 if (cmd_Lxcache
->Lxcache_case
.cc_cp
== NULL
) {
1143 "\n%s:cpu_id %d:Failed to create a case for"
1144 " index %d way %d bit %d\n",
1145 fltnm
, cpu
->cpu_cpuid
,
1146 cmd_Lxcache
->Lxcache_index
,
1147 cmd_Lxcache
->Lxcache_way
, cmd_Lxcache
->Lxcache_bit
);
1151 "\n%s:cpu_id %d: New case %s created.\n",
1152 fltnm
, cpu
->cpu_cpuid
, uuid
);
1153 if (cmd_Lxcache
->Lxcache_ep
)
1154 fmd_case_add_ereport(hdl
, cmd_Lxcache
->Lxcache_case
.cc_cp
,
1155 cmd_Lxcache
->Lxcache_ep
);
1160 cmd_repair_fmri(fmd_hdl_t
*hdl
, char *buf
)
1164 err
= fmd_repair_asru(hdl
, buf
);
1167 "Failed to repair %s err = %d\n", buf
, err
);
/*
 * cmd_Lxcache_unretire
 *
 * Clear the retirement of the cacheline described by
 * 'unretire_this_Lxcache' on cpu 'cpu', and tell fmd that the suspect
 * fmri retired earlier has been repaired. Special cases:
 *   - unretiring a TAG-retired line: if a DATA-type line at the same
 *     index/way is marked RERETIRED, the line is NOT unretired; the DATA
 *     Lxcache is re-faulted and demoted to RETIRED instead.
 *   - unretiring a DATA-retired line that is itself marked RERETIRED
 *     (i.e. also retired for TAG errors): the line is NOT unretired.
 *
 * NOTE(review): recovered from a lossy extraction; the numeric prefixes
 * are original source line numbers and several lines (braces,
 * fmd_hdl_debug() openers, return statements) are missing between
 * fragments. Comments describe only what the visible fragments establish.
 */
1173 cmd_Lxcache_unretire(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
,
1174 cmd_Lxcache_t
*unretire_this_Lxcache
, const char *fltnm
)
1176 cmd_ptrsubtype_t data_type
;
1177 cmd_Lxcache_t
*previously_retired_Lxcache
;
1178 int found_reretired_cacheline
= 0;
/*
 * NOTE(review): the declaration of 'certainty' (used below) is in a
 * missing fragment of this extraction.
 */
1182 * If we are unretiring a cacheline retired due to suspected TAG
1183 * fault, then we must first check if we are using a cacheline
1184 * that was retired earlier for DATA fault.
1185 * If so we will not unretire the cacheline.
1186 * We will change the flags to reflect the current condition.
1187 * We will return success, though.
1189 if (IS_TAG(unretire_this_Lxcache
->Lxcache_type
)) {
/* Map the TAG pointer subtype to the matching DATA subtype. */
1190 if (unretire_this_Lxcache
->Lxcache_type
== CMD_PTR_CPU_L2TAG
)
1191 data_type
= CMD_PTR_CPU_L2DATA
;
1192 if (unretire_this_Lxcache
->Lxcache_type
== CMD_PTR_CPU_L3TAG
)
1193 data_type
= CMD_PTR_CPU_L3DATA
;
1195 "\n%s:cpuid %d checking if there is a %s"
1196 " cacheline re-retired at this index %d and way %d\n",
1197 fltnm
, cpu
->cpu_cpuid
, cmd_type_to_str(data_type
),
1198 unretire_this_Lxcache
->Lxcache_index
,
1199 unretire_this_Lxcache
->Lxcache_way
);
/*
 * Look for a DATA-type Lxcache at the same index/way that is
 * currently flagged RERETIRED.
 */
1200 previously_retired_Lxcache
=
1201 cmd_Lxcache_lookup_by_type_index_way_flags(
1202 cpu
, data_type
, unretire_this_Lxcache
->Lxcache_index
,
1203 unretire_this_Lxcache
->Lxcache_way
,
1204 CMD_LxCACHE_F_RERETIRED
);
1205 if (previously_retired_Lxcache
) {
1207 "\n%s:cpuid %d Found a %s cacheline re-retired at"
1208 " this index %d and way %d. Will mark this"
1210 fltnm
, cpu
->cpu_cpuid
, cmd_type_to_str(data_type
),
1211 unretire_this_Lxcache
->Lxcache_index
,
1212 unretire_this_Lxcache
->Lxcache_way
);
1214 * We call the cmd_Lxcache_fault to inform fmd
1215 * about the suspect fmri. The cacheline is already
1216 * retired but the existing suspect fmri is for TAG
1217 * fault which will be removed in this routine.
/*
 * Certainty for the new fault depends on the prior retirement
 * reason; the condition's tail (original line 1220) is missing
 * from this extraction -- TODO confirm against full source.
 */
1219 if (previously_retired_Lxcache
->Lxcache_reason
1221 certainty
= HUNDRED_PERCENT
;
1223 certainty
= SUSPECT_PERCENT
;
1224 cmd_Lxcache_fault(hdl
, cpu
, previously_retired_Lxcache
,
1225 fltnm
, cpu
->cpu_fru_nvl
, certainty
);
/* Demote the DATA line from RERETIRED to RETIRED. */
1226 previously_retired_Lxcache
->Lxcache_flags
=
1227 CMD_LxCACHE_F_RETIRED
;
1229 * Update persistent storage
1231 cmd_Lxcache_write(hdl
, previously_retired_Lxcache
);
1232 found_reretired_cacheline
= 1;
1236 * We have been called to unretire a cacheline retired
1237 * earlier due to DATA errors.
1238 * If this cacheline is marked RERETIRED then it means that
1239 * the cacheline has been retired due to TAG errors and
1240 * we should not be unretiring the cacheline.
1242 if (unretire_this_Lxcache
->Lxcache_flags
&
1243 CMD_LxCACHE_F_RERETIRED
) {
1245 "\n%s:cpuid %d The cacheline at index %d and"
1246 " way %d which we are attempting to unretire"
1247 " is in RERETIRED state. Therefore we will not"
1248 " unretire it but will mark it as RETIRED.\n",
1249 fltnm
, cpu
->cpu_cpuid
,
1250 unretire_this_Lxcache
->Lxcache_index
,
1251 unretire_this_Lxcache
->Lxcache_way
);
1252 found_reretired_cacheline
= 1;
1256 * if we did not find a RERETIRED cacheline above
1257 * unretire the cacheline.
1259 if (!found_reretired_cacheline
) {
1260 if (cmd_cache_way_unretire(hdl
, cpu
, unretire_this_Lxcache
)
1264 unretire_this_Lxcache
->Lxcache_flags
= CMD_LxCACHE_F_UNRETIRED
;
1266 * We have exonerated the cacheline. We need to inform the fmd
1267 * that we have repaired the suspect fmri that we retired earlier.
1268 * The cpumem agent will not unretire cacheline in response to
1269 * the list.repair events it receives.
1271 if (unretire_this_Lxcache
->Lxcache_retired_fmri
[0] != 0) {
1273 "\n%s:cpuid %d Repairing the retired fmri %s",
1274 fltnm
, cpu
->cpu_cpuid
,
1275 unretire_this_Lxcache
->Lxcache_retired_fmri
);
1276 if (cmd_repair_fmri(hdl
,
1277 unretire_this_Lxcache
->Lxcache_retired_fmri
) != 0) {
1279 "\n%s:cpuid %d Failed to repair retired fmri.",
1280 fltnm
, cpu
->cpu_cpuid
);
1282 * We need to retire the cacheline that we just
/*
 * Roll back: re-retire the way so the DE's view and fmd's view of
 * the cacheline stay consistent after the failed fmri repair.
 */
1285 if (cmd_cache_way_retire(hdl
, cpu
,
1286 unretire_this_Lxcache
) == B_FALSE
) {
1288 * A hopeless situation.
1289 * cannot maintain consistency of cacheline
1290 * sate between fmd and DE.
1294 "\n%s:cpuid %d We are unable to repair"
1295 " the fmri we just unretired and are"
1296 " unable to restore the DE and fmd to"
1298 fltnm
, cpu
->cpu_cpuid
);
/*
 * Forget the stored fmri string; presumably reached only after a
 * successful repair (intervening control-flow lines are missing
 * from this extraction -- TODO confirm).
 */
1302 unretire_this_Lxcache
->Lxcache_retired_fmri
[0] = 0;
1309 cmd_Lxcache_retire(fmd_hdl_t
*hdl
, cmd_cpu_t
*cpu
,
1310 cmd_Lxcache_t
*retire_this_Lxcache
, const char *fltnm
, uint_t cert
)
1312 cmd_Lxcache_t
*previously_retired_Lxcache
;
1313 cmd_ptrsubtype_t data_type
;
1315 char suspect_list
[128];
1318 "\n%s:cpu_id %d: cmd_Lxcache_retire called for index %d"
1320 fltnm
, cpu
->cpu_cpuid
, retire_this_Lxcache
->Lxcache_index
,
1321 retire_this_Lxcache
->Lxcache_way
, retire_this_Lxcache
->Lxcache_bit
);
1322 if (fmd_case_solved(hdl
, retire_this_Lxcache
->Lxcache_case
.cc_cp
)) {
1324 * Case solved implies that the cache line is already
1325 * retired as SUSPECT_0_TAG and we are here to retire this
1327 * We will first repair the retired cacheline
1328 * so that it does not get retired during replay for
1330 * If we are able to repair the retired cacheline we close the
1331 * case and open a new case for it.
1333 if (retire_this_Lxcache
->Lxcache_reason
!=
1334 CMD_LXSUSPECT_0_TAG
) {
1336 "\n%s:cpu_id %d: Unexpected condition encountered."
1337 " Expected the reason for retirement as"
1338 " SUSPECT_0_TAG however found the reason"
1340 fltnm
, cpu
->cpu_cpuid
,
1342 retire_this_Lxcache
->Lxcache_reason
));
1346 "\n%s:cpu_id %d: We are re-retiring SUSPECT_0_TAG as"
1347 " SUSPECT_1_TAG index %d way %d bit %d\n",
1348 fltnm
, cpu
->cpu_cpuid
,
1349 retire_this_Lxcache
->Lxcache_index
,
1350 retire_this_Lxcache
->Lxcache_way
,
1351 retire_this_Lxcache
->Lxcache_bit
);
1353 "\n%s:cpu_id %d: The existing case for this Lxcache has"
1354 " has been already solved. We will first repair the suspect"
1355 " cacheline and if we are successful then close this case,"
1356 " and open a new case.\n",
1357 fltnm
, cpu
->cpu_cpuid
);
1359 * repair the retired cacheline.
1361 if (retire_this_Lxcache
->Lxcache_retired_fmri
[0] != 0) {
1363 "\n%s:cpuid %d Repairing the retired suspect"
1365 fltnm
, cpu
->cpu_cpuid
,
1366 retire_this_Lxcache
->Lxcache_retired_fmri
);
1367 if (cmd_repair_fmri(hdl
,
1368 retire_this_Lxcache
->Lxcache_retired_fmri
) != 0) {
1370 "\n%s:cpuid %d Failed to repair the"
1372 fltnm
, cpu
->cpu_cpuid
);
1375 retire_this_Lxcache
->Lxcache_retired_fmri
[0] =
1379 uuid
= fmd_case_uuid(hdl
,
1380 retire_this_Lxcache
->Lxcache_case
.cc_cp
);
1382 "\n%s:cpuid %d: Closing the case %s\n",
1383 fltnm
, cpu
->cpu_cpuid
, uuid
);
1384 cmd_case_fini(hdl
, retire_this_Lxcache
->Lxcache_case
.cc_cp
,
1386 retire_this_Lxcache
->Lxcache_case
.cc_cp
= NULL
;
1387 if (cmd_create_case_for_Lxcache(hdl
, cpu
, retire_this_Lxcache
)
1392 * Not a SUSPECT_0_TAG.
1393 * We should be entering this path if the cacheline is
1394 * transitioning from ACTIVE/UNRETIRED to RETIRED state.
1395 * If the cacheline state is not as expected we print debug
1396 * message and return failure.
1398 if ((retire_this_Lxcache
->Lxcache_flags
!=
1399 CMD_LxCACHE_F_ACTIVE
) &&
1400 (retire_this_Lxcache
->Lxcache_flags
1401 != CMD_LxCACHE_F_UNRETIRED
)) {
1403 * Unexpected condition.
1406 "\n%s:cpu_id %d:Unexpected state %s for the"
1407 " cacheline at index %d way %d encountered.\n",
1408 fltnm
, cpu
->cpu_cpuid
,
1410 retire_this_Lxcache
->Lxcache_flags
),
1411 retire_this_Lxcache
->Lxcache_index
,
1412 retire_this_Lxcache
->Lxcache_way
);
1416 suspect_list
[0] = 0;
1417 (void) cmd_fmri_nvl2str(hdl
, retire_this_Lxcache
->Lxcache_asru
.fmri_nvl
,
1418 suspect_list
, sizeof (suspect_list
));
1420 "\n%s:cpu_id %d:current suspect list is %s\n",
1421 fltnm
, cpu
->cpu_cpuid
, suspect_list
);
1422 cmd_Lxcache_fault(hdl
, cpu
, retire_this_Lxcache
, fltnm
,
1425 retire_this_Lxcache
->Lxcache_flags
= CMD_LxCACHE_F_RETIRED
;
1426 if (IS_TAG(retire_this_Lxcache
->Lxcache_type
)) {
1428 * If the cacheline we just retired was retired earlier
1429 * due to DATA faults we mark the Lxcache
1430 * corresponding to DATA as RERETIRED.
1432 if (retire_this_Lxcache
->Lxcache_type
== CMD_PTR_CPU_L2TAG
)
1433 data_type
= CMD_PTR_CPU_L2DATA
;
1434 if (retire_this_Lxcache
->Lxcache_type
== CMD_PTR_CPU_L3TAG
)
1435 data_type
= CMD_PTR_CPU_L3DATA
;
1437 "\n%s:cpuid %d checking if there is a %s"
1438 " cacheline retired at this index %d way %d\n",
1439 fltnm
, cpu
->cpu_cpuid
,
1440 cmd_type_to_str(data_type
),
1441 retire_this_Lxcache
->Lxcache_index
,
1442 retire_this_Lxcache
->Lxcache_way
);
1443 previously_retired_Lxcache
=
1444 cmd_Lxcache_lookup_by_type_index_way_flags(cpu
,
1445 data_type
, retire_this_Lxcache
->Lxcache_index
,
1446 retire_this_Lxcache
->Lxcache_way
, CMD_LxCACHE_F_RETIRED
);
1447 if (previously_retired_Lxcache
) {
1449 "\n%s:cpu_id %d: Found index %d way %d"
1450 " retired earlier. Will mark this Lxcache"
1452 fltnm
, cpu
->cpu_cpuid
,
1453 retire_this_Lxcache
->Lxcache_index
,
1454 retire_this_Lxcache
->Lxcache_way
);
1456 * First repair the retired cacheline and if successful
1457 * close the existing case and create a new case.
1461 * This cacheline has already been retired for
1463 * Repair the previously retired DATA fault cacheline so
1464 * that it does not get retired by fmd during replay.
1466 if (previously_retired_Lxcache
->Lxcache_retired_fmri
[0]
1469 "\n%s:cpuid %d Repairing the cacheline"
1470 " retired due to data errors. %s\n",
1471 fltnm
, cpu
->cpu_cpuid
,
1472 previously_retired_Lxcache
->
1473 Lxcache_retired_fmri
);
1474 if (cmd_repair_fmri(hdl
,
1475 previously_retired_Lxcache
->
1476 Lxcache_retired_fmri
)
1479 "\n%s:cpuid %d Failed to repair the"
1481 fltnm
, cpu
->cpu_cpuid
);
1484 previously_retired_Lxcache
->
1485 Lxcache_retired_fmri
[0] = 0;
1489 previously_retired_Lxcache
->Lxcache_case
.cc_cp
,
1491 previously_retired_Lxcache
->Lxcache_case
.cc_cp
= NULL
;
1492 previously_retired_Lxcache
->Lxcache_flags
=
1493 CMD_LxCACHE_F_RERETIRED
;
1495 * Update persistent storage
1497 cmd_Lxcache_write(hdl
, previously_retired_Lxcache
);
1499 * Create a new case so that this Lxcache structure
1500 * gets restored on replay.
1502 if (cmd_create_case_for_Lxcache(hdl
, cpu
,
1503 previously_retired_Lxcache
) == B_FALSE
)
1507 cmd_retire_cpu_if_limits_exceeded(hdl
, cpu
,
1508 retire_this_Lxcache
->Lxcache_type
,