4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
25 * Copyright 2020 Joyent, Inc.
27 * Assembly code support for Cheetah/Cheetah+ modules
32 #include <sys/asm_linkage.h>
34 #include <vm/hat_sfmmu.h>
35 #include <sys/machparam.h>
36 #include <sys/machcpuvar.h>
37 #include <sys/machthread.h>
38 #include <sys/machtrap.h>
39 #include <sys/privregs.h>
41 #include <sys/cheetahregs.h>
42 #include <sys/us3_module.h>
43 #include <sys/xc_impl.h>
44 #include <sys/intreg.h>
45 #include <sys/async.h>
46 #include <sys/clock.h>
47 #include <sys/cheetahasm.h>
48 #include <sys/cmpregs.h>
51 #include <sys/traptrace.h>
52 #endif /* TRAPTRACE */
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 * Flush one page from the D$ using one of three strategies selected at
 * runtime by the dflush_type variable: FLUSHALL_TYPE (displacement-flush
 * the whole D$), FLUSHPAGE_TYPE (invalidate by physical address via
 * ASI_DC_INVAL), or FLUSHMATCH_TYPE (walk tags and clear only matching
 * valid lines via ASI_DC_TAG).
 * In:  arg1 = pfn, arg2 = virtual color; tmp1-tmp3 = scratch.
 * NOTE(review): this extraction is missing interior lines (labels and
 * branch targets such as 2:/4:/5: are not visible) — do not modify
 * without consulting the complete original source.
 */
56 #define DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3) \
57 ldxa [%g0]ASI_DCU, tmp1 ;\
58 btst DCU_DC, tmp1 /* is dcache enabled? */ ;\
60 ASM_LD(tmp1, dcache_linesize) ;\
61 ASM_LD(tmp2, dflush_type) ;\
62 cmp tmp2, FLUSHPAGE_TYPE ;\
65 sllx arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */ ;\
66 ASM_LD(tmp3, dcache_size) ;\
67 cmp tmp2, FLUSHMATCH_TYPE ;\
71 * flushtype = FLUSHALL_TYPE, flush the whole thing \
73 * tmp1 = cache line size \
75 sub tmp3, tmp1, tmp2 ;\
77 stxa %g0, [tmp2]ASI_DC_TAG ;\
81 sub tmp2, tmp1, tmp2 ;\
85 * flushtype = FLUSHPAGE_TYPE \
87 * arg2 = virtual color \
88 * tmp1 = cache line size \
89 * tmp2 = tag from cache \
93 set MMU_PAGESIZE, tmp3 ;\
94 sllx arg1, MMU_PAGESHIFT, arg1 /* pfn to 43 bit PA */ ;\
95 sub tmp3, tmp1, tmp3 ;\
97 stxa %g0, [arg1 + tmp3]ASI_DC_INVAL ;\
101 bnz,pt %icc, 4b /* branch if not done */ ;\
102 sub tmp3, tmp1, tmp3 ;\
106 * flushtype = FLUSHMATCH_TYPE \
107 * arg1 = tag to compare against \
108 * tmp1 = cache line size \
109 * tmp3 = cache size \
114 sub tmp3, tmp1, arg2 ;\
116 ldxa [arg2]ASI_DC_TAG, tmp2 /* read tag */ ;\
117 btst CHEETAH_DC_VBIT_MASK, tmp2 ;\
118 bz,pn %icc, 5f /* br if no valid sub-blocks */ ;\
119 andn tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */ ;\
121 bne,pn %icc, 5f /* branch if tag miss */ ;\
123 stxa %g0, [arg2]ASI_DC_TAG ;\
127 bne,pt %icc, 4b /* branch if not done */ ;\
128 sub arg2, tmp1, arg2 ;\
132 * macro that flushes the entire dcache color
133 * dcache size = 64K, one way 16K
136 * arg = virtual color register (not clobbered)
137 * way = way#, can either be a constant or a register (not clobbered)
138 * tmp1, tmp2, tmp3 = scratch registers
/*
 * NOTE(review): displacement-flushes one page-colored region of one D$ way
 * by writing zero tags via ASI_DC_TAG. The no-op path taken when DCU_DC is
 * clear (dcache disabled) is not visible in this extraction — interior
 * lines/labels are elided; confirm against the complete original.
 */
141 #define DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3) \
142 ldxa [%g0]ASI_DCU, tmp1; \
143 btst DCU_DC, tmp1; /* is dcache enabled? */ \
145 ASM_LD(tmp1, dcache_linesize) \
147 * arg = virtual color \
148 * tmp1 = cache line size \
150 sllx arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */ \
152 sllx tmp3, 14, tmp3; /* One way 16K */ \
153 or tmp2, tmp3, tmp3; \
154 set MMU_PAGESIZE, tmp2; \
157 * tmp3 = cached page in dcache \
159 sub tmp2, tmp1, tmp2; \
161 stxa %g0, [tmp3 + tmp2]ASI_DC_TAG; \
165 sub tmp2, tmp1, tmp2; \
171 * Cheetah MMU and Cache operations.
!
! NOTE(review): vtag_flushpage(vaddr, sfmmup) — demaps one page from both
! the D-TLB and I-TLB. Kernel-context demaps go straight to the demap ASIs;
! user demaps must temporarily install the target context number into
! MMU_PCONTEXT (preserving the nucleus page-size field) because the
! secondary context cannot be used for the Cheetah IMMU. Interrupts are
! disabled across the context-register swap. Interior lines (labels,
! delay slots) are elided in this extraction.
!
174 ENTRY_NP(vtag_flushpage)
176 * flush page from the tlb
183 PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
188 andn %o5, PSTATE_IE, %o4
192 * Then, blow out the tlb
193 * Interrupts are disabled to prevent the primary ctx register
194 * from changing underneath us.
196 sethi %hi(ksfmmup), %o3
197 ldx [%o3 + %lo(ksfmmup)], %o3
199 bne,pt %xcc, 1f ! if not kernel as, go to 1
200 sethi %hi(FLUSH_ADDR), %o3
202 * For Kernel demaps use primary. type = page implicitly
204 stxa %g0, [%o0]ASI_DTLB_DEMAP /* dmmu flush for KCONTEXT */
205 stxa %g0, [%o0]ASI_ITLB_DEMAP /* immu flush for KCONTEXT */
208 wrpr %g0, %o5, %pstate /* enable interrupts */
211 * User demap. We need to set the primary context properly.
212 * Secondary context cannot be used for Cheetah IMMU.
217 SFMMU_CPU_CNUM(%o1, %g1, %g2) ! %g1 = sfmmu cnum on this CPU
219 ldub [%o1 + SFMMU_CEXT], %o4 ! %o4 = sfmmup->sfmmu_cext
220 sll %o4, CTXREG_EXT_SHIFT, %o4
221 or %g1, %o4, %g1 ! %g1 = primary pgsz | cnum
224 set MMU_PCONTEXT, %o4
225 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
226 ldxa [%o4]ASI_DMMU, %o2 ! %o2 = save old ctxnum
227 srlx %o2, CTXREG_NEXT_SHIFT, %o1 ! need to preserve nucleus pgsz
228 sllx %o1, CTXREG_NEXT_SHIFT, %o1 ! %o1 = nucleus pgsz
229 or %g1, %o1, %g1 ! %g1 = nucleus pgsz | primary pgsz | cnum
230 stxa %g1, [%o4]ASI_DMMU ! wr new ctxum
232 stxa %g0, [%o0]ASI_DTLB_DEMAP
233 stxa %g0, [%o0]ASI_ITLB_DEMAP
234 stxa %o2, [%o4]ASI_DMMU /* restore old ctxnum */
239 wrpr %g0, %o5, %pstate /* enable interrupts */
240 SET_SIZE(vtag_flushpage)
!
! NOTE(review): vtag_flushall / demap_all — demap the entire D-TLB and
! I-TLB using the DEMAP_ALL_TYPE address. Delay-slot/return lines are
! elided in this extraction.
!
242 ENTRY_NP2(vtag_flushall, demap_all)
246 sethi %hi(FLUSH_ADDR), %o3
247 set DEMAP_ALL_TYPE, %g1
248 stxa %g0, [%g1]ASI_DTLB_DEMAP
249 stxa %g0, [%g1]ASI_ITLB_DEMAP
254 SET_SIZE(vtag_flushall)
!
! NOTE(review): x-trap (TL=1) variant of vtag_flushpage; runs with only the
! global registers available, so the saved/new context numbers are shuffled
! through %g4-%g6 instead of the %o registers. Interrupts are presumably
! already disabled at TL>0 — TODO confirm against full source.
!
257 ENTRY_NP(vtag_flushpage_tl1)
259 * x-trap to flush page from tlb and tsb
261 * %g1 = vaddr, zero-extended on 32-bit kernel
264 * assumes TSBE_TAG = 0
266 srln %g1, MMU_PAGESHIFT, %g1
268 sethi %hi(ksfmmup), %g3
269 ldx [%g3 + %lo(ksfmmup)], %g3
271 bne,pt %xcc, 1f ! if not kernel as, go to 1
272 slln %g1, MMU_PAGESHIFT, %g1 /* g1 = vaddr */
274 /* We need to demap in the kernel context */
275 or DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
276 stxa %g0, [%g1]ASI_DTLB_DEMAP
277 stxa %g0, [%g1]ASI_ITLB_DEMAP
280 /* We need to demap in a user context */
281 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
283 SFMMU_CPU_CNUM(%g2, %g6, %g3) ! %g6 = sfmmu cnum on this CPU
285 ldub [%g2 + SFMMU_CEXT], %g4 ! %g4 = sfmmup->cext
286 sll %g4, CTXREG_EXT_SHIFT, %g4
287 or %g6, %g4, %g6 ! %g6 = pgsz | cnum
289 set MMU_PCONTEXT, %g4
290 ldxa [%g4]ASI_DMMU, %g5 /* rd old ctxnum */
291 srlx %g5, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */
292 sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */
293 or %g6, %g2, %g6 /* %g6 = nucleus pgsz | primary pgsz | cnum */
294 stxa %g6, [%g4]ASI_DMMU /* wr new ctxum */
295 stxa %g0, [%g1]ASI_DTLB_DEMAP
296 stxa %g0, [%g1]ASI_ITLB_DEMAP
297 stxa %g5, [%g4]ASI_DMMU /* restore old ctxnum */
299 SET_SIZE(vtag_flushpage_tl1)
!
! NOTE(review): x-trap handler that demaps a run of pgcnt consecutive
! MMU_PAGESIZE pages. The page count is packed into the low 6 bits of %g2
! (as pgcnt-1) with the sfmmup pointer in the upper bits. Loop labels and
! branch instructions are elided in this extraction.
!
302 ENTRY_NP(vtag_flush_pgcnt_tl1)
304 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
306 * %g1 = vaddr, zero-extended on 32-bit kernel
307 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is pass'ed in via pgcnt6 bits.
309 * NOTE: this handler relies on the fact that no
310 * interrupts or traps can occur during the loop
311 * issuing the TLB_DEMAP operations. It is assumed
312 * that interrupts are disabled and this code is
313 * fetching from the kernel locked text address.
315 * assumes TSBE_TAG = 0
317 set SFMMU_PGCNT_MASK, %g4
318 and %g4, %g2, %g3 /* g3 = pgcnt - 1 */
319 add %g3, 1, %g3 /* g3 = pgcnt */
321 andn %g2, SFMMU_PGCNT_MASK, %g2 /* g2 = sfmmup */
322 srln %g1, MMU_PAGESHIFT, %g1
324 sethi %hi(ksfmmup), %g4
325 ldx [%g4 + %lo(ksfmmup)], %g4
327 bne,pn %xcc, 1f /* if not kernel as, go to 1 */
328 slln %g1, MMU_PAGESHIFT, %g1 /* g1 = vaddr */
330 /* We need to demap in the kernel context */
331 or DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
332 set MMU_PAGESIZE, %g2 /* g2 = pgsize */
333 sethi %hi(FLUSH_ADDR), %g5
335 stxa %g0, [%g1]ASI_DTLB_DEMAP
336 stxa %g0, [%g1]ASI_ITLB_DEMAP
337 flush %g5 ! flush required by immu
339 deccc %g3 /* decr pgcnt */
341 add %g1, %g2, %g1 /* next page */
345 * We need to demap in a user context
350 SFMMU_CPU_CNUM(%g2, %g5, %g6) ! %g5 = sfmmu cnum on this CPU
352 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
354 ldub [%g2 + SFMMU_CEXT], %g4 ! %g4 = sfmmup->cext
355 sll %g4, CTXREG_EXT_SHIFT, %g4
358 set MMU_PCONTEXT, %g4
359 ldxa [%g4]ASI_DMMU, %g6 /* rd old ctxnum */
360 srlx %g6, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */
361 sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */
362 or %g5, %g2, %g5 /* %g5 = nucleus pgsz | primary pgsz | cnum */
363 stxa %g5, [%g4]ASI_DMMU /* wr new ctxum */
365 set MMU_PAGESIZE, %g2 /* g2 = pgsize */
366 sethi %hi(FLUSH_ADDR), %g5
368 stxa %g0, [%g1]ASI_DTLB_DEMAP
369 stxa %g0, [%g1]ASI_ITLB_DEMAP
370 flush %g5 ! flush required by immu
372 deccc %g3 /* decr pgcnt */
374 add %g1, %g2, %g1 /* next page */
376 stxa %g6, [%g4]ASI_DMMU /* restore old ctxnum */
378 SET_SIZE(vtag_flush_pgcnt_tl1)
!
! NOTE(review): x-trap (TL=1) variant of vtag_flushall — demap both TLBs
! entirely. Return/retry lines are elided in this extraction.
!
380 ENTRY_NP(vtag_flushall_tl1)
382 * x-trap to flush tlb
384 set DEMAP_ALL_TYPE, %g4
385 stxa %g0, [%g4]ASI_DTLB_DEMAP
386 stxa %g0, [%g4]ASI_ITLB_DEMAP
388 SET_SIZE(vtag_flushall_tl1)
392 * vac_flushpage(pfnum, color)
393 * Flush 1 8k page of the D-$ with physical page = pfnum
395 * The cheetah dcache is a 64k pseudo 4 way associative cache.
396 * It is virtual indexed, physically tagged cache.
!
! NOTE(review): thin wrappers around DCACHE_FLUSHPAGE; the TL=0 and TL=1
! (x-trap) variants differ only in which register windows carry the
! arguments. The ENTRY line for vac_flushpage is elided in this extraction.
!
406 * flush page from the d$
408 * %o0 = pfnum, %o1 = color
410 DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
413 SET_SIZE(vac_flushpage)
416 ENTRY_NP(vac_flushpage_tl1)
418 * x-trap to flush page from the d$
420 * %g1 = pfnum, %g2 = color
422 DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
424 SET_SIZE(vac_flushpage_tl1)
!
! NOTE(review): flush one virtual color from all four D$ ways, at TL=0
! (%o registers) and TL=1 x-trap (%g registers) respectively. Way numbers
! 0-3 match the 4-way organization documented above DCACHE_FLUSHCOLOR.
!
427 ENTRY(vac_flushcolor)
431 DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)
432 DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)
433 DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)
434 DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)
437 SET_SIZE(vac_flushcolor)
440 ENTRY(vac_flushcolor_tl1)
444 DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)
445 DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)
446 DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)
447 DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)
449 SET_SIZE(vac_flushcolor_tl1)
452 * Determine whether or not the IDSR is busy.
453 * Entry: no arguments
454 * Returns: 1 if busy, 0 otherwise
!
! NOTE(review): the ENTRY/SET_SIZE lines and the busy-bit test/return
! sequence are elided in this extraction; only the IDSR read survives.
!
457 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1
! Panic message used when the interrupt dispatch status register is
! unexpectedly busy (referenced by init_mondo below).
467 .global _dispatch_status_busy
468 _dispatch_status_busy:
469 .asciz "ASI_INTR_DISPATCH_STATUS error: busy"
473 * Setup interrupt dispatch data registers
475 * %o0 - function or inumber to call
476 * %o1, %o2 - arguments (2 uint64_t's)
!
! NOTE(review): init_mondo verifies the IDSR is idle (panicking with
! _dispatch_status_busy otherwise) and then falls into
! init_mondo_nocheck, which writes the three dispatch data registers.
! The ENTRY(init_mondo), panic call, and register-address setup lines
! are elided in this extraction.
!
483 ! IDSR should not be busy at the moment
485 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1
489 sethi %hi(_dispatch_status_busy), %o0
491 or %o0, %lo(_dispatch_status_busy), %o0
494 ALTENTRY(init_mondo_nocheck)
496 ! interrupt vector dispatch data reg 0
502 stxa %o0, [%g1]ASI_INTR_DISPATCH
505 ! interrupt vector dispatch data reg 1
507 stxa %o1, [%g2]ASI_INTR_DISPATCH
510 ! interrupt vector dispatch data reg 2
512 stxa %o2, [%g3]ASI_INTR_DISPATCH
517 SET_SIZE(init_mondo_nocheck)
521 #if !(defined(JALAPENO) || defined(SERRANO))
524 * Ship mondo to aid using busy/nack pair bn
!
! NOTE(review): builds the interrupt dispatch command register address from
! the agent id (%o0) and busy/nack pair (%o1), then triggers the dispatch.
! ENTRY/SET_SIZE lines are elided in this extraction.
!
527 sll %o0, IDCR_PID_SHIFT, %g1 ! IDCR<18:14> = agent id
528 sll %o1, IDCR_BN_SHIFT, %g2 ! IDCR<28:24> = b/n pair
529 or %g1, IDCR_OFFSET, %g1 ! IDCR<13:0> = 0x70
531 stxa %g0, [%g1]ASI_INTR_DISPATCH ! interrupt vector dispatch
537 #endif /* !(JALAPENO || SERRANO) */
542 * Flush 1 page of the I-$ starting at vaddr
544 * %o1 bytes to be flushed
545 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
546 * the stores from all processors so that a FLUSH instruction is only needed
547 * to ensure pipeline is consistent. This means a single flush is sufficient at
548 * the end of a sequence of stores that updates the instruction stream to
549 * ensure correct operation.
552 ENTRY(flush_instr_mem)
553 flush %o0 ! address irrelevant
556 SET_SIZE(flush_instr_mem)
559 #if defined(CPU_IMP_ECACHE_ASSOC)
!
! NOTE(review): returns the E$ control register in %o0; Jaguar with a
! shared E$ reads ASI_EC_CFG_TIMING, other Cheetah/Cheetah+ parts read
! ASI_EC_CTRL. The CPU-implementation test and branch are elided here.
!
561 ENTRY(get_ecache_ctrl)
565 ! Putting an ASI access in the delay slot may
566 ! cause it to be accessed, even when annulled.
570 ldxa [%g0]ASI_EC_CFG_TIMING, %o0 ! read Jaguar shared E$ ctrl reg
574 ldxa [%g0]ASI_EC_CTRL, %o0 ! read Ch/Ch+ E$ control reg
578 SET_SIZE(get_ecache_ctrl)
580 #endif /* CPU_IMP_ECACHE_ASSOC */
583 #if !(defined(JALAPENO) || defined(SERRANO))
!
! NOTE(review): flush_ecache(physaddr, size, linesize) — displacement-flush
! the entire E$, flushing the Panther L2 first where required. The
! ENTRY line and %o1 (size) comment are elided in this extraction.
!
587 * %o0 - 64 bit physical address
589 * %o2 - ecache linesize
595 * For certain CPU implementations, we have to flush the L2 cache
596 * before flushing the ecache.
598 PN_L2_FLUSHALL(%g3, %g4, %g5)
601 * Flush the entire Ecache using displacement flush.
603 ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)
607 SET_SIZE(flush_ecache)
609 #endif /* !(JALAPENO || SERRANO) */
!
! NOTE(review): flush_dcache — flush the entire D$ using the global
! dcache_size/dcache_linesize values. The ENTRY line is elided in this
! extraction.
!
613 ASM_LD(%o0, dcache_size)
614 ASM_LD(%o1, dcache_linesize)
615 CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
618 SET_SIZE(flush_dcache)
!
! NOTE(review): flush_icache — flush the entire I$. Prefers the per-CPU
! private icache geometry (CHPR_ICACHE_*); falls back to the global
! icache_size/icache_linesize when cpu_private is not yet initialized
! (the fallback label flush_icache_1 and ENTRY line are elided here).
!
622 GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
623 ld [%o0 + CHPR_ICACHE_LINESIZE], %o1
625 ld [%o0 + CHPR_ICACHE_SIZE], %o0
627 ASM_LD(%o0, icache_size)
628 ASM_LD(%o1, icache_linesize)
630 CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
633 SET_SIZE(flush_icache)
!
! NOTE(review): kdi_flush_idcache(dc_size, dc_linesize, ic_size,
! ic_linesize) — debugger entry point that flushes both L1 caches with
! caller-supplied geometry. flush_pcache's ENTRY line is elided below.
!
635 ENTRY(kdi_flush_idcache)
636 CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
637 CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
641 SET_SIZE(kdi_flush_idcache)
644 PCACHE_FLUSHALL(%o0, %o1, %o2)
647 SET_SIZE(flush_pcache)
650 #if defined(CPU_IMP_L1_CACHE_PARITY)
653 * Get dcache data and tag. The Dcache data is a pointer to a ch_dc_data_t
654 * structure (see cheetahregs.h):
655 * The Dcache *should* be turned off when this code is executed.
!
! NOTE(review): get_dcache_dtag(index, ch_dc_data_t *) — runs with
! PSTATE.IE and PSTATE.AM cleared, snapshots the D$ tag/utag/snoop-tag and
! data words at the given index, and additionally gathers the 32 parity
! bits on Panther (where parity lives in ASI_DC_DATA rather than the
! microtag). Loop labels and some branches are elided in this extraction.
!
658 ENTRY(get_dcache_dtag)
660 andn %o5, PSTATE_IE | PSTATE_AM, %o3
661 wrpr %g0, %o3, %pstate
663 stx %o0, [%o1 + CH_DC_IDX]
667 ldxa [%o0]ASI_DC_TAG, %o2
668 stx %o2, [%o1 + CH_DC_TAG]
670 ldxa [%o0]ASI_DC_UTAG, %o2
672 stx %o2, [%o1 + CH_DC_UTAG]
673 ldxa [%o0]ASI_DC_SNP_TAG, %o2
674 stx %o2, [%o1 + CH_DC_SNTAG]
675 add %o1, CH_DC_DATA, %o1
678 membar #Sync ! required before ASI_DC_DATA
679 ldxa [%o0 + %o3]ASI_DC_DATA, %o2
680 membar #Sync ! required after ASI_DC_DATA
682 cmp %o3, CH_DC_DATA_REG_SIZE - 8
687 * Unlike other CPUs in the family, D$ data parity bits for Panther
688 * do not reside in the microtag. Instead, we have to read them
689 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
690 * of just having 8 parity bits to protect all 32 bytes of data
691 * per line, we now have 32 bits of parity.
694 cmp %o3, PANTHER_IMPL
699 * move our pointer to the next field where we store parity bits
700 * and add the offset of the last parity byte since we will be
701 * storing all 4 parity bytes within one 64 bit field like this:
703 * +------+------------+------------+------------+------------+
704 * | - | DC_parity | DC_parity | DC_parity | DC_parity |
705 * | - | for word 3 | for word 2 | for word 1 | for word 0 |
706 * +------+------------+------------+------------+------------+
707 * 63:32 31:24 23:16 15:8 7:0
709 add %o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
711 /* add the DC_data_parity bit into our working index */
713 sll %o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
716 membar #Sync ! required before ASI_DC_DATA
717 ldxa [%o0 + %o3]ASI_DC_DATA, %o2
718 membar #Sync ! required after ASI_DC_DATA
721 cmp %o3, CH_DC_DATA_REG_SIZE - 8
726 wrpr %g0, %o5, %pstate
727 SET_SIZE(get_dcache_dtag)
731 * Get icache data and tag. The data argument is a pointer to a ch_ic_data_t
732 * structure (see cheetahregs.h):
733 * The Icache *Must* be turned off when this function is called.
734 * This is because diagnostic accesses to the Icache interfere with cache
!
! NOTE(review): get_icache_dtag(index, ch_ic_data_t *) — with interrupts
! and 32-bit address masking disabled, walks the four I$ tag fields
! (PA tag, utag, upper, lower) by stepping %o0 through the CH_ICTAG_*
! offsets, then captures the snoop tag and data words. Loop labels are
! elided in this extraction.
!
738 ENTRY(get_icache_dtag)
740 andn %o5, PSTATE_IE | PSTATE_AM, %o3
741 wrpr %g0, %o3, %pstate
743 stx %o0, [%o1 + CH_IC_IDX]
744 ldxa [%o0]ASI_IC_TAG, %o2
745 stx %o2, [%o1 + CH_IC_PATAG]
746 add %o0, CH_ICTAG_UTAG, %o0
747 ldxa [%o0]ASI_IC_TAG, %o2
748 add %o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
749 stx %o2, [%o1 + CH_IC_UTAG]
750 ldxa [%o0]ASI_IC_TAG, %o2
751 add %o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
752 stx %o2, [%o1 + CH_IC_UPPER]
753 ldxa [%o0]ASI_IC_TAG, %o2
754 andn %o0, CH_ICTAG_TMASK, %o0
755 stx %o2, [%o1 + CH_IC_LOWER]
756 ldxa [%o0]ASI_IC_SNP_TAG, %o2
757 stx %o2, [%o1 + CH_IC_SNTAG]
758 add %o1, CH_IC_DATA, %o1
761 ldxa [%o0 + %o3]ASI_IC_DATA, %o2
763 cmp %o3, PN_IC_DATA_REG_SIZE - 8
768 wrpr %g0, %o5, %pstate
769 SET_SIZE(get_icache_dtag)
772 * Get pcache data and tags.
774 * pcache_idx - fully constructed VA for accessing P$ diagnostic
775 * registers. Contains PC_way and PC_addr shifted into
776 * the correct bit positions. See the PRM for more details.
777 * data - pointer to a ch_pc_data_t
778 * structure (see cheetahregs.h):
!
! NOTE(review): same capture pattern as get_dcache_dtag, applied to the
! prefetch cache (status, tag, snoop tag, then data words). Loop labels
! are elided in this extraction.
!
781 ENTRY(get_pcache_dtag)
783 andn %o5, PSTATE_IE | PSTATE_AM, %o3
784 wrpr %g0, %o3, %pstate
786 stx %o0, [%o1 + CH_PC_IDX]
787 ldxa [%o0]ASI_PC_STATUS_DATA, %o2
788 stx %o2, [%o1 + CH_PC_STATUS]
789 ldxa [%o0]ASI_PC_TAG, %o2
790 stx %o2, [%o1 + CH_PC_TAG]
791 ldxa [%o0]ASI_PC_SNP_TAG, %o2
792 stx %o2, [%o1 + CH_PC_SNTAG]
793 add %o1, CH_PC_DATA, %o1
796 ldxa [%o0 + %o3]ASI_PC_DATA, %o2
798 cmp %o3, CH_PC_DATA_REG_SIZE - 8
803 wrpr %g0, %o5, %pstate
804 SET_SIZE(get_pcache_dtag)
809 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
810 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
811 * %o0 - 64 bit constant
!
! NOTE(review): set_dcu writes %o0 to the DCU control register; the flush
! afterwards is mandated when the IC bit changes. get_dcu reads it back.
! ENTRY/SET_SIZE/return lines are elided in this extraction.
!
814 stxa %o0, [%g0]ASI_DCU ! Store to DCU
815 flush %g0 /* flush required after changing the IC bit */
822 * Return DCU register.
825 ldxa [%g0]ASI_DCU, %o0 /* DCU control register */
831 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
833 * This handler is used to check for softints generated by error trap
834 * handlers to report errors. On Cheetah, this mechanism is used by the
835 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
836 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
837 * NB: Must be 8 instructions or less to fit in trap table and code must
841 ENTRY_NP(ch_pil15_interrupt_instr)
842 ASM_JMP(%g1, ch_pil15_interrupt)
843 SET_SIZE(ch_pil15_interrupt_instr)
!
! NOTE(review): dispatches a PIL-15 softint either to cpu_tl1_error (when
! ch_err_tl1_pending[CPU] is set, i.e. a deferred TL>0 error) or to the
! stock pil_interrupt handler. Branches and the systrap setup between the
! visible lines are elided in this extraction.
!
846 ENTRY_NP(ch_pil15_interrupt)
849 * Since pil_interrupt is hacked to assume that every level 15
850 * interrupt is generated by the CPU to indicate a performance
851 * counter overflow this gets ugly. Before calling pil_interrupt
852 * the Error at TL>0 pending status is inspected. If it is
853 * non-zero, then an error has occurred and it is handled.
854 * Otherwise control is transferred to pil_interrupt. Note that if
855 * an error is detected pil_interrupt will not be called and
856 * overflow interrupts may be lost causing erroneous performance
857 * measurements. However, error-recovery will have a detrimental
858 * effect on performance anyway.
861 set ch_err_tl1_pending, %g4
862 ldub [%g1 + %g4], %g2
867 * We have a pending TL>0 error, clear the TL>0 pending status.
876 wr %g5, CLEAR_SOFTINT
879 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
880 * to process the Fast ECC/Cache Parity at TL>0 error. Clear
883 set cpu_tl1_error, %g1
890 * The logout is invalid.
892 * Call the default interrupt handler.
894 sethi %hi(pil_interrupt), %g1
895 jmp %g1 + %lo(pil_interrupt)
898 SET_SIZE(ch_pil15_interrupt)
904 * Cheetah provides error checking for all memory access paths between
905 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
906 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
907 * AFAR and one of the following traps is generated (provided that it
908 * is enabled in External Cache Error Enable Register) to handle that
910 * 1. trap 0x70: Precise trap
911 * tt0_fecc for errors at trap level(TL)>=0
912 * 2. trap 0x0A and 0x32: Deferred trap
913 * async_err for errors at TL>=0
914 * 3. trap 0x63: Disrupting trap
915 * ce_err for errors at TL=0
916 * (Note that trap 0x63 cannot happen at trap level > 0)
918 * Trap level one handlers panic the system except for the fast ecc
919 * error handler which tries to recover from certain errors.
923 * FAST ECC TRAP STRATEGY:
925 * Software must handle single and multi bit errors which occur due to data
926 * or instruction cache reads from the external cache. A single or multi bit
927 * error occurring in one of these situations results in a precise trap.
929 * The basic flow of this trap handler is as follows:
931 * 1) Record the state and then turn off the Dcache and Icache. The Dcache
932 * is disabled because bad data could have been installed. The Icache is
933 * turned off because we want to capture the Icache line related to the
935 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
936 * 3) Park sibling core if caches are shared (to avoid race condition while
937 * accessing shared resources such as L3 data staging register during
939 * 4) Read the AFAR and AFSR.
940 * 5) If CPU logout structure is not being used, then:
941 * 6) Clear all errors from the AFSR.
942 * 7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
943 * 8) Flush Ecache then Flush Dcache and Icache and restore to previous
945 * 9) Unpark sibling core if we parked it earlier.
946 * 10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
948 * 6) Otherwise, if CPU logout structure is being used:
949 * 7) Increment the "logout busy count".
950 * 8) Flush Ecache then Flush Dcache and Icache and restore to previous
952 * 9) Unpark sibling core if we parked it earlier.
953 * 10) Issue a retry since the other CPU error logging code will end up
954 * finding this error bit and logging information about it later.
955 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
956 * yet initialized such that we can't even check the logout struct, then
957 * we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
958 * call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
959 * to determine information such as TL, TT, CEEN and NCEEN settings, etc
960 * in the high level trap handler since we don't have access to detailed
961 * logout information in cases where the cpu_private struct is not yet
964 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
965 * Fast ECC traps in the TL=0 code. If we get a Fast ECC event here in
966 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
967 * since it uses different code/data from this handler, has a better
968 * chance of fixing things up than simply recursing through this code
969 * again (this would probably cause an eventual kernel stack overflow).
970 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
971 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
972 * the Fast ECC at TL>0 handler and eventually Red Mode.
974 * Note that for Cheetah (and only Cheetah), we use alias addresses for
975 * flushing rather than ASI accesses (which don't exist on Cheetah).
976 * Should we encounter a Fast ECC error within this handler on Cheetah,
977 * there's a good chance it's within the ecache_flushaddr buffer (since
978 * it's the largest piece of memory we touch in the handler and it is
979 * usually kernel text/data). For that reason the Fast ECC at TL>0
980 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
984 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
985 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
986 * architecture-specific files.
987 * NB: Must be 8 instructions or less to fit in trap table and code must
!
! NOTE(review): trap-table stub for the Fast ECC error trap (0x70) at TL=0.
! Saves DCUCR in %g1, disables the D$ and I$ (bad data may be installed and
! the I$ line must be preserved for capture), then jumps to fast_ecc_err.
! Per the header comment above, must stay within 8 instructions.
!
991 ENTRY_NP(fecc_err_instr)
992 membar #Sync ! Cheetah requires membar #Sync
995 * Save current DCU state. Turn off the Dcache and Icache.
997 ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1
998 andn %g1, DCU_DC + DCU_IC, %g4
999 stxa %g4, [%g0]ASI_DCU
1000 flush %g0 /* flush required after changing the IC bit */
1002 ASM_JMP(%g4, fast_ecc_err)
1003 SET_SIZE(fecc_err_instr)
1006 #if !(defined(JALAPENO) || defined(SERRANO))
!
! NOTE(review): TL=0 continuation of the Fast ECC trap (see the strategy
! comment above). Disables CEEN/NCEEN, parks the sibling core where caches
! are shared, captures the CPU logout, displacement-flushes E$/L2 then
! D$/I$, restores DCUCR, and either panics (logout nested too deep),
! retries (logout busy), or systraps to cpu_fast_ecc_error. Branches,
! labels, and the final systrap sequence are elided in this extraction.
!
1010 ENTRY_NP(fast_ecc_err)
1013 * Turn off CEEN and NCEEN.
1015 ldxa [%g0]ASI_ESTATE_ERR, %g3
1016 andn %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1017 stxa %g4, [%g0]ASI_ESTATE_ERR
1018 membar #Sync ! membar sync required
1021 * Check to see whether we need to park our sibling core
1022 * before recording diagnostic information from caches
1023 * which may be shared by both cores.
1024 * We use %g1 to store information about whether or not
1025 * we had to park the core (%g1 holds our DCUCR value and
1026 * we only use bits from that register which are "reserved"
1027 * to keep track of core parking) so that we know whether
1028 * or not to unpark later. %g5 and %g4 are scratch registers.
1030 PARK_SIBLING_CORE(%g1, %g5, %g4)
1033 * Do the CPU log out capture.
1034 * %g3 = "failed?" return value.
1035 * %g2 = Input = AFAR. Output the clo_flags info which is passed
1036 * into this macro via %g4. Output only valid if cpu_private
1037 * struct has not been initialized.
1038 * CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
1039 * %g4 = Trap information stored in the cpu logout flags field
1045 /* store the CEEN and NCEEN values, TL=0 */
1046 and %g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
1047 set CHPR_FECCTL0_LOGOUT, %g6
1048 DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1051 * Flush the Ecache (and L2 cache for Panther) to get the error out
1052 * of the Ecache. If the UCC or UCU is on a dirty line, then the
1053 * following flush will turn that into a WDC or WDU, respectively.
1055 PN_L2_FLUSHALL(%g4, %g5, %g6)
1058 mulx %g4, CPU_NODE_SIZE, %g4
1061 ld [%g4 + ECACHE_LINESIZE], %g5
1062 ld [%g4 + ECACHE_SIZE], %g4
1064 ASM_LDX(%g6, ecache_flushaddr)
1065 ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
1068 * Flush the Dcache. Since bad data could have been installed in
1069 * the Dcache we must flush it before re-enabling it.
1071 ASM_LD(%g5, dcache_size)
1072 ASM_LD(%g6, dcache_linesize)
1073 CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1076 * Flush the Icache. Since we turned off the Icache to capture the
1077 * Icache line it is now stale or corrupted and we must flush it
1078 * before re-enabling it.
1080 GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
1081 ld [%g5 + CHPR_ICACHE_LINESIZE], %g6
1083 ld [%g5 + CHPR_ICACHE_SIZE], %g5
1085 ASM_LD(%g5, icache_size)
1086 ASM_LD(%g6, icache_linesize)
1088 CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1091 * check to see whether we parked our sibling core at the start
1092 * of this handler. If so, we need to unpark it here.
1093 * We use DCUCR reserved bits (stored in %g1) to keep track of
1094 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1096 UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1099 * Restore the Dcache and Icache to the previous state.
1101 stxa %g1, [%g0]ASI_DCU
1102 flush %g0 /* flush required after changing the IC bit */
1105 * Make sure our CPU logout operation was successful.
1112 * If the logout structure had been busy, how many times have
1113 * we tried to use it and failed (nesting count)? If we have
1114 * already recursed a substantial number of times, then we can
1115 * assume things are not going to get better by themselves and
1116 * so it would be best to panic.
1118 cmp %g3, CLO_NESTING_MAX
1123 mov PTL1_BAD_ECC, %g1
1127 * Otherwise, if the logout structure was busy but we have not
1128 * nested more times than our maximum value, then we simply
1129 * issue a retry. Our TL=0 trap handler code will check and
1130 * clear the AFSR after it is done logging what is currently
1131 * in the logout struct and handle this event at that time.
1136 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1137 * already at PIL 15.
1139 set cpu_fast_ecc_error, %g1
1143 movl %icc, PIL_14, %g4
1145 SET_SIZE(fast_ecc_err)
1147 #endif /* !(JALAPENO || SERRANO) */
1151 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1153 * The basic flow of this trap handler is as follows:
1155 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1156 * software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1157 * will use to save %g1 and %g2.
1158 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1159 * we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1160 * handler (using the just saved %g1).
1161 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1162 * (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1163 * NB: we don't turn off the Icache because bad data is not installed nor
1164 * will we be doing any diagnostic accesses.
1165 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1166 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1167 * %tpc, %tnpc, %tstate values previously saved).
1168 * 6) set %tl to %tl - 1.
1169 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1170 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1171 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear. For
1172 * Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1173 * Save the values in ch_err_tl1_data. For Panther, read the shadow
1174 * AFSR_EXT and save the value in ch_err_tl1_data.
1175 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1176 * being queued. We'll report them via the AFSR/AFAR capture in step 13.
1177 * 11) Flush the Ecache.
1178 * NB: the Ecache is flushed assuming the largest possible size with
1179 * the smallest possible line size since access to the cpu_nodes may
1180 * cause an unrecoverable DTLB miss.
1181 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1182 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1183 * For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1184 * Save the read AFSR/AFAR values in ch_err_tl1_data. For Panther,
1185 * read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1186 * 14) Flush and re-enable the Dcache if it was on at step 3.
1187 * 15) Do TRAPTRACE if enabled.
1188 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1189 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1190 * 18) Cause a softint 15. The pil15_interrupt handler will inspect the
1191 * event pending flag and call cpu_tl1_error via systrap if set.
1192 * 19) Restore the registers from step 5 and issue retry.
1196 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1197 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1198 * architecture-specific files. This generates a "Software Trap 0" at TL>0,
1199 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1200 * NB: Must be 8 instructions or less to fit in trap table and code must
! Trap-table stub for Fast ECC error (trap 0x70) taken at TL>0: issues a
! software trap 0 to buy one more trap level (see strategy comment above).
1204 ENTRY_NP(fecc_err_tl1_instr)
1205 CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
1206 SET_SIZE(fecc_err_tl1_instr)
1209 * Software trap 0 at TL>0.
1210 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1211 * the various architecture-specific files. This is used as a continuation
1212 * of the fast ecc handling where we've bought an extra TL level, so we can
1213 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1214 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1215 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
1216 * order two bits from %g1 and %g2 respectively).
1217 * NB: Must be 8 instructions or less to fit in trap table and code must
! Software-trap-0 continuation at TL>0: stashes %g1/%g2 in %tpc/%tnpc/%tstate
! and transfers to fast_ecc_tl1_err (see the comment block above).
1221 ENTRY_NP(fecc_err_tl1_cont_instr)
1222 CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
1223 SET_SIZE(fecc_err_tl1_cont_instr)
1227 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1229 * AFSR errors bits which cause this trap are:
1230 * CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1232 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1233 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1235 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1236 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1238 * Cheetah+ also handles (No additional processing required):
1239 * DUE, DTO, DBERR (NCEEN controlled)
1240 * THCE (CEEN and ET_ECC_en controlled)
1241 * TUE (ET_ECC_en controlled)
1243 * Panther further adds:
1244 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
1245 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
1246 * TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled)
1247 * L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled)
1248 * THCE (CEEN and L2_tag_ECC_en controlled)
1249 * L3_THCE (CEEN and ET_ECC_en controlled)
1252 * 1. Disable hardware corrected disrupting errors only (CEEN)
1253 * 2. Park sibling core if caches are shared (to avoid race
1254 * condition while accessing shared resources such as L3
1255 * data staging register during CPU logout).
1256 * 3. If the CPU logout structure is not currently being used:
1257 * 4. Clear AFSR error bits
1258 * 5. Capture Ecache, Dcache and Icache lines associated
1260 * 6. Unpark sibling core if we parked it earlier.
1261 * 7. call cpu_disrupting_error via sys_trap at PIL 14
1262 * unless we're already running at PIL 15.
1263 * 4. Otherwise, if the CPU logout structure is busy:
1264 * 5. Increment "logout busy count" and place into %g3
1265 * 6. Unpark sibling core if we parked it earlier.
1266 * 7. Issue a retry since the other CPU error logging
1267 * code will end up finding this error bit and logging
1268 * information about it later.
1269 * 5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1270 * not yet initialized such that we can't even check the logout
1271 * struct, then we place the clo_flags data into %g2
1272 * (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1273 * systrap. The clo_flags parameter is used to determine information
1274 * such as TL, TT, CEEN settings, etc in the high level trap
1275 * handler since we don't have access to detailed logout information
1276 * in cases where the cpu_private struct is not yet initialized.
1278 * %g3: [ logout busy count ] - arg #2
1279 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
! ce_err: body of the TL=0 disrupting-error (trap 0x63) handler.
! See the block comment above for the AFSR bits and overall algorithm.
1284 membar #Sync ! Cheetah requires membar #Sync
1287 * Disable trap on hardware corrected errors (CEEN) while at TL=0
1288 * to prevent recursion.
1290 ldxa [%g0]ASI_ESTATE_ERR, %g1
1291 bclr EN_REG_CEEN, %g1
1292 stxa %g1, [%g0]ASI_ESTATE_ERR
1293 membar #Sync ! membar sync required
1296 * Save current DCU state. Turn off Icache to allow capture of
1297 * Icache data by DO_CPU_LOGOUT.
1299 ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1
1300 andn %g1, DCU_IC, %g4
1301 stxa %g4, [%g0]ASI_DCU
1302 flush %g0 /* flush required after changing the IC bit */
1305 * Check to see whether we need to park our sibling core
1306 * before recording diagnostic information from caches
1307 * which may be shared by both cores.
1308 * We use %g1 to store information about whether or not
1309 * we had to park the core (%g1 holds our DCUCR value and
1310 * we only use bits from that register which are "reserved"
1311 * to keep track of core parking) so that we know whether
1312 * or not to unpark later. %g5 and %g4 are scratch registers.
1314 PARK_SIBLING_CORE(%g1, %g5, %g4)
1317 * Do the CPU log out capture.
1318 * %g3 = "failed?" return value.
1319 * %g2 = Input = AFAR. Output the clo_flags info which is passed
1320 * into this macro via %g4. Output only valid if cpu_private
1321 * struct has not been initialized.
1322 * CHPR_CECC_LOGOUT = cpu logout structure offset input
1323 * %g4 = Trap information stored in the cpu logout flags field
1329 clr %g4 ! TL=0 bit in afsr
1330 set CHPR_CECC_LOGOUT, %g6
1331 DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1334 * Flush the Icache. Since we turned off the Icache to capture the
1335 * Icache line it is now stale or corrupted and we must flush it
1336 * before re-enabling it.
1338 GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
1339 ld [%g5 + CHPR_ICACHE_LINESIZE], %g6
1341 ld [%g5 + CHPR_ICACHE_SIZE], %g5
1343 ASM_LD(%g5, icache_size)
1344 ASM_LD(%g6, icache_linesize)
! NOTE(review): the ASM_LD pair above is the fallback path used when the
! cpu_private area is unavailable — confirm against the full (unsampled) file.
1346 CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1349 * check to see whether we parked our sibling core at the start
1350 * of this handler. If so, we need to unpark it here.
1351 * We use DCUCR reserved bits (stored in %g1) to keep track of
1352 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1354 UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1357 * Restore Icache to previous state.
1359 stxa %g1, [%g0]ASI_DCU
1360 flush %g0 /* flush required after changing the IC bit */
1363 * Make sure our CPU logout operation was successful.
1370 * If the logout structure had been busy, how many times have
1371 * we tried to use it and failed (nesting count)? If we have
1372 * already recursed a substantial number of times, then we can
1373 * assume things are not going to get better by themselves and
1374 * so it would be best to panic.
1376 cmp %g3, CLO_NESTING_MAX
1381 mov PTL1_BAD_ECC, %g1
1385 * Otherwise, if the logout structure was busy but we have not
1386 * nested more times than our maximum value, then we simply
1387 * issue a retry. Our TL=0 trap handler code will check and
1388 * clear the AFSR after it is done logging what is currently
1389 * in the logout struct and handle this event at that time.
1394 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1395 * already at PIL 15.
1397 set cpu_disrupting_error, %g1
1401 movl %icc, PIL_14, %g4 ! pick PIL 14 only if current PIL is lower
1406 * This trap cannot happen at TL>0 which means this routine will never
1407 * actually be called and so we treat this like a BAD TRAP panic.
! ce_err_tl1: disrupting-error trap at TL>0 cannot legitimately occur,
! so treat it as a bad trap and panic via the PTL1 path.
1410 ENTRY_NP(ce_err_tl1)
1413 mov PTL1_BAD_TRAP, %g1
1415 SET_SIZE(ce_err_tl1)
1419 * The async_err function handles deferred trap types 0xA
1420 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1422 * AFSR errors bits which cause this trap are:
1423 * UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1424 * On some platforms, EMU may cause Cheetah to pull the error pin
1425 * never giving Solaris a chance to take a trap.
1427 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1428 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1431 * 1. Disable CEEN and NCEEN errors to prevent recursive errors.
1432 * 2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1433 * I$ line in DO_CPU_LOGOUT.
1434 * 3. Park sibling core if caches are shared (to avoid race
1435 * condition while accessing shared resources such as L3
1436 * data staging register during CPU logout).
1437 * 4. If the CPU logout structure is not currently being used:
1438 * 5. Clear AFSR error bits
1439 * 6. Capture Ecache, Dcache and Icache lines associated
1441 * 7. Unpark sibling core if we parked it earlier.
1442 * 8. call cpu_deferred_error via sys_trap.
1443 * 5. Otherwise, if the CPU logout structure is busy:
1444 * 6. Increment "logout busy count"
1445 * 7. Unpark sibling core if we parked it earlier.
1446 * 8. Issue a retry since the other CPU error logging
1447 * code will end up finding this error bit and logging
1448 * information about it later.
1449 * 6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1450 * not yet initialized such that we can't even check the logout
1451 * struct, then we place the clo_flags data into %g2
1452 * (sys_trap->have_win arg #1) and call cpu_deferred_error via
1453 * systrap. The clo_flags parameter is used to determine information
1454 * such as TL, TT, CEEN settings, etc in the high level trap handler
1455 * since we don't have access to detailed logout information in cases
1456 * where the cpu_private struct is not yet initialized.
1458 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1459 * %g3: [ logout busy count ] - arg #2
! async_err: body of the deferred-error handler (traps 0xA and 0x32).
! See the block comment above for AFSR bits and the overall algorithm.
1463 membar #Sync ! Cheetah requires membar #Sync
1466 * Disable CEEN and NCEEN.
1468 ldxa [%g0]ASI_ESTATE_ERR, %g3
1469 andn %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1470 stxa %g4, [%g0]ASI_ESTATE_ERR
1471 membar #Sync ! membar sync required
1474 * Save current DCU state.
1475 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
1476 * Do this regardless of whether this is a Data Access Error or
1477 * Instruction Access Error Trap.
1478 * Disable Dcache for both Data Access Error and Instruction Access
1479 * Error per Cheetah PRM P.5 Note 6.
1481 ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1
1482 andn %g1, DCU_IC + DCU_DC, %g4
1483 stxa %g4, [%g0]ASI_DCU
1484 flush %g0 /* flush required after changing the IC bit */
1487 * Check to see whether we need to park our sibling core
1488 * before recording diagnostic information from caches
1489 * which may be shared by both cores.
1490 * We use %g1 to store information about whether or not
1491 * we had to park the core (%g1 holds our DCUCR value and
1492 * we only use bits from that register which are "reserved"
1493 * to keep track of core parking) so that we know whether
1494 * or not to unpark later. %g6 and %g4 are scratch registers.
1496 PARK_SIBLING_CORE(%g1, %g6, %g4)
1499 * Do the CPU logout capture.
1501 * %g3 = "failed?" return value.
1502 * %g2 = Input = AFAR. Output the clo_flags info which is passed
1503 * into this macro via %g4. Output only valid if cpu_private
1504 * struct has not been initialized.
1505 * CHPR_ASYNC_LOGOUT = cpu logout structure offset input
1506 * %g4 = Trap information stored in the cpu logout flags field
! Build the clo_flags word in %g4: TL1 flag, trap type, and CEEN state.
1512 andcc %g5, T_TL1, %g0
1514 movnz %xcc, 1, %g6 ! set %g6 if T_TL1 set
1515 sllx %g6, CLO_FLAGS_TL_SHIFT, %g6
1516 sllx %g5, CLO_FLAGS_TT_SHIFT, %g4
1517 set CLO_FLAGS_TT_MASK, %g2
1518 and %g4, %g2, %g4 ! ttype
1519 or %g6, %g4, %g4 ! TT and TL
1520 and %g3, EN_REG_CEEN, %g3 ! CEEN value
1521 or %g3, %g4, %g4 ! TT and TL and CEEN
1522 set CHPR_ASYNC_LOGOUT, %g6
1523 DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1526 * If the logout struct was busy, we may need to pass the
1527 * TT, TL, and CEEN information to the TL=0 handler via
1528 * systrap parameter so save it off here.
1537 * Flush the Icache. Since we turned off the Icache to capture the
1538 * Icache line it is now stale or corrupted and we must flush it
1539 * before re-enabling it.
1541 GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
1542 ld [%g5 + CHPR_ICACHE_LINESIZE], %g6
1544 ld [%g5 + CHPR_ICACHE_SIZE], %g5
1546 ASM_LD(%g5, icache_size)
1547 ASM_LD(%g6, icache_linesize)
1549 CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1552 * XXX - Don't we need to flush the Dcache before turning it back
1553 * on to avoid stale or corrupt data? Was this broken?
1556 * Flush the Dcache before turning it back on since it may now
1557 * contain stale or corrupt data.
! NOTE(review): the CH_DCACHE_FLUSHALL below answers the XXX question above;
! the stale question comment could likely be removed.
1559 ASM_LD(%g5, dcache_size)
1560 ASM_LD(%g6, dcache_linesize)
1561 CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1564 * check to see whether we parked our sibling core at the start
1565 * of this handler. If so, we need to unpark it here.
1566 * We use DCUCR reserved bits (stored in %g1) to keep track of
1567 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
1569 UNPARK_SIBLING_CORE(%g1, %g5, %g7)
1572 * Restore Icache and Dcache to previous state.
1574 stxa %g1, [%g0]ASI_DCU
1575 flush %g0 /* flush required after changing the IC bit */
1578 * Make sure our CPU logout operation was successful.
1585 * If the logout structure had been busy, how many times have
1586 * we tried to use it and failed (nesting count)? If we have
1587 * already recursed a substantial number of times, then we can
1588 * assume things are not going to get better by themselves and
1589 * so it would be best to panic.
1591 cmp %g3, CLO_NESTING_MAX
1596 mov PTL1_BAD_ECC, %g1
1600 * Otherwise, if the logout structure was busy but we have not
1601 * nested more times than our maximum value, then we simply
1602 * issue a retry. Our TL=0 trap handler code will check and
1603 * clear the AFSR after it is done logging what is currently
1604 * in the logout struct and handle this event at that time.
1608 RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1609 async_err_resetskip:
1610 set cpu_deferred_error, %g1
1612 mov PIL_15, %g4 ! run at pil 15
1615 #if defined(CPU_IMP_L1_CACHE_PARITY)
1618 * D$ parity error trap (trap 71) at TL=0.
1619 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1620 * the various architecture-specific files. This merely sets up the
1621 * arguments for cpu_parity_error and calls it via sys_trap.
1622 * NB: Must be 8 instructions or less to fit in trap table and code must
! D$ parity error (trap 71) at TL=0: set up %g1 = handler, %g2 = error
! flag, then tail-jump to sys_trap with PIL_15 loaded in the delay slot.
1625 ENTRY_NP(dcache_parity_instr)
1626 membar #Sync ! Cheetah+ requires membar #Sync
1627 set cpu_parity_error, %g1
1628 or %g0, CH_ERR_DPE, %g2
1630 sethi %hi(sys_trap), %g7
1631 jmp %g7 + %lo(sys_trap)
1632 mov PIL_15, %g4 ! run at pil 15
1633 SET_SIZE(dcache_parity_instr)
1637 * D$ parity error trap (trap 71) at TL>0.
1638 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
1639 * the various architecture-specific files. This generates a "Software
1640 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
1641 * continue the handling there.
1642 * NB: Must be 8 instructions or less to fit in trap table and code must
! D$ parity error (trap 71) at TL>0: bounce into "Software Trap 1",
! continuing at dcache_parity_tl1_cont_instr.
1645 ENTRY_NP(dcache_parity_tl1_instr)
1646 CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
1647 SET_SIZE(dcache_parity_tl1_instr)
1651 * Software trap 1 at TL>0.
1652 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
1653 * of the various architecture-specific files. This is used as a continuation
1654 * of the dcache parity handling where we've bought an extra TL level, so we
1655 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1656 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1657 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
1658 * order two bits from %g1 and %g2 respectively).
1659 * NB: Must be 8 instructions or less to fit in trap table and code must
! Software trap 1 at TL>0: continuation of D$ parity handling; dispatches
! to dcache_parity_tl1_err with %g1/%g2 stashed in %tpc/%tnpc/%tstate.
1662 ENTRY_NP(dcache_parity_tl1_cont_instr)
1663 CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
1664 SET_SIZE(dcache_parity_tl1_cont_instr)
1667 * D$ parity error at TL>0 handler
1668 * We get here via trap 71 at TL>0->Software trap 1 at TL>0. We enter
1669 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
! dcache_parity_tl1_err: real work for D$ parity errors taken at TL>0.
! Logs into ch_err_tl1_data; a PIL15 softint later picks the record up.
1672 ENTRY_NP(dcache_parity_tl1_err)
1675 * This macro saves all the %g registers in the ch_err_tl1_data
1676 * structure, updates the ch_err_tl1_flags and saves the %tpc in
1677 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to
1678 * the ch_err_tl1_data structure and %g2 will have the original
1679 * flags in the ch_err_tl1_data structure. All %g registers
1680 * except for %g1 and %g2 will be available.
1682 CH_ERR_TL1_ENTER(CH_ERR_DPE);
1686 * Get current trap trace entry physical pointer.
1689 sll %g6, TRAPTR_SIZE_SHIFT, %g6
1690 set trap_trace_ctl, %g5
1692 ld [%g6 + TRAPTR_LIMIT], %g5
1694 be %icc, dpe_tl1_skip_tt
1696 ldx [%g6 + TRAPTR_PBASE], %g5
1697 ld [%g6 + TRAPTR_OFFSET], %g4
1701 * Create trap trace entry.
1704 wr %g0, TRAPTR_ASI, %asi
1706 stxa %g4, [%g5 + TRAP_ENT_TICK]%asi
1708 stha %g4, [%g5 + TRAP_ENT_TL]%asi
1710 stha %g4, [%g5 + TRAP_ENT_TT]%asi
1712 stna %g4, [%g5 + TRAP_ENT_TPC]%asi
1714 stxa %g4, [%g5 + TRAP_ENT_TSTATE]%asi
1715 stna %sp, [%g5 + TRAP_ENT_SP]%asi
1716 stna %g0, [%g5 + TRAP_ENT_TR]%asi
1717 stna %g0, [%g5 + TRAP_ENT_F1]%asi
1718 stna %g0, [%g5 + TRAP_ENT_F2]%asi
1719 stna %g0, [%g5 + TRAP_ENT_F3]%asi
1720 stna %g0, [%g5 + TRAP_ENT_F4]%asi
1724 * Advance trap trace pointer.
1726 ld [%g6 + TRAPTR_OFFSET], %g5
1727 ld [%g6 + TRAPTR_LIMIT], %g4
1728 st %g5, [%g6 + TRAPTR_LAST_OFFSET]
1729 add %g5, TRAP_ENT_SIZE, %g5
1730 sub %g4, TRAP_ENT_SIZE, %g4
1733 st %g5, [%g6 + TRAPTR_OFFSET]
1735 #endif /* TRAPTRACE */
1738 * I$ and D$ are automatically turned off by HW when the CPU hits
1739 * a dcache or icache parity error so we will just leave those two
1740 * off for now to avoid repeating this trap.
1741 * For Panther, however, since we trap on P$ data parity errors
1742 * and HW does not automatically disable P$, we need to disable it
1743 * here so that we don't encounter any recursive traps when we
! NOTE(review): %g4 is shifted to the DCU_PE bit position here; the
! intervening mask setup is presumably cleared from %g3 — confirm against
! the full file.
1746 ldxa [%g0]ASI_DCU, %g3
1748 sllx %g4, DCU_PE_SHIFT, %g4
1750 stxa %g3, [%g0]ASI_DCU
1754 * We fall into this macro if we've successfully logged the error in
1755 * the ch_err_tl1_data structure and want the PIL15 softint to pick
1756 * it up and log it. %g1 must point to the ch_err_tl1_data structure.
1757 * Restores the %g registers and issues retry.
1760 SET_SIZE(dcache_parity_tl1_err)
1763 * I$ parity error trap (trap 72) at TL=0.
1764 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
1765 * the various architecture-specific files. This merely sets up the
1766 * arguments for cpu_parity_error and calls it via sys_trap.
1767 * NB: Must be 8 instructions or less to fit in trap table and code must
! I$ parity error (trap 72) at TL=0: set up %g1 = handler, %g2 = error
! flag, then tail-jump to sys_trap with PIL_15 loaded in the delay slot.
1771 ENTRY_NP(icache_parity_instr)
1772 membar #Sync ! Cheetah+ requires membar #Sync
1773 set cpu_parity_error, %g1
1774 or %g0, CH_ERR_IPE, %g2
1776 sethi %hi(sys_trap), %g7
1777 jmp %g7 + %lo(sys_trap)
1778 mov PIL_15, %g4 ! run at pil 15
1779 SET_SIZE(icache_parity_instr)
1782 * I$ parity error trap (trap 72) at TL>0.
1783 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
1784 * the various architecture-specific files. This generates a "Software
1785 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
1786 * continue the handling there.
1787 * NB: Must be 8 instructions or less to fit in trap table and code must
! I$ parity error (trap 72) at TL>0: bounce into "Software Trap 2",
! continuing at icache_parity_tl1_cont_instr.
1790 ENTRY_NP(icache_parity_tl1_instr)
1791 CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
1792 SET_SIZE(icache_parity_tl1_instr)
1795 * Software trap 2 at TL>0.
1796 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
1797 * of the various architecture-specific files. This is used as a continuation
1798 * of the icache parity handling where we've bought an extra TL level, so we
1799 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1800 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1801 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
1802 * order two bits from %g1 and %g2 respectively).
1803 * NB: Must be 8 instructions or less to fit in trap table and code must
! Software trap 2 at TL>0: continuation of I$ parity handling; dispatches
! to icache_parity_tl1_err with %g1/%g2 stashed in %tpc/%tnpc/%tstate.
1806 ENTRY_NP(icache_parity_tl1_cont_instr)
1807 CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
1808 SET_SIZE(icache_parity_tl1_cont_instr)
1812 * I$ parity error at TL>0 handler
1813 * We get here via trap 72 at TL>0->Software trap 2 at TL>0. We enter
1814 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
! icache_parity_tl1_err: real work for I$ parity errors taken at TL>0.
! Mirrors dcache_parity_tl1_err but logs CH_ERR_IPE and needs no P$ fixup.
1817 ENTRY_NP(icache_parity_tl1_err)
1820 * This macro saves all the %g registers in the ch_err_tl1_data
1821 * structure, updates the ch_err_tl1_flags and saves the %tpc in
1822 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to
1823 * the ch_err_tl1_data structure and %g2 will have the original
1824 * flags in the ch_err_tl1_data structure. All %g registers
1825 * except for %g1 and %g2 will be available.
1827 CH_ERR_TL1_ENTER(CH_ERR_IPE);
1831 * Get current trap trace entry physical pointer.
1834 sll %g6, TRAPTR_SIZE_SHIFT, %g6
1835 set trap_trace_ctl, %g5
1837 ld [%g6 + TRAPTR_LIMIT], %g5
1839 be %icc, ipe_tl1_skip_tt
1841 ldx [%g6 + TRAPTR_PBASE], %g5
1842 ld [%g6 + TRAPTR_OFFSET], %g4
1846 * Create trap trace entry.
1849 wr %g0, TRAPTR_ASI, %asi
1851 stxa %g4, [%g5 + TRAP_ENT_TICK]%asi
1853 stha %g4, [%g5 + TRAP_ENT_TL]%asi
1855 stha %g4, [%g5 + TRAP_ENT_TT]%asi
1857 stna %g4, [%g5 + TRAP_ENT_TPC]%asi
1859 stxa %g4, [%g5 + TRAP_ENT_TSTATE]%asi
1860 stna %sp, [%g5 + TRAP_ENT_SP]%asi
1861 stna %g0, [%g5 + TRAP_ENT_TR]%asi
1862 stna %g0, [%g5 + TRAP_ENT_F1]%asi
1863 stna %g0, [%g5 + TRAP_ENT_F2]%asi
1864 stna %g0, [%g5 + TRAP_ENT_F3]%asi
1865 stna %g0, [%g5 + TRAP_ENT_F4]%asi
1869 * Advance trap trace pointer.
1871 ld [%g6 + TRAPTR_OFFSET], %g5
1872 ld [%g6 + TRAPTR_LIMIT], %g4
1873 st %g5, [%g6 + TRAPTR_LAST_OFFSET]
1874 add %g5, TRAP_ENT_SIZE, %g5
1875 sub %g4, TRAP_ENT_SIZE, %g4
1878 st %g5, [%g6 + TRAPTR_OFFSET]
1880 #endif /* TRAPTRACE */
1883 * We fall into this macro if we've successfully logged the error in
1884 * the ch_err_tl1_data structure and want the PIL15 softint to pick
1885 * it up and log it. %g1 must point to the ch_err_tl1_data structure.
1886 * Restores the %g registers and issues retry.
1890 SET_SIZE(icache_parity_tl1_err)
1892 #endif /* CPU_IMP_L1_CACHE_PARITY */
1896 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1897 * tte, the virtual address, and the ctxnum of the specified tlb entry. They
1898 * should only be used in places where you have no choice but to look at the
1901 * Note: These two routines are required by the Estar "cpr" loadable module.
! itlb_rd_entry: read one ITLB entry via diagnostic ASIs.
! %o0 = entry index; tag/data are read into %g1/%g2, and %o4 holds the
! context mask used to split out the ctxnum (per the comment above).
1904 ENTRY_NP(itlb_rd_entry)
1906 ldxa [%o0]ASI_ITLB_ACCESS, %g1
1908 ldxa [%o0]ASI_ITLB_TAGREAD, %g2
1909 set TAGREAD_CTX_MASK, %o4
1913 SET_SIZE(itlb_rd_entry)
! dtlb_rd_entry: DTLB twin of itlb_rd_entry; reads entry %o0 via the
! DTLB diagnostic ASIs.
1916 ENTRY_NP(dtlb_rd_entry)
1918 ldxa [%o0]ASI_DTLB_ACCESS, %g1
1920 ldxa [%o0]ASI_DTLB_TAGREAD, %g2
1921 set TAGREAD_CTX_MASK, %o4
1925 SET_SIZE(dtlb_rd_entry)
1928 #if !(defined(JALAPENO) || defined(SERRANO))
! get_safari_config: return the Safari Configuration Register in %o0.
1930 ENTRY(get_safari_config)
1931 ldxa [%g0]ASI_SAFARI_CONFIG, %o0
1934 SET_SIZE(get_safari_config)
! set_safari_config: write %o0 to the Safari Configuration Register.
1937 ENTRY(set_safari_config)
1938 stxa %o0, [%g0]ASI_SAFARI_CONFIG
1942 SET_SIZE(set_safari_config)
1944 #endif /* !(JALAPENO || SERRANO) */
1948 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
1949 * registers. In an effort to make the change in the
1950 * tick/stick counter as consistent as possible, we disable
1951 * all interrupts while we're changing the registers. We also
1952 * ensure that the read and write instructions are in the same
1953 * line in the instruction cache.
! cpu_clearticknpt: clear the NPT bit (bit 63) of %tick and %stick with
! interrupts disabled; each rd/wr pair is aligned into one I$ line to
! minimize the counter disturbance (see block comment above).
1955 ENTRY_NP(cpu_clearticknpt)
1956 rdpr %pstate, %g1 /* save processor state */
1957 andn %g1, PSTATE_IE, %g3 /* turn off */
1958 wrpr %g0, %g3, %pstate /* interrupts */
1959 rdpr %tick, %g2 /* get tick register */
1960 brgez,pn %g2, 1f /* if NPT bit off, we're done */
1961 mov 1, %g3 /* create mask */
1962 sllx %g3, 63, %g3 /* for NPT bit */
1964 .align 8 /* Ensure rd/wr in same i$ line */
1966 rdpr %tick, %g2 /* get tick register */
1967 wrpr %g3, %g2, %tick /* write tick register, */
1968 /* clearing NPT bit */
1970 rd STICK, %g2 /* get stick register */
1971 brgez,pn %g2, 3f /* if NPT bit off, we're done */
1972 mov 1, %g3 /* create mask */
1973 sllx %g3, 63, %g3 /* for NPT bit */
1975 .align 8 /* Ensure rd/wr in same i$ line */
1977 rd STICK, %g2 /* get stick register */
1978 wr %g3, %g2, STICK /* write stick register, */
1979 /* clearing NPT bit */
1982 wrpr %g0, %g1, %pstate /* restore processor state */
1984 SET_SIZE(cpu_clearticknpt)
1987 #if defined(CPU_IMP_L1_CACHE_PARITY)
1990 * correct_dcache_parity(size_t size, size_t linesize)
1992 * Correct D$ data parity by zeroing the data and initializing microtag
1993 * for all indexes and all ways of the D$.
! correct_dcache_parity(size_t size, size_t linesize): walk every line of
! every D$ way, zeroing data (and, on Panther, the data parity bits) and
! setting the utag to the way number so utags stay unique per index.
1996 ENTRY(correct_dcache_parity)
2000 * %o0 = input D$ size
2001 * %o1 = input D$ line size
2007 sub %o0, %o1, %o0 ! init cache line address
2010 * For Panther CPUs, we also need to clear the data parity bits
2011 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
2014 cmp %o3, PANTHER_IMPL
2016 clr %o3 ! zero for non-Panther
2018 sll %o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3
2022 * Set utag = way since it must be unique within an index.
2024 srl %o0, 14, %o2 ! get cache way (DC_way)
2025 membar #Sync ! required before ASI_DC_UTAG
2026 stxa %o2, [%o0]ASI_DC_UTAG ! set D$ utag = cache way
2027 membar #Sync ! required after ASI_DC_UTAG
2030 * Zero line of D$ data (and data parity bits for Panther)
2033 or %o0, %o3, %o4 ! same address + DC_data_parity
2035 membar #Sync ! required before ASI_DC_DATA
2036 stxa %g0, [%o0 + %o2]ASI_DC_DATA ! zero 8 bytes of D$ data
2037 membar #Sync ! required after ASI_DC_DATA
2039 * We also clear the parity bits if this is a panther. For non-Panther
2040 * CPUs, we simply end up clearing the $data register twice.
2042 stxa %g0, [%o4 + %o2]ASI_DC_DATA
2055 SET_SIZE(correct_dcache_parity)
2057 #endif /* CPU_IMP_L1_CACHE_PARITY */
! stick_timestamp(uint64_t *tsp): read %stick, strip the NPT bit, and
! store the result at [%o0].
2060 ENTRY_NP(stick_timestamp)
2061 rd STICK, %g1 ! read stick reg
2063 srlx %g1, 1, %g1 ! clear npt bit
2066 stx %g1, [%o0] ! store the timestamp
2067 SET_SIZE(stick_timestamp)
! Body of the %stick skew-adjust routine (presumably stick_adj — the
! ENTRY line is outside this view; confirm against the full file).
! Adds the skew in %o0 to %stick with interrupts disabled; the branch
! cache-aligns the read-modify-write sequence.
2071 rdpr %pstate, %g1 ! save processor state
2072 andn %g1, PSTATE_IE, %g3
2073 ba 1f ! cache align stick adj
2074 wrpr %g0, %g3, %pstate ! turn off interrupts
2079 rd STICK, %g4 ! read stick reg
2080 add %g4, %o0, %o1 ! adjust stick with skew
2081 wr %o1, %g0, STICK ! write stick reg
2084 wrpr %g1, %pstate ! restore processor state
! kdi_get_stick: kernel-debugger interface to read %stick (body sampled
! out of this view).
2087 ENTRY_NP(kdi_get_stick)
2092 SET_SIZE(kdi_get_stick)
2095 * Invalidate the specified line from the D$.
2098 * %o0 - index for the invalidation, specifies DC_way and DC_addr
2100 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2101 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2103 * The format of the stored 64-bit value is:
2105 * +----------+--------+----------+
2106 * | Reserved | DC_tag | DC_valid |
2107 * +----------+--------+----------+
2110 * DC_tag is the 30-bit physical tag of the associated line.
2111 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2113 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2115 * +----------+--------+----------+----------+
2116 * | Reserved | DC_way | DC_addr | Reserved |
2117 * +----------+--------+----------+----------+
2118 * 63 16 15 14 13 5 4 0
2120 * DC_way is a 2-bit index that selects one of the 4 ways.
2121 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2123 * Setting the DC_valid bit to zero for the specified DC_way and
2124 * DC_addr index into the D$ results in an invalidation of a D$ line.
! dcache_inval_line(index): invalidate one D$ line by zeroing its
! DC_valid/DC_tag via ASI_DC_TAG (format described in the comment above).
2126 ENTRY(dcache_inval_line)
2127 sll %o0, 5, %o0 ! shift index into DC_way and DC_addr
2128 stxa %g0, [%o0]ASI_DC_TAG ! zero the DC_valid and DC_tag bits
2132 SET_SIZE(dcache_inval_line)
2135 * Invalidate the entire I$
2138 * %o0 - specifies IC_way, IC_addr, IC_tag
2140 * %o2 - used to save and restore DCU value
2142 * %o5 - used to save and restore PSTATE
2144 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2145 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2146 * block out snoops and invalidates to the I$, causing I$ consistency
2147 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2149 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2150 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2151 * info below describes store (write) use of ASI_IC_TAG. Note that read
2152 * use of ASI_IC_TAG behaves differently.
2154 * The format of the stored 64-bit value is:
2156 * +----------+--------+---------------+-----------+
2157 * | Reserved | Valid | IC_vpred<7:0> | Undefined |
2158 * +----------+--------+---------------+-----------+
2159 * 63 55 54 53 46 45 0
2161 * Valid is the 1-bit valid field for both the physical and snoop tags.
2162 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2163 * the 32-byte boundary aligned address specified by IC_addr.
2165 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2167 * +----------+--------+---------+--------+---------+
2168 * | Reserved | IC_way | IC_addr | IC_tag |Reserved |
2169 * +----------+--------+---------+--------+---------+
2170 * 63 16 15 14 13 5 4 3 2 0
2172 * IC_way is a 2-bit index that selects one of the 4 ways.
2173 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2174 * IC_addr[5] is a "don't care" for a store.
2175 * IC_tag set to 2 specifies that the stored value is to be interpreted
2176 * as containing Valid and IC_vpred as described above.
2178 * Setting the Valid bit to zero for the specified IC_way and
2179 * IC_addr index into the I$ results in an invalidation of an I$ line.
! icache_inval_all: invalidate the entire I$ with interrupts disabled.
! Gets linesize/size from the cpu_private area when available, falling
! back to the global icache_size/icache_linesize variables.
2181 ENTRY(icache_inval_all)
2183 andn %o5, PSTATE_IE, %o3
2184 wrpr %g0, %o3, %pstate ! clear IE bit
2186 GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2187 ld [%o0 + CHPR_ICACHE_LINESIZE], %o1
2189 ld [%o0 + CHPR_ICACHE_SIZE], %o0
2191 ASM_LD(%o0, icache_size)
2192 ASM_LD(%o1, icache_linesize)
2194 CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2197 wrpr %g0, %o5, %pstate ! restore earlier pstate
2198 SET_SIZE(icache_inval_all)
2202 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2203 * crosstrap. It atomically increments the outstanding request counter and,
2204 * if there was not already an outstanding request, branches to setsoftint_tl1
2205 * to enqueue an intr_vec for the given inum.
2212 ! %g2 - index into chsm_outstanding array
2215 ! %g2, %g3, %g5 - scratch
2216 ! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2217 ! %g6 - setsoftint_tl1 address
! cache_scrubreq_tl1: crosstrap handler (TL1) on offlined cpus. Bumps the
! per-cpu chsm_outstanding counter; only the 0->1 transition falls through
! to setsoftint_tl1 to enqueue the scrub intr_vec (see comment above).
2219 ENTRY_NP(cache_scrubreq_tl1)
2220 mulx %g2, CHSM_OUTSTANDING_INCR, %g2
2221 set CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
2223 GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2224 ld [%g4], %g2 ! cpu's chsm_outstanding[index]
2226 ! no need to use atomic instructions for the following
2227 ! increment - we're at tl1
2230 brnz,pn %g2, 1f ! no need to enqueue more intr_vec
2231 st %g3, [%g4] ! delay - store incremented counter
2232 ASM_JMP(%g6, setsoftint_tl1)
2236 SET_SIZE(cache_scrubreq_tl1)
2240 * Get the error state for the processor.
2241 * Note that this must not be used at TL>0
! get_cpu_error_state: snapshot the AFSR/AFAR (and shadow / extended
! variants where the implementation has them) into the ch_cpu_errors
! save area pointed to by %o0. Must not be used at TL>0.
2243 ENTRY(get_cpu_error_state)
2244 #if defined(CHEETAH_PLUS)
2245 set ASI_SHADOW_REG_VA, %o2
2246 ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr reg
2247 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2248 ldxa [%o2]ASI_AFAR, %o1 ! shadow afar reg
2249 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2250 GET_CPU_IMPL(%o3) ! Only panther has AFSR_EXT registers
2251 cmp %o3, PANTHER_IMPL
2253 stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT] ! zero for non-PN
2254 set ASI_AFSR_EXT_VA, %o2
2255 ldxa [%o2]ASI_AFSR, %o1 ! afsr_ext reg
2256 stx %o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2257 set ASI_SHADOW_AFSR_EXT_VA, %o2
2258 ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr_ext reg
2259 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2263 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
2265 #else /* CHEETAH_PLUS */
2266 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2267 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2268 stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2269 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2270 #endif /* CHEETAH_PLUS */
2271 #if defined(SERRANO)
2273 * Serrano has an afar2 which captures the address on FRC/FRU errors.
2274 * We save this in the afar2 of the register save area.
2276 set ASI_MCU_AFAR2_VA, %o2
2277 ldxa [%o2]ASI_MCU_CTRL, %o1
2278 stx %o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2279 #endif /* SERRANO */
2280 ldxa [%g0]ASI_AFSR, %o1 ! primary afsr reg
2281 stx %o1, [%o0 + CH_CPU_ERRORS_AFSR]
2282 ldxa [%g0]ASI_AFAR, %o1 ! primary afar reg
2284 stx %o1, [%o0 + CH_CPU_ERRORS_AFAR]
2285 SET_SIZE(get_cpu_error_state)
2288 * Check a page of memory for errors.
2290 * Load each 64 byte block from physical memory.
2291 * Check AFSR after each load to see if an error
2292 * was caused. If so, log/scrub that error.
2294 * Used to determine if a page contains
2295 * CEs when CEEN is disabled.
! cpu_check_block(addr, psz): load each 64-byte block of the region with
! a block load, checking AFSR after each one; on error, snapshot the
! error regs into a stack buffer and report via cpu_ce_detected().
2297 ENTRY(cpu_check_block)
2299 ! get a new window with room for the error regs
2301 save %sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
2302 srl %i1, 6, %l4 ! clear top bits of psz
2304 rd %fprs, %l2 ! store FP
2305 wr %g0, FPRS_FEF, %fprs ! enable FP
2307 ldda [%i0]ASI_BLK_P, %d0 ! load a block
2309 ldxa [%g0]ASI_AFSR, %l3 ! read afsr reg
2310 brz,a,pt %l3, 2f ! check for error
2314 ! if error, read the error regs and log it
2316 call get_cpu_error_state
2317 add %fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
2320 ! cpu_ce_detected(ch_cpu_errors_t *, flag)
2322 call cpu_ce_detected ! log the error
2323 mov CE_CEEN_TIMEOUT, %o1
2325 dec %l4 ! next 64-byte block
2327 add %i0, 64, %i0 ! increment block addr
2329 wr %l2, %g0, %fprs ! restore FP
2333 SET_SIZE(cpu_check_block)
2336 * Perform a cpu logout called from C. This is used where we did not trap
2337 * for the error but still want to gather "what we can". Caller must make
2338 * sure cpu private area exists and that the indicated logout area is free
2339 * for use, and that we are unable to migrate cpus.
2341 ENTRY(cpu_delayed_logout)
! %o0 = afar, %o1 = pointer to the logout area -- presumably; verify
! against the C prototype. %o2 holds the saved %pstate (the rdpr that
! loads it is elided in this excerpt).
2343 andn %o2, PSTATE_IE, %o2
2344 wrpr %g0, %o2, %pstate ! disable interrupts
! Quiesce the sibling core so the shared E$ tags are stable while we
! read them; the macro leaves the DCU value in %o2.
2345 PARK_SIBLING_CORE(%o2, %o3, %o4) ! %o2 has DCU value
! Point %o1 at the E$ data portion of the logout structure.
2346 add %o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2349 GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2351 UNPARK_SIBLING_CORE(%o2, %o3, %o4) ! can use %o2 again
! Re-enable interrupts; %o2 is reloaded with %pstate on an elided line.
2353 or %o2, PSTATE_IE, %o2
2354 wrpr %g0, %o2, %pstate
2357 SET_SIZE(cpu_delayed_logout)
! dtrace_blksuword32: store a 64-byte block to user space via a block
! commit store, protected by a lofault handler. %i0 = user address,
! %i1 = pointer to the 32-bit source words, %i2 = tryagain flag
! (presumably, per the DTrace caller -- TODO confirm prototype).
! Returns 0 on success, -1 (or tail-calls dtrace_blksuword32_err) on fault.
2359 ENTRY(dtrace_blksuword32)
2360 save %sp, -SA(MINFRAME + 4), %sp
! %l0 = saved %fprs, %l1 = saved %pstate (their rd/rdpr lines are
! elided in this excerpt).
2363 andn %l1, PSTATE_IE, %l2 ! disable interrupts to
2364 wrpr %g0, %l2, %pstate ! protect our FPU diddling
2367 andcc %l0, FPRS_FEF, %g0
2368 bz,a,pt %xcc, 1f ! if the fpu is disabled
2369 wr %g0, FPRS_FEF, %fprs ! ... enable the fpu
! FPU was already live: preserve the caller's %f0 before we clobber it.
2371 st %f0, [%fp + STACK_BIAS - 4] ! save %f0 to the stack
2375 * We're about to write a block full or either total garbage
2376 * (not kernel data, don't worry) or user floating-point data
2377 * (so it only _looks_ like garbage).
2379 ld [%i1], %f0 ! modify the block
! %l5 holds the fault-recovery address (set on an elided line); any
! fault in the block store vectors through T_LOFAULT to the error path.
2381 stn %l5, [THREAD_REG + T_LOFAULT] ! set up the lofault handler
2382 stda %d0, [%i0]ASI_BLK_COMMIT_S ! store the modified block
2384 stn %g0, [THREAD_REG + T_LOFAULT] ! remove the lofault handler
! Success path: undo the FPU/interrupt state changes and return 0.
2387 wr %g0, %l0, %fprs ! restore %fprs
2389 ld [%fp + STACK_BIAS - 4], %f0 ! restore %f0
2392 wrpr %g0, %l1, %pstate ! restore interrupts
2395 restore %g0, %g0, %o0
! Fault path (reached via lofault): same cleanup, then error return.
2399 stn %g0, [THREAD_REG + T_LOFAULT] ! remove the lofault handler
2402 wr %g0, %l0, %fprs ! restore %fprs
2404 ld [%fp + STACK_BIAS - 4], %f0 ! restore %f0
2407 wrpr %g0, %l1, %pstate ! restore interrupts
2410 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2411 * which deals with watchpoints. Otherwise, just return -1.
2416 restore %g0, -1, %o0
2418 call dtrace_blksuword32_err
2421 SET_SIZE(dtrace_blksuword32)
2423 #ifdef CHEETAHPLUS_ERRATUM_25
2425 /* Claim a chunk of physical address space. */
! NOTE(review): the ENTRY(claimlines) line and the loop setup/branch
! lines are absent from this excerpt. The visible compare-and-swap of
! 0 with 0 reads the line from memory and writes the same value back,
! which is presumably how the line is "claimed" (forced through this
! CPU's cache) -- confirm against the full source.
2431 casxa [%o3]ASI_MEM, %g0, %g0
2435 SET_SIZE(claimlines)
2438 * CPU feature initialization,
2442 ENTRY(cpu_feature_init)
2443 save %sp, -SA(MINFRAME), %sp
! If cheetah_bpe_off is set, disable the branch predict enable bit
! in the dispatch control register.
2444 sethi %hi(cheetah_bpe_off), %o0
2445 ld [%o0 + %lo(cheetah_bpe_off)], %o0
2448 rd ASR_DISPATCH_CONTROL, %o0
2449 andn %o0, ASR_DISPATCH_CONTROL_BPE, %o0
2450 wr %o0, 0, ASR_DISPATCH_CONTROL
2453 ! get the device_id and store the device_id
2454 ! in the appropriate cpunodes structure
2455 ! given the cpus index
! %o0 holds the cpu index here (loaded on an elided line);
! scale it to a cpunodes[] byte offset.
2458 mulx %o0, CPU_NODE_SIZE, %o0
2459 set cpunodes + DEVICE_ID, %o1
2460 ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
2461 stx %o2, [%o0 + %o1]
2462 #ifdef CHEETAHPLUS_ERRATUM_34
2464 ! apply Cheetah+ erratum 34 workaround
! Relocate any locked index-0 I-TLB and D-TLB entries (see the
! erratum 34 fixup routines below).
2466 call itlb_erratum34_fixup
2468 call dtlb_erratum34_fixup
2470 #endif /* CHEETAHPLUS_ERRATUM_34 */
2473 SET_SIZE(cpu_feature_init)
2476 * Copy a tsb entry atomically, from src to dest.
2477 * src must be 128 bit aligned.
! %o0 = src, %o1 = dest (presumably -- the store half of this routine
! is absent from this excerpt; confirm against the full source).
! The quad load fetches tag and data in one atomic 128-bit access.
2479 ENTRY(copy_tsb_entry)
2480 ldda [%o0]ASI_NQUAD_LD, %o2 ! %o2 = tag, %o3 = data
2485 SET_SIZE(copy_tsb_entry)
2487 #endif /* CHEETAHPLUS_ERRATUM_25 */
2489 #ifdef CHEETAHPLUS_ERRATUM_34
2492 ! In Cheetah+ erratum 34, under certain conditions an ITLB locked
2493 ! index 0 TTE will erroneously be displaced when a new TTE is
2494 ! loaded via ASI_ITLB_IN. In order to avoid cheetah+ erratum 34,
2495 ! locked index 0 TTEs must be relocated.
2497 ! NOTE: Care must be taken to avoid an ITLB miss in this routine.
! Register roles: %o3 = saved %pstate, %o1 = entry 0 TTE data,
! %o2 = entry 0 TTE tag, %g3 = t16 TLB access pointer (index << 3),
! %g4 = MMU tag access VA. %o3 is loaded by an rdpr elided from
! this excerpt.
2499 ENTRY_NP(itlb_erratum34_fixup)
2502 PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
2504 wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts
2505 ldxa [%g0]ASI_ITLB_ACCESS, %o1 ! %o1 = entry 0 data
2506 ldxa [%g0]ASI_ITLB_TAGREAD, %o2 ! %o2 = entry 0 tag
! A valid TTE has its MSB set, so as a signed value it is negative.
2508 cmp %o1, %g0 ! Is this entry valid?
2510 andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked?
! Entry 0 is not both valid and locked: nothing to relocate.
2514 retl ! Nope, outta here...
2515 wrpr %g0, %o3, %pstate ! Enable interrupts
! Demap the locked entry 0 mapping before re-inserting it elsewhere.
2517 sethi %hi(FLUSH_ADDR), %o4
2518 stxa %g0, [%o2]ASI_ITLB_DEMAP ! Flush this mapping
2519 flush %o4 ! Flush required for I-MMU
2521 ! Start search from index 1 up. This is because the kernel force
2522 ! loads its text page at index 15 in sfmmu_kernel_remap() and we
2523 ! don't want our relocated entry evicted later.
2525 ! NOTE: We assume that we'll be successful in finding an unlocked
2526 ! or invalid entry. If that isn't the case there are bound to
2531 ldxa [%g3]ASI_ITLB_ACCESS, %o4 ! Load TTE from t16
2533 ! If this entry isn't valid, we'll choose to displace it (regardless
2536 cmp %o4, %g0 ! TTE is > 0 iff not valid
2537 bge %xcc, 4f ! If invalid, go displace
2538 andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
2539 bnz,a %icc, 3b ! If locked, look at next
2540 add %g3, (1 << 3), %g3 ! entry
2543 ! We found an unlocked or invalid entry; we'll explicitly load
2544 ! the former index 0 entry here.
! Write the saved tag to the tag access register, then store the
! saved data at the chosen index; flush to synchronize the I-MMU.
2546 sethi %hi(FLUSH_ADDR), %o4
2547 set MMU_TAG_ACCESS, %g4
2548 stxa %o2, [%g4]ASI_IMMU
2549 stxa %o1, [%g3]ASI_ITLB_ACCESS
2550 flush %o4 ! Flush required for I-MMU
2552 wrpr %g0, %o3, %pstate ! Enable interrupts
2553 SET_SIZE(itlb_erratum34_fixup)
2556 ! In Cheetah+ erratum 34, under certain conditions a DTLB locked
2557 ! index 0 TTE will erroneously be displaced when a new TTE is
2558 ! loaded. In order to avoid cheetah+ erratum 34, locked index 0
2559 ! TTEs must be relocated.
! D-side twin of itlb_erratum34_fixup. Register roles: %o3 = saved
! %pstate (rdpr elided from this excerpt), %o1 = entry 0 TTE data,
! %o2 = entry 0 TTE tag, %g3 = t16 TLB access pointer,
! %g4 = MMU tag access VA. No flush is needed for the D-MMU.
2561 ENTRY_NP(dtlb_erratum34_fixup)
2564 PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
2566 wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts
2567 ldxa [%g0]ASI_DTLB_ACCESS, %o1 ! %o1 = entry 0 data
2568 ldxa [%g0]ASI_DTLB_TAGREAD, %o2 ! %o2 = entry 0 tag
! A valid TTE has its MSB set, so as a signed value it is negative.
2570 cmp %o1, %g0 ! Is this entry valid?
2572 andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked?
! Entry 0 is not both valid and locked: nothing to relocate.
2576 retl ! Nope, outta here...
2577 wrpr %g0, %o3, %pstate ! Enable interrupts
! Demap the locked entry 0 mapping before re-inserting it elsewhere.
2579 stxa %g0, [%o2]ASI_DTLB_DEMAP ! Flush this mapping
2582 ! Start search from index 1 up.
2584 ! NOTE: We assume that we'll be successful in finding an unlocked
2585 ! or invalid entry. If that isn't the case there are bound to
2590 ldxa [%g3]ASI_DTLB_ACCESS, %o4 ! Load TTE from t16
2592 ! If this entry isn't valid, we'll choose to displace it (regardless
2595 cmp %o4, %g0 ! TTE is > 0 iff not valid
2596 bge %xcc, 4f ! If invalid, go displace
2597 andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
2598 bnz,a %icc, 3b ! If locked, look at next
2599 add %g3, (1 << 3), %g3 ! entry
2602 ! We found an unlocked or invalid entry; we'll explicitly load
2603 ! the former index 0 entry here.
! Write the saved tag to the tag access register, then store the
! saved data at the chosen index.
2605 set MMU_TAG_ACCESS, %g4
2606 stxa %o2, [%g4]ASI_DMMU
2607 stxa %o1, [%g3]ASI_DTLB_ACCESS
2610 wrpr %g0, %o3, %pstate ! Enable interrupts
2611 SET_SIZE(dtlb_erratum34_fixup)
2613 #endif /* CHEETAHPLUS_ERRATUM_34 */