usr/src/uts/sun4u/cpu/us3_common_asm.S

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Copyright 2020 Joyent, Inc.
  26  *
  27  * Assembly code support for Cheetah/Cheetah+ modules
  28  */
  29
  30 #include "assym.h"
  31
  32 #include <sys/asm_linkage.h>
  33 #include <sys/mmu.h>
  34 #include <vm/hat_sfmmu.h>
  35 #include <sys/machparam.h>
  36 #include <sys/machcpuvar.h>
  37 #include <sys/machthread.h>
  38 #include <sys/machtrap.h>
  39 #include <sys/privregs.h>
  40 #include <sys/trap.h>
  41 #include <sys/cheetahregs.h>
  42 #include <sys/us3_module.h>
  43 #include <sys/xc_impl.h>
  44 #include <sys/intreg.h>
  45 #include <sys/async.h>
  46 #include <sys/clock.h>
  47 #include <sys/cheetahasm.h>
  48 #include <sys/cmpregs.h>
  49
  50 #ifdef TRAPTRACE
  51 #include <sys/traptrace.h>
  52 #endif /* TRAPTRACE */
  53
  54 /* BEGIN CSTYLED */
  55
  56 #define DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)                  \
  57         ldxa    [%g0]ASI_DCU, tmp1                                      ;\
  58         btst    DCU_DC, tmp1            /* is dcache enabled? */        ;\
  59         bz,pn   %icc, 1f                                                ;\
  60         ASM_LD(tmp1, dcache_linesize)                                   ;\
  61         ASM_LD(tmp2, dflush_type)                                       ;\
  62         cmp     tmp2, FLUSHPAGE_TYPE                                    ;\
  63         be,pt   %icc, 2f                                                ;\
  64         nop                                                             ;\
  65         sllx    arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */   ;\
  66         ASM_LD(tmp3, dcache_size)                                       ;\
  67         cmp     tmp2, FLUSHMATCH_TYPE                                   ;\
  68         be,pt   %icc, 3f                                                ;\
  69         nop                                                             ;\
  70         /*                                                              \
  71          * flushtype = FLUSHALL_TYPE, flush the whole thing             \
  72          * tmp3 = cache size                                            \
  73          * tmp1 = cache line size                                       \
  74          */                                                             \
  75         sub     tmp3, tmp1, tmp2                                        ;\
  76 4:                                                                      \
  77         stxa    %g0, [tmp2]ASI_DC_TAG                                   ;\
  78         membar  #Sync                                                   ;\
  79         cmp     %g0, tmp2                                               ;\
  80         bne,pt  %icc, 4b                                                ;\
  81         sub     tmp2, tmp1, tmp2                                        ;\
  82         ba,pt   %icc, 1f                                                ;\
  83         nop                                                             ;\
  84         /*                                                              \
  85          * flushtype = FLUSHPAGE_TYPE                                   \
  86          * arg1 = pfn                                                   \
  87          * arg2 = virtual color                                         \
  88          * tmp1 = cache line size                                       \
  89          * tmp2 = tag from cache                                        \
  90          * tmp3 = counter                                               \
  91          */                                                             \
  92 2:                                                                      \
  93         set     MMU_PAGESIZE, tmp3                                      ;\
  94         sllx    arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA     */   ;\
  95         sub     tmp3, tmp1, tmp3                                        ;\
  96 4:                                                                      \
  97         stxa    %g0, [arg1 + tmp3]ASI_DC_INVAL                          ;\
  98         membar  #Sync                                                   ;\
  99 5:                                                                      \
 100         cmp     %g0, tmp3                                               ;\
 101         bnz,pt  %icc, 4b                /* branch if not done */        ;\
 102         sub     tmp3, tmp1, tmp3                                        ;\
 103         ba,pt   %icc, 1f                                                ;\
 104         nop                                                             ;\
 105         /*                                                              \
 106          * flushtype = FLUSHMATCH_TYPE                                  \
 107          * arg1 = tag to compare against                                \
 108          * tmp1 = cache line size                                       \
 109          * tmp3 = cache size                                            \
 110          * arg2 = counter                                               \
 111          * tmp2 = cache tag                                             \
 112          */                                                             \
 113 3:                                                                      \
 114         sub     tmp3, tmp1, arg2                                        ;\
 115 4:                                                                      \
 116         ldxa    [arg2]ASI_DC_TAG, tmp2          /* read tag */          ;\
 117         btst    CHEETAH_DC_VBIT_MASK, tmp2                              ;\
 118         bz,pn   %icc, 5f                /* br if no valid sub-blocks */ ;\
 119         andn    tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */ ;\
 120         cmp     tmp2, arg1                                              ;\
 121         bne,pn  %icc, 5f                /* branch if tag miss */        ;\
 122         nop                                                             ;\
 123         stxa    %g0, [arg2]ASI_DC_TAG                                   ;\
 124         membar  #Sync                                                   ;\
 125 5:                                                                      \
 126         cmp     %g0, arg2                                               ;\
 127         bne,pt  %icc, 4b                /* branch if not done */        ;\
 128         sub     arg2, tmp1, arg2                                        ;\
 129 1:
 130
 131 /*
 132  * macro that flushes the entire dcache color
 133  * dcache size = 64K, one way 16K
 134  *
 135  * In:
 136  *    arg = virtual color register (not clobbered)
 137  *    way = way#, can either be a constant or a register (not clobbered)
 138  *    tmp1, tmp2, tmp3 = scratch registers
 139  *
 140  */
 141 #define DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3)                   \
 142         ldxa    [%g0]ASI_DCU, tmp1;                                     \
 143         btst    DCU_DC, tmp1;           /* is dcache enabled? */        \
 144         bz,pn   %icc, 1f;                                               \
 145         ASM_LD(tmp1, dcache_linesize)                                   \
 146         /*                                                              \
 147          * arg = virtual color                                          \
 148          * tmp1 = cache line size                                       \
 149          */                                                             \
 150         sllx    arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */    \
 151         mov     way, tmp3;                                              \
 152         sllx    tmp3, 14, tmp3;           /* One way 16K */             \
 153         or      tmp2, tmp3, tmp3;                                       \
 154         set     MMU_PAGESIZE, tmp2;                                     \
 155         /*                                                              \
 156          * tmp2 = page size                                             \
 157          * tmp3 =  cached page in dcache                                \
 158          */                                                             \
 159         sub     tmp2, tmp1, tmp2;                                       \
 160 2:                                                                      \
 161         stxa    %g0, [tmp3 + tmp2]ASI_DC_TAG;                           \
 162         membar  #Sync;                                                  \
 163         cmp     %g0, tmp2;                                              \
 164         bne,pt  %icc, 2b;                                               \
 165         sub     tmp2, tmp1, tmp2;                                       \
 166 1:
 167
 168 /* END CSTYLED */
 169
 170 /*
 171  * Cheetah MMU and Cache operations.
 172  */
 173
 174         ENTRY_NP(vtag_flushpage)
 175         /*
 176          * flush page from the tlb
 177          *
 178          * %o0 = vaddr
 179          * %o1 = sfmmup
 180          */
 181         rdpr    %pstate, %o5
 182 #ifdef DEBUG
 183         PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
 184 #endif /* DEBUG */
 185         /*
 186          * disable ints
 187          */
 188         andn    %o5, PSTATE_IE, %o4
 189         wrpr    %o4, 0, %pstate
 190
 191         /*
 192          * Then, blow out the tlb
 193          * Interrupts are disabled to prevent the primary ctx register
 194          * from changing underneath us.
 195          */
 196         sethi   %hi(ksfmmup), %o3
 197         ldx     [%o3 + %lo(ksfmmup)], %o3
 198         cmp     %o3, %o1
 199         bne,pt   %xcc, 1f                       ! if not kernel as, go to 1
 200           sethi %hi(FLUSH_ADDR), %o3
 201         /*
 202          * For Kernel demaps use primary. type = page implicitly
 203          */
 204         stxa    %g0, [%o0]ASI_DTLB_DEMAP        /* dmmu flush for KCONTEXT */
 205         stxa    %g0, [%o0]ASI_ITLB_DEMAP        /* immu flush for KCONTEXT */
 206         flush   %o3
 207         retl
 208           wrpr  %g0, %o5, %pstate               /* enable interrupts */
 209 1:
 210         /*
 211          * User demap.  We need to set the primary context properly.
 212          * Secondary context cannot be used for Cheetah IMMU.
 213          * %o0 = vaddr
 214          * %o1 = sfmmup
 215          * %o3 = FLUSH_ADDR
 216          */
 217         SFMMU_CPU_CNUM(%o1, %g1, %g2)           ! %g1 = sfmmu cnum on this CPU
 218
 219         ldub    [%o1 + SFMMU_CEXT], %o4         ! %o4 = sfmmup->sfmmu_cext
 220         sll     %o4, CTXREG_EXT_SHIFT, %o4
 221         or      %g1, %o4, %g1                   ! %g1 = primary pgsz | cnum
 222
 223         wrpr    %g0, 1, %tl
 224         set     MMU_PCONTEXT, %o4
 225         or      DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
 226         ldxa    [%o4]ASI_DMMU, %o2              ! %o2 = save old ctxnum
 227         srlx    %o2, CTXREG_NEXT_SHIFT, %o1     ! need to preserve nucleus pgsz
 228         sllx    %o1, CTXREG_NEXT_SHIFT, %o1     ! %o1 = nucleus pgsz
 229         or      %g1, %o1, %g1                   ! %g1 = nucleus pgsz | primary pgsz | cnum
 230         stxa    %g1, [%o4]ASI_DMMU              ! wr new ctxum
 231
 232         stxa    %g0, [%o0]ASI_DTLB_DEMAP
 233         stxa    %g0, [%o0]ASI_ITLB_DEMAP
 234         stxa    %o2, [%o4]ASI_DMMU              /* restore old ctxnum */
 235         flush   %o3
 236         wrpr    %g0, 0, %tl
 237
 238         retl
 239         wrpr    %g0, %o5, %pstate               /* enable interrupts */
 240         SET_SIZE(vtag_flushpage)
 241
 242         ENTRY_NP2(vtag_flushall, demap_all)
 243         /*
 244          * flush the tlb
 245          */
 246         sethi   %hi(FLUSH_ADDR), %o3
 247         set     DEMAP_ALL_TYPE, %g1
 248         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 249         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 250         flush   %o3
 251         retl
 252         nop
 253         SET_SIZE(demap_all)
 254         SET_SIZE(vtag_flushall)
 255
 256
 257         ENTRY_NP(vtag_flushpage_tl1)
 258         /*
 259          * x-trap to flush page from tlb and tsb
 260          *
 261          * %g1 = vaddr, zero-extended on 32-bit kernel
 262          * %g2 = sfmmup
 263          *
 264          * assumes TSBE_TAG = 0
 265          */
 266         srln    %g1, MMU_PAGESHIFT, %g1
 267
 268         sethi   %hi(ksfmmup), %g3
 269         ldx     [%g3 + %lo(ksfmmup)], %g3
 270         cmp     %g3, %g2
 271         bne,pt  %xcc, 1f                        ! if not kernel as, go to 1
 272           slln  %g1, MMU_PAGESHIFT, %g1         /* g1 = vaddr */
 273
 274         /* We need to demap in the kernel context */
 275         or      DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
 276         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 277         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 278         retry
 279 1:
 280         /* We need to demap in a user context */
 281         or      DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
 282
 283         SFMMU_CPU_CNUM(%g2, %g6, %g3)   ! %g6 = sfmmu cnum on this CPU
 284
 285         ldub    [%g2 + SFMMU_CEXT], %g4         ! %g4 = sfmmup->cext
 286         sll     %g4, CTXREG_EXT_SHIFT, %g4
 287         or      %g6, %g4, %g6                   ! %g6 = pgsz | cnum
 288
 289         set     MMU_PCONTEXT, %g4
 290         ldxa    [%g4]ASI_DMMU, %g5              /* rd old ctxnum */
 291         srlx    %g5, CTXREG_NEXT_SHIFT, %g2     /* %g2 = nucleus pgsz */
 292         sllx    %g2, CTXREG_NEXT_SHIFT, %g2     /* preserve nucleus pgsz */
 293         or      %g6, %g2, %g6                   /* %g6 = nucleus pgsz | primary pgsz | cnum */
 294         stxa    %g6, [%g4]ASI_DMMU              /* wr new ctxum */
 295         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 296         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 297         stxa    %g5, [%g4]ASI_DMMU              /* restore old ctxnum */
 298         retry
 299         SET_SIZE(vtag_flushpage_tl1)
 300
 301
 302         ENTRY_NP(vtag_flush_pgcnt_tl1)
 303         /*
 304          * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
 305          *
 306          * %g1 = vaddr, zero-extended on 32-bit kernel
 307          * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is pass'ed in via pgcnt6 bits.
 308          *
 309          * NOTE: this handler relies on the fact that no
 310          *      interrupts or traps can occur during the loop
 311          *      issuing the TLB_DEMAP operations. It is assumed
 312          *      that interrupts are disabled and this code is
 313          *      fetching from the kernel locked text address.
 314          *
 315          * assumes TSBE_TAG = 0
 316          */
 317         set     SFMMU_PGCNT_MASK, %g4
 318         and     %g4, %g2, %g3                   /* g3 = pgcnt - 1 */
 319         add     %g3, 1, %g3                     /* g3 = pgcnt */
 320
 321         andn    %g2, SFMMU_PGCNT_MASK, %g2      /* g2 = sfmmup */
 322         srln    %g1, MMU_PAGESHIFT, %g1
 323
 324         sethi   %hi(ksfmmup), %g4
 325         ldx     [%g4 + %lo(ksfmmup)], %g4
 326         cmp     %g4, %g2
 327         bne,pn   %xcc, 1f                       /* if not kernel as, go to 1 */
 328           slln  %g1, MMU_PAGESHIFT, %g1         /* g1 = vaddr */
 329
 330         /* We need to demap in the kernel context */
 331         or      DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
 332         set     MMU_PAGESIZE, %g2               /* g2 = pgsize */
 333         sethi   %hi(FLUSH_ADDR), %g5
 334 4:
 335         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 336         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 337         flush   %g5                             ! flush required by immu
 338
 339         deccc   %g3                             /* decr pgcnt */
 340         bnz,pt  %icc,4b
 341           add   %g1, %g2, %g1                   /* next page */
 342         retry
 343 1:
 344         /*
 345          * We need to demap in a user context
 346          *
 347          * g2 = sfmmup
 348          * g3 = pgcnt
 349          */
 350         SFMMU_CPU_CNUM(%g2, %g5, %g6)           ! %g5 = sfmmu cnum on this CPU
 351
 352         or      DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
 353
 354         ldub    [%g2 + SFMMU_CEXT], %g4         ! %g4 = sfmmup->cext
 355         sll     %g4, CTXREG_EXT_SHIFT, %g4
 356         or      %g5, %g4, %g5
 357
 358         set     MMU_PCONTEXT, %g4
 359         ldxa    [%g4]ASI_DMMU, %g6              /* rd old ctxnum */
 360         srlx    %g6, CTXREG_NEXT_SHIFT, %g2     /* %g2 = nucleus pgsz */
 361         sllx    %g2, CTXREG_NEXT_SHIFT, %g2     /* preserve nucleus pgsz */
 362         or      %g5, %g2, %g5                   /* %g5 = nucleus pgsz | primary pgsz | cnum */
 363         stxa    %g5, [%g4]ASI_DMMU              /* wr new ctxum */
 364
 365         set     MMU_PAGESIZE, %g2               /* g2 = pgsize */
 366         sethi   %hi(FLUSH_ADDR), %g5
 367 3:
 368         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 369         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 370         flush   %g5                             ! flush required by immu
 371
 372         deccc   %g3                             /* decr pgcnt */
 373         bnz,pt  %icc,3b
 374           add   %g1, %g2, %g1                   /* next page */
 375
 376         stxa    %g6, [%g4]ASI_DMMU              /* restore old ctxnum */
 377         retry
 378         SET_SIZE(vtag_flush_pgcnt_tl1)
 379
 380         ENTRY_NP(vtag_flushall_tl1)
 381         /*
 382          * x-trap to flush tlb
 383          */
 384         set     DEMAP_ALL_TYPE, %g4
 385         stxa    %g0, [%g4]ASI_DTLB_DEMAP
 386         stxa    %g0, [%g4]ASI_ITLB_DEMAP
 387         retry
 388         SET_SIZE(vtag_flushall_tl1)
 389
 390
 391 /*
 392  * vac_flushpage(pfnum, color)
 393  *      Flush 1 8k page of the D-$ with physical page = pfnum
 394  *      Algorithm:
 395  *              The cheetah dcache is a 64k psuedo 4 way accaociative cache.
 396  *              It is virtual indexed, physically tagged cache.
 397  */
 398         .seg    ".data"
 399         .align  8
 400         .global dflush_type
 401 dflush_type:
 402         .word   FLUSHPAGE_TYPE
 403
 404         ENTRY(vac_flushpage)
 405         /*
 406          * flush page from the d$
 407          *
 408          * %o0 = pfnum, %o1 = color
 409          */
 410         DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
 411         retl
 412           nop
 413         SET_SIZE(vac_flushpage)
 414
 415
 416         ENTRY_NP(vac_flushpage_tl1)
 417         /*
 418          * x-trap to flush page from the d$
 419          *
 420          * %g1 = pfnum, %g2 = color
 421          */
 422         DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
 423         retry
 424         SET_SIZE(vac_flushpage_tl1)
 425
 426
 427         ENTRY(vac_flushcolor)
 428         /*
 429          * %o0 = vcolor
 430          */
 431         DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)
 432         DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)
 433         DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)
 434         DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)
 435         retl
 436           nop
 437         SET_SIZE(vac_flushcolor)
 438
 439
 440         ENTRY(vac_flushcolor_tl1)
 441         /*
 442          * %g1 = vcolor
 443          */
 444         DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)
 445         DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)
 446         DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)
 447         DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)
 448         retry
 449         SET_SIZE(vac_flushcolor_tl1)
 450
 451 /*
 452  * Determine whether or not the IDSR is busy.
 453  * Entry: no arguments
 454  * Returns: 1 if busy, 0 otherwise
 455  */
 456         ENTRY(idsr_busy)
 457         ldxa    [%g0]ASI_INTR_DISPATCH_STATUS, %g1
 458         clr     %o0
 459         btst    IDSR_BUSY, %g1
 460         bz,a,pt %xcc, 1f
 461         mov     1, %o0
 462 1:
 463         retl
 464         nop
 465         SET_SIZE(idsr_busy)
 466
 467         .global _dispatch_status_busy
 468 _dispatch_status_busy:
 469         .asciz  "ASI_INTR_DISPATCH_STATUS error: busy"
 470         .align  4
 471
 472 /*
 473  * Setup interrupt dispatch data registers
 474  * Entry:
 475  *      %o0 - function or inumber to call
 476  *      %o1, %o2 - arguments (2 uint64_t's)
 477  */
 478         .seg "text"
 479
 480         ENTRY(init_mondo)
 481 #ifdef DEBUG
 482         !
 483         ! IDSR should not be busy at the moment
 484         !
 485         ldxa    [%g0]ASI_INTR_DISPATCH_STATUS, %g1
 486         btst    IDSR_BUSY, %g1
 487         bz,pt   %xcc, 1f
 488         nop
 489         sethi   %hi(_dispatch_status_busy), %o0
 490         call    panic
 491         or      %o0, %lo(_dispatch_status_busy), %o0
 492 #endif /* DEBUG */
 493
 494         ALTENTRY(init_mondo_nocheck)
 495         !
 496         ! interrupt vector dispatch data reg 0
 497         !
 498 1:
 499         mov     IDDR_0, %g1
 500         mov     IDDR_1, %g2
 501         mov     IDDR_2, %g3
 502         stxa    %o0, [%g1]ASI_INTR_DISPATCH
 503
 504         !
 505         ! interrupt vector dispatch data reg 1
 506         !
 507         stxa    %o1, [%g2]ASI_INTR_DISPATCH
 508
 509         !
 510         ! interrupt vector dispatch data reg 2
 511         !
 512         stxa    %o2, [%g3]ASI_INTR_DISPATCH
 513
 514         membar  #Sync
 515         retl
 516         nop
 517         SET_SIZE(init_mondo_nocheck)
 518         SET_SIZE(init_mondo)
 519
 520
 521 #if !(defined(JALAPENO) || defined(SERRANO))
 522
 523 /*
 524  * Ship mondo to aid using busy/nack pair bn
 525  */
 526         ENTRY_NP(shipit)
 527         sll     %o0, IDCR_PID_SHIFT, %g1        ! IDCR<18:14> = agent id
 528         sll     %o1, IDCR_BN_SHIFT, %g2         ! IDCR<28:24> = b/n pair
 529         or      %g1, IDCR_OFFSET, %g1           ! IDCR<13:0> = 0x70
 530         or      %g1, %g2, %g1
 531         stxa    %g0, [%g1]ASI_INTR_DISPATCH     ! interrupt vector dispatch
 532         membar  #Sync
 533         retl
 534         nop
 535         SET_SIZE(shipit)
 536
 537 #endif  /* !(JALAPENO || SERRANO) */
 538
 539
 540 /*
 541  * flush_instr_mem:
 542  *      Flush 1 page of the I-$ starting at vaddr
 543  *      %o0 vaddr
 544  *      %o1 bytes to be flushed
 545  * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
 546  * the stores from all processors so that a FLUSH instruction is only needed
 547  * to ensure pipeline is consistent. This means a single flush is sufficient at
 548  * the end of a sequence of stores that updates the instruction stream to
 549  * ensure correct operation.
 550  */
 551
 552         ENTRY(flush_instr_mem)
 553         flush   %o0                     ! address irrelevant
 554         retl
 555         nop
 556         SET_SIZE(flush_instr_mem)
 557
 558
 559 #if defined(CPU_IMP_ECACHE_ASSOC)
 560
 561         ENTRY(get_ecache_ctrl)
 562         GET_CPU_IMPL(%o0)
 563         cmp     %o0, JAGUAR_IMPL
 564         !
 565         ! Putting an ASI access in the delay slot may
 566         ! cause it to be accessed, even when annulled.
 567         !
 568         bne     1f
 569           nop
 570         ldxa    [%g0]ASI_EC_CFG_TIMING, %o0     ! read Jaguar shared E$ ctrl reg
 571         b       2f
 572           nop
 573 1:
 574         ldxa    [%g0]ASI_EC_CTRL, %o0           ! read Ch/Ch+ E$ control reg
 575 2:
 576         retl
 577           nop
 578         SET_SIZE(get_ecache_ctrl)
 579
 580 #endif  /* CPU_IMP_ECACHE_ASSOC */
 581
 582
 583 #if !(defined(JALAPENO) || defined(SERRANO))
 584
 585 /*
 586  * flush_ecache:
 587  *      %o0 - 64 bit physical address
 588  *      %o1 - ecache size
 589  *      %o2 - ecache linesize
 590  */
 591
 592         ENTRY(flush_ecache)
 593
 594         /*
 595          * For certain CPU implementations, we have to flush the L2 cache
 596          * before flushing the ecache.
 597          */
 598         PN_L2_FLUSHALL(%g3, %g4, %g5)
 599
 600         /*
 601          * Flush the entire Ecache using displacement flush.
 602          */
 603         ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)
 604
 605         retl
 606         nop
 607         SET_SIZE(flush_ecache)
 608
 609 #endif  /* !(JALAPENO || SERRANO) */
 610
 611
 612         ENTRY(flush_dcache)
 613         ASM_LD(%o0, dcache_size)
 614         ASM_LD(%o1, dcache_linesize)
 615         CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
 616         retl
 617         nop
 618         SET_SIZE(flush_dcache)
 619
 620
 621         ENTRY(flush_icache)
 622         GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
 623         ld      [%o0 + CHPR_ICACHE_LINESIZE], %o1
 624         ba,pt   %icc, 2f
 625           ld    [%o0 + CHPR_ICACHE_SIZE], %o0
 626 flush_icache_1:
 627         ASM_LD(%o0, icache_size)
 628         ASM_LD(%o1, icache_linesize)
 629 2:
 630         CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
 631         retl
 632         nop
 633         SET_SIZE(flush_icache)
 634
 635         ENTRY(kdi_flush_idcache)
 636         CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
 637         CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
 638         membar  #Sync
 639         retl
 640         nop
 641         SET_SIZE(kdi_flush_idcache)
 642
 643         ENTRY(flush_pcache)
 644         PCACHE_FLUSHALL(%o0, %o1, %o2)
 645         retl
 646         nop
 647         SET_SIZE(flush_pcache)
 648
 649
 650 #if defined(CPU_IMP_L1_CACHE_PARITY)
 651
 652 /*
 653  * Get dcache data and tag.  The Dcache data is a pointer to a ch_dc_data_t
 654  * structure (see cheetahregs.h):
 655  * The Dcache *should* be turned off when this code is executed.
 656  */
 657         .align  128
 658         ENTRY(get_dcache_dtag)
 659         rdpr    %pstate, %o5
 660         andn    %o5, PSTATE_IE | PSTATE_AM, %o3
 661         wrpr    %g0, %o3, %pstate
 662         b       1f
 663           stx   %o0, [%o1 + CH_DC_IDX]
 664
 665         .align  128
 666 1:
 667         ldxa    [%o0]ASI_DC_TAG, %o2
 668         stx     %o2, [%o1 + CH_DC_TAG]
 669         membar  #Sync
 670         ldxa    [%o0]ASI_DC_UTAG, %o2
 671         membar  #Sync
 672         stx     %o2, [%o1 + CH_DC_UTAG]
 673         ldxa    [%o0]ASI_DC_SNP_TAG, %o2
 674         stx     %o2, [%o1 + CH_DC_SNTAG]
 675         add     %o1, CH_DC_DATA, %o1
 676         clr     %o3
 677 2:
 678         membar  #Sync                           ! required before ASI_DC_DATA
 679         ldxa    [%o0 + %o3]ASI_DC_DATA, %o2
 680         membar  #Sync                           ! required after ASI_DC_DATA
 681         stx     %o2, [%o1 + %o3]
 682         cmp     %o3, CH_DC_DATA_REG_SIZE - 8
 683         blt     2b
 684           add   %o3, 8, %o3
 685
 686         /*
 687          * Unlike other CPUs in the family, D$ data parity bits for Panther
 688          * do not reside in the microtag. Instead, we have to read them
 689          * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
 690          * of just having 8 parity bits to protect all 32 bytes of data
 691          * per line, we now have 32 bits of parity.
 692          */
 693         GET_CPU_IMPL(%o3)
 694         cmp     %o3, PANTHER_IMPL
 695         bne     4f
 696           clr   %o3
 697
 698         /*
 699          * move our pointer to the next field where we store parity bits
 700          * and add the offset of the last parity byte since we will be
 701          * storing all 4 parity bytes within one 64 bit field like this:
 702          *
 703          * +------+------------+------------+------------+------------+
 704          * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
 705          * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
 706          * +------+------------+------------+------------+------------+
 707          *  63:32     31:24        23:16         15:8          7:0
 708          */
 709         add     %o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
 710
 711         /* add the DC_data_parity bit into our working index */
 712         mov     1, %o2
 713         sll     %o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
 714         or      %o0, %o2, %o0
 715 3:
 716         membar  #Sync                           ! required before ASI_DC_DATA
 717         ldxa    [%o0 + %o3]ASI_DC_DATA, %o2
 718         membar  #Sync                           ! required after ASI_DC_DATA
 719         stb     %o2, [%o1]
 720         dec     %o1
 721         cmp     %o3, CH_DC_DATA_REG_SIZE - 8
 722         blt     3b
 723           add   %o3, 8, %o3
 724 4:
 725         retl
 726           wrpr  %g0, %o5, %pstate
 727         SET_SIZE(get_dcache_dtag)
 728
 729
 730 /*
 731  * Get icache data and tag.  The data argument is a pointer to a ch_ic_data_t
 732  * structure (see cheetahregs.h):
 733  * The Icache *Must* be turned off when this function is called.
 734  * This is because diagnostic accesses to the Icache interfere with cache
 735  * consistency.
 736  */
 737         .align  128
 738         ENTRY(get_icache_dtag)
 739         rdpr    %pstate, %o5
 740         andn    %o5, PSTATE_IE | PSTATE_AM, %o3
 741         wrpr    %g0, %o3, %pstate
 742
 743         stx     %o0, [%o1 + CH_IC_IDX]
 744         ldxa    [%o0]ASI_IC_TAG, %o2
 745         stx     %o2, [%o1 + CH_IC_PATAG]
 746         add     %o0, CH_ICTAG_UTAG, %o0
 747         ldxa    [%o0]ASI_IC_TAG, %o2
 748         add     %o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
 749         stx     %o2, [%o1 + CH_IC_UTAG]
 750         ldxa    [%o0]ASI_IC_TAG, %o2
 751         add     %o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
 752         stx     %o2, [%o1 + CH_IC_UPPER]
 753         ldxa    [%o0]ASI_IC_TAG, %o2
 754         andn    %o0, CH_ICTAG_TMASK, %o0
 755         stx     %o2, [%o1 + CH_IC_LOWER]
 756         ldxa    [%o0]ASI_IC_SNP_TAG, %o2
 757         stx     %o2, [%o1 + CH_IC_SNTAG]
 758         add     %o1, CH_IC_DATA, %o1
 759         clr     %o3
 760 2:
 761         ldxa    [%o0 + %o3]ASI_IC_DATA, %o2
 762         stx     %o2, [%o1 + %o3]
 763         cmp     %o3, PN_IC_DATA_REG_SIZE - 8
 764         blt     2b
 765           add   %o3, 8, %o3
 766
 767         retl
 768           wrpr  %g0, %o5, %pstate
 769         SET_SIZE(get_icache_dtag)
 770
 771 /*
 772  * Get pcache data and tags.
 773  * inputs:
 774  *   pcache_idx - fully constructed VA for for accessing P$ diagnostic
 775  *                registers. Contains PC_way and PC_addr shifted into
 776  *                the correct bit positions. See the PRM for more details.
 777  *   data       - pointer to a ch_pc_data_t
 778  * structure (see cheetahregs.h):
 779  */
 780         .align  128
 781         ENTRY(get_pcache_dtag)
 782         rdpr    %pstate, %o5
 783         andn    %o5, PSTATE_IE | PSTATE_AM, %o3
 784         wrpr    %g0, %o3, %pstate
 785
 786         stx     %o0, [%o1 + CH_PC_IDX]
 787         ldxa    [%o0]ASI_PC_STATUS_DATA, %o2
 788         stx     %o2, [%o1 + CH_PC_STATUS]
 789         ldxa    [%o0]ASI_PC_TAG, %o2
 790         stx     %o2, [%o1 + CH_PC_TAG]
 791         ldxa    [%o0]ASI_PC_SNP_TAG, %o2
 792         stx     %o2, [%o1 + CH_PC_SNTAG]
 793         add     %o1, CH_PC_DATA, %o1
 794         clr     %o3
 795 2:
 796         ldxa    [%o0 + %o3]ASI_PC_DATA, %o2
 797         stx     %o2, [%o1 + %o3]
 798         cmp     %o3, CH_PC_DATA_REG_SIZE - 8
 799         blt     2b
 800           add   %o3, 8, %o3
 801
 802         retl
 803           wrpr  %g0, %o5, %pstate
 804         SET_SIZE(get_pcache_dtag)
 805
 806 #endif  /* CPU_IMP_L1_CACHE_PARITY */
 807
 808 /*
 809  * re-enable the i$, d$, w$, and p$ according to bootup cache state.
 810  * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
 811  *   %o0 - 64 bit constant
 812  */
 813         ENTRY(set_dcu)
 814         stxa    %o0, [%g0]ASI_DCU       ! Store to DCU
 815         flush   %g0     /* flush required after changing the IC bit */
 816         retl
 817         nop
 818         SET_SIZE(set_dcu)
 819
 820
 821 /*
 822  * Return DCU register.
 823  */
 824         ENTRY(get_dcu)
 825         ldxa    [%g0]ASI_DCU, %o0               /* DCU control register */
 826         retl
 827         nop
 828         SET_SIZE(get_dcu)
 829
 830 /*
 831  * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
 832  *
 833  * This handler is used to check for softints generated by error trap
 834  * handlers to report errors.  On Cheetah, this mechanism is used by the
 835  * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
 836  * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
 837  * NB: Must be 8 instructions or less to fit in trap table and code must
 838  *     be relocatable.
 839  */
 840
 841         ENTRY_NP(ch_pil15_interrupt_instr)
 842         ASM_JMP(%g1, ch_pil15_interrupt)
 843         SET_SIZE(ch_pil15_interrupt_instr)
 844
 845
 846         ENTRY_NP(ch_pil15_interrupt)
 847
 848         /*
 849          * Since pil_interrupt is hacked to assume that every level 15
 850          * interrupt is generated by the CPU to indicate a performance
 851          * counter overflow this gets ugly.  Before calling pil_interrupt
 852          * the Error at TL>0 pending status is inspected.  If it is
 853          * non-zero, then an error has occurred and it is handled.
 854          * Otherwise control is transfered to pil_interrupt.  Note that if
 855          * an error is detected pil_interrupt will not be called and
 856          * overflow interrupts may be lost causing erroneous performance
 857          * measurements.  However, error-recovery will have a detrimental
 858          * effect on performance anyway.
 859          */
 860         CPU_INDEX(%g1, %g4)
 861         set     ch_err_tl1_pending, %g4
 862         ldub    [%g1 + %g4], %g2
 863         brz     %g2, 1f
 864           nop
 865
 866         /*
 867          * We have a pending TL>0 error, clear the TL>0 pending status.
 868          */
 869         stb     %g0, [%g1 + %g4]
 870
 871         /*
 872          * Clear the softint.
 873          */
 874         mov     1, %g5
 875         sll     %g5, PIL_15, %g5
 876         wr      %g5, CLEAR_SOFTINT
 877
 878         /*
 879          * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
 880          * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
 881          * panic flag (%g2).
 882          */
 883         set     cpu_tl1_error, %g1
 884         clr     %g2
 885         ba      sys_trap
 886           mov   PIL_15, %g4
 887
 888 1:
 889         /*
 890          * The logout is invalid.
 891          *
 892          * Call the default interrupt handler.
 893          */
 894         sethi   %hi(pil_interrupt), %g1
 895         jmp     %g1 + %lo(pil_interrupt)
 896           mov   PIL_15, %g4
 897
 898         SET_SIZE(ch_pil15_interrupt)
 899
 900
 901 /*
 902  * Error Handling
 903  *
 904  * Cheetah provides error checking for all memory access paths between
 905  * the CPU, External Cache, Cheetah Data Switch and system bus. Error
 906  * information is logged in the AFSR, (also AFSR_EXT for Panther) and
 907  * AFAR and one of the following traps is generated (provided that it
 908  * is enabled in External Cache Error Enable Register) to handle that
 909  * error:
 910  * 1. trap 0x70: Precise trap
 911  *    tt0_fecc for errors at trap level(TL)>=0
 912  * 2. trap 0x0A and 0x32: Deferred trap
 913  *    async_err for errors at TL>=0
 914  * 3. trap 0x63: Disrupting trap
 915  *    ce_err for errors at TL=0
 916  *    (Note that trap 0x63 cannot happen at trap level > 0)
 917  *
 918  * Trap level one handlers panic the system except for the fast ecc
 919  * error handler which tries to recover from certain errors.
 920  */
 921
 922 /*
 923  * FAST ECC TRAP STRATEGY:
 924  *
 925  * Software must handle single and multi bit errors which occur due to data
 926  * or instruction cache reads from the external cache. A single or multi bit
 927  * error occuring in one of these situations results in a precise trap.
 928  *
 929  * The basic flow of this trap handler is as follows:
 930  *
 931  * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
 932  *    is disabled because bad data could have been installed.  The Icache is
 933  *    turned off because we want to capture the Icache line related to the
 934  *    AFAR.
 935  * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
 936  * 3) Park sibling core if caches are shared (to avoid race condition while
 937  *    accessing shared resources such as L3 data staging register during
 938  *    CPU logout.
 939  * 4) Read the AFAR and AFSR.
 940  * 5) If CPU logout structure is not being used, then:
 941  *    6) Clear all errors from the AFSR.
 942  *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
 943  *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
 944  *       state.
 945  *    9) Unpark sibling core if we parked it earlier.
 946  *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
 947  *        running at PIL 15.
 948  * 6) Otherwise, if CPU logout structure is being used:
 949  *    7) Incriment the "logout busy count".
 950  *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
 951  *       state.
 952  *    9) Unpark sibling core if we parked it earlier.
 953  *    10) Issue a retry since the other CPU error logging code will end up
 954  *       finding this error bit and logging information about it later.
 955  * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
 956  *    yet initialized such that we can't even check the logout struct, then
 957  *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
 958  *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
 959  *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
 960  *    in the high level trap handler since we don't have access to detailed
 961  *    logout information in cases where the cpu_private struct is not yet
 962  *    initialized.
 963  *
 964  * We flush the E$ and D$ here on TL=1 code to prevent getting nested
 965  * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
 966  * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
 967  * since it is uses different code/data from this handler, has a better
 968  * chance of fixing things up than simply recursing through this code
 969  * again (this would probably cause an eventual kernel stack overflow).
 970  * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
 971  * can flush the E$ (or the error is a stuck-at bit), we will recurse in
 972  * the Fast ECC at TL>0 handler and eventually Red Mode.
 973  *
 974  * Note that for Cheetah (and only Cheetah), we use alias addresses for
 975  * flushing rather than ASI accesses (which don't exist on Cheetah).
 976  * Should we encounter a Fast ECC error within this handler on Cheetah,
 977  * there's a good chance it's within the ecache_flushaddr buffer (since
 978  * it's the largest piece of memory we touch in the handler and it is
 979  * usually kernel text/data).  For that reason the Fast ECC at TL>0
 980  * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
 981  */
 982
 983 /*
 984  * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
 985  * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
 986  * architecture-specific files.
 987  * NB: Must be 8 instructions or less to fit in trap table and code must
 988  *     be relocatable.
 989  */
 990
 991         ENTRY_NP(fecc_err_instr)
 992         membar  #Sync                   ! Cheetah requires membar #Sync
 993
 994         /*
 995          * Save current DCU state.  Turn off the Dcache and Icache.
 996          */
 997         ldxa    [%g0]ASI_DCU, %g1       ! save DCU in %g1
 998         andn    %g1, DCU_DC + DCU_IC, %g4
 999         stxa    %g4, [%g0]ASI_DCU
1000         flush   %g0     /* flush required after changing the IC bit */
1001
1002         ASM_JMP(%g4, fast_ecc_err)
1003         SET_SIZE(fecc_err_instr)
1004
1005
1006 #if !(defined(JALAPENO) || defined(SERRANO))
1007
1008         .section ".text"
1009         .align  64
1010         ENTRY_NP(fast_ecc_err)
1011
1012         /*
1013          * Turn off CEEN and NCEEN.
1014          */
1015         ldxa    [%g0]ASI_ESTATE_ERR, %g3
1016         andn    %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1017         stxa    %g4, [%g0]ASI_ESTATE_ERR
1018         membar  #Sync                   ! membar sync required
1019
1020         /*
1021          * Check to see whether we need to park our sibling core
1022          * before recording diagnostic information from caches
1023          * which may be shared by both cores.
1024          * We use %g1 to store information about whether or not
1025          * we had to park the core (%g1 holds our DCUCR value and
1026          * we only use bits from that register which are "reserved"
1027          * to keep track of core parking) so that we know whether
1028          * or not to unpark later. %g5 and %g4 are scratch registers.
1029          */
1030         PARK_SIBLING_CORE(%g1, %g5, %g4)
1031
1032         /*
1033          * Do the CPU log out capture.
1034          *   %g3 = "failed?" return value.
1035          *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1036          *         into this macro via %g4. Output only valid if cpu_private
1037          *         struct has not been initialized.
1038          *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
1039          *   %g4 = Trap information stored in the cpu logout flags field
1040          *   %g5 = scr1
1041          *   %g6 = scr2
1042          *   %g3 = scr3
1043          *   %g4 = scr4
1044          */
1045          /* store the CEEN and NCEEN values, TL=0 */
1046         and     %g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
1047         set     CHPR_FECCTL0_LOGOUT, %g6
1048         DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1049
1050         /*
1051          * Flush the Ecache (and L2 cache for Panther) to get the error out
1052          * of the Ecache.  If the UCC or UCU is on a dirty line, then the
1053          * following flush will turn that into a WDC or WDU, respectively.
1054          */
1055         PN_L2_FLUSHALL(%g4, %g5, %g6)
1056
1057         CPU_INDEX(%g4, %g5)
1058         mulx    %g4, CPU_NODE_SIZE, %g4
1059         set     cpunodes, %g5
1060         add     %g4, %g5, %g4
1061         ld      [%g4 + ECACHE_LINESIZE], %g5
1062         ld      [%g4 + ECACHE_SIZE], %g4
1063
1064         ASM_LDX(%g6, ecache_flushaddr)
1065         ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
1066
1067         /*
1068          * Flush the Dcache.  Since bad data could have been installed in
1069          * the Dcache we must flush it before re-enabling it.
1070          */
1071         ASM_LD(%g5, dcache_size)
1072         ASM_LD(%g6, dcache_linesize)
1073         CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1074
1075         /*
1076          * Flush the Icache.  Since we turned off the Icache to capture the
1077          * Icache line it is now stale or corrupted and we must flush it
1078          * before re-enabling it.
1079          */
1080         GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
1081         ld      [%g5 + CHPR_ICACHE_LINESIZE], %g6
1082         ba,pt   %icc, 6f
1083           ld    [%g5 + CHPR_ICACHE_SIZE], %g5
1084 fast_ecc_err_5:
1085         ASM_LD(%g5, icache_size)
1086         ASM_LD(%g6, icache_linesize)
1087 6:
1088         CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1089
1090         /*
1091          * check to see whether we parked our sibling core at the start
1092          * of this handler. If so, we need to unpark it here.
1093          * We use DCUCR reserved bits (stored in %g1) to keep track of
1094          * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1095          */
1096         UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1097
1098         /*
1099          * Restore the Dcache and Icache to the previous state.
1100          */
1101         stxa    %g1, [%g0]ASI_DCU
1102         flush   %g0     /* flush required after changing the IC bit */
1103
1104         /*
1105          * Make sure our CPU logout operation was successful.
1106          */
1107         cmp     %g3, %g0
1108         be      8f
1109           nop
1110
1111         /*
1112          * If the logout structure had been busy, how many times have
1113          * we tried to use it and failed (nesting count)? If we have
1114          * already recursed a substantial number of times, then we can
1115          * assume things are not going to get better by themselves and
1116          * so it would be best to panic.
1117          */
1118         cmp     %g3, CLO_NESTING_MAX
1119         blt     7f
1120           nop
1121
1122         call ptl1_panic
1123           mov   PTL1_BAD_ECC, %g1
1124
1125 7:
1126         /*
1127          * Otherwise, if the logout structure was busy but we have not
1128          * nested more times than our maximum value, then we simply
1129          * issue a retry. Our TL=0 trap handler code will check and
1130          * clear the AFSR after it is done logging what is currently
1131          * in the logout struct and handle this event at that time.
1132          */
1133         retry
1134 8:
1135         /*
1136          * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1137          * already at PIL 15.
1138          */
1139         set     cpu_fast_ecc_error, %g1
1140         rdpr    %pil, %g4
1141         cmp     %g4, PIL_14
1142         ba      sys_trap
1143           movl  %icc, PIL_14, %g4
1144
1145         SET_SIZE(fast_ecc_err)
1146
1147 #endif  /* !(JALAPENO || SERRANO) */
1148
1149
1150 /*
1151  * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1152  *
1153  * The basic flow of this trap handler is as follows:
1154  *
1155  * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1156  *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1157  *    will use to save %g1 and %g2.
1158  * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1159  *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1160  *    handler (using the just saved %g1).
1161  * 3) Turn off the Dcache if it was on and save the state of the Dcache
1162  *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1163  *    NB: we don't turn off the Icache because bad data is not installed nor
1164  *        will we be doing any diagnostic accesses.
1165  * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1166  * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1167  *    %tpc, %tnpc, %tstate values previously saved).
1168  * 6) set %tl to %tl - 1.
1169  * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1170  * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1171  * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
1172  *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1173  *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
1174  *    AFSR_EXT and save the value in ch_err_tl1_data.
1175  * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1176  *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
1177  * 11) Flush the Ecache.
1178  *    NB: the Ecache is flushed assuming the largest possible size with
1179  *        the smallest possible line size since access to the cpu_nodes may
1180  *        cause an unrecoverable DTLB miss.
1181  * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1182  * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1183  *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1184  *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
1185  *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1186  * 14) Flush and re-enable the Dcache if it was on at step 3.
1187  * 15) Do TRAPTRACE if enabled.
1188  * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1189  * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1190  * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
1191  *    event pending flag and call cpu_tl1_error via systrap if set.
1192  * 19) Restore the registers from step 5 and issue retry.
1193  */
1194
1195 /*
1196  * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1197  * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1198  * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
1199  * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1200  * NB: Must be 8 instructions or less to fit in trap table and code must
1201  *     be relocatable.
1202  */
1203
1204         ENTRY_NP(fecc_err_tl1_instr)
1205         CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
1206         SET_SIZE(fecc_err_tl1_instr)
1207
1208 /*
1209  * Software trap 0 at TL>0.
1210  * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1211  * the various architecture-specific files.  This is used as a continuation
1212  * of the fast ecc handling where we've bought an extra TL level, so we can
1213  * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1214  * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1215  * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1216  * order two bits from %g1 and %g2 respectively).
1217  * NB: Must be 8 instructions or less to fit in trap table and code must
1218  *     be relocatable.
1219  */
1220
1221         ENTRY_NP(fecc_err_tl1_cont_instr)
1222         CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
1223         SET_SIZE(fecc_err_tl1_cont_instr)
1224
1225
1226 /*
1227  * The ce_err function handles disrupting trap type 0x63 at TL=0.
1228  *
1229  * AFSR errors bits which cause this trap are:
1230  *      CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1231  *
1232  * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1233  * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1234  *
1235  * CEEN Bit of Cheetah External Cache Error Enable Register enables
1236  * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1237  *
1238  * Cheetah+ also handles (No additional processing required):
1239  *    DUE, DTO, DBERR   (NCEEN controlled)
1240  *    THCE              (CEEN and ET_ECC_en controlled)
1241  *    TUE               (ET_ECC_en controlled)
1242  *
1243  * Panther further adds:
1244  *    IMU, L3_EDU, L3_WDU, L3_CPU               (NCEEN controlled)
1245  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE      (CEEN controlled)
1246  *    TUE_SH, TUE               (NCEEN and L2_tag_ECC_en controlled)
1247  *    L3_TUE, L3_TUE_SH         (NCEEN and ET_ECC_en controlled)
1248  *    THCE                      (CEEN and L2_tag_ECC_en controlled)
1249  *    L3_THCE                   (CEEN and ET_ECC_en controlled)
1250  *
1251  * Steps:
1252  *      1. Disable hardware corrected disrupting errors only (CEEN)
1253  *      2. Park sibling core if caches are shared (to avoid race
1254  *         condition while accessing shared resources such as L3
1255  *         data staging register during CPU logout.
1256  *      3. If the CPU logout structure is not currently being used:
1257  *              4. Clear AFSR error bits
1258  *              5. Capture Ecache, Dcache and Icache lines associated
1259  *                 with AFAR.
1260  *              6. Unpark sibling core if we parked it earlier.
1261  *              7. call cpu_disrupting_error via sys_trap at PIL 14
1262  *                 unless we're already running at PIL 15.
1263  *      4. Otherwise, if the CPU logout structure is busy:
1264  *              5. Incriment "logout busy count" and place into %g3
1265  *              6. Unpark sibling core if we parked it earlier.
1266  *              7. Issue a retry since the other CPU error logging
1267  *                 code will end up finding this error bit and logging
1268  *                 information about it later.
1269  *      5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1270  *         not yet initialized such that we can't even check the logout
1271  *         struct, then we place the clo_flags data into %g2
1272  *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1273  *         systrap. The clo_flags parameter is used to determine information
1274  *         such as TL, TT, CEEN settings, etc in the high level trap
1275  *         handler since we don't have access to detailed logout information
1276  *         in cases where the cpu_private struct is not yet initialized.
1277  *
1278  * %g3: [ logout busy count ] - arg #2
1279  * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1280  */
1281
1282         .align  128
1283         ENTRY_NP(ce_err)
1284         membar  #Sync                   ! Cheetah requires membar #Sync
1285
1286         /*
1287          * Disable trap on hardware corrected errors (CEEN) while at TL=0
1288          * to prevent recursion.
1289          */
1290         ldxa    [%g0]ASI_ESTATE_ERR, %g1
1291         bclr    EN_REG_CEEN, %g1
1292         stxa    %g1, [%g0]ASI_ESTATE_ERR
1293         membar  #Sync                   ! membar sync required
1294
1295         /*
1296          * Save current DCU state.  Turn off Icache to allow capture of
1297          * Icache data by DO_CPU_LOGOUT.
1298          */
1299         ldxa    [%g0]ASI_DCU, %g1       ! save DCU in %g1
1300         andn    %g1, DCU_IC, %g4
1301         stxa    %g4, [%g0]ASI_DCU
1302         flush   %g0     /* flush required after changing the IC bit */
1303
1304         /*
1305          * Check to see whether we need to park our sibling core
1306          * before recording diagnostic information from caches
1307          * which may be shared by both cores.
1308          * We use %g1 to store information about whether or not
1309          * we had to park the core (%g1 holds our DCUCR value and
1310          * we only use bits from that register which are "reserved"
1311          * to keep track of core parking) so that we know whether
1312          * or not to unpark later. %g5 and %g4 are scratch registers.
1313          */
1314         PARK_SIBLING_CORE(%g1, %g5, %g4)
1315
1316         /*
1317          * Do the CPU log out capture.
1318          *   %g3 = "failed?" return value.
1319          *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1320          *         into this macro via %g4. Output only valid if cpu_private
1321          *         struct has not been initialized.
1322          *   CHPR_CECC_LOGOUT = cpu logout structure offset input
1323          *   %g4 = Trap information stored in the cpu logout flags field
1324          *   %g5 = scr1
1325          *   %g6 = scr2
1326          *   %g3 = scr3
1327          *   %g4 = scr4
1328          */
1329         clr     %g4                     ! TL=0 bit in afsr
1330         set     CHPR_CECC_LOGOUT, %g6
1331         DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1332
1333         /*
1334          * Flush the Icache.  Since we turned off the Icache to capture the
1335          * Icache line it is now stale or corrupted and we must flush it
1336          * before re-enabling it.
1337          */
1338         GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
1339         ld      [%g5 + CHPR_ICACHE_LINESIZE], %g6
1340         ba,pt   %icc, 2f
1341           ld    [%g5 + CHPR_ICACHE_SIZE], %g5
1342 ce_err_1:
1343         ASM_LD(%g5, icache_size)
1344         ASM_LD(%g6, icache_linesize)
1345 2:
1346         CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1347
1348         /*
1349          * check to see whether we parked our sibling core at the start
1350          * of this handler. If so, we need to unpark it here.
1351          * We use DCUCR reserved bits (stored in %g1) to keep track of
1352          * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1353          */
1354         UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1355
1356         /*
1357          * Restore Icache to previous state.
1358          */
1359         stxa    %g1, [%g0]ASI_DCU
1360         flush   %g0     /* flush required after changing the IC bit */
1361
1362         /*
1363          * Make sure our CPU logout operation was successful.
1364          */
1365         cmp     %g3, %g0
1366         be      4f
1367           nop
1368
1369         /*
1370          * If the logout structure had been busy, how many times have
1371          * we tried to use it and failed (nesting count)? If we have
1372          * already recursed a substantial number of times, then we can
1373          * assume things are not going to get better by themselves and
1374          * so it would be best to panic.
1375          */
1376         cmp     %g3, CLO_NESTING_MAX
1377         blt     3f
1378           nop
1379
1380         call ptl1_panic
1381           mov   PTL1_BAD_ECC, %g1
1382
1383 3:
1384         /*
1385          * Otherwise, if the logout structure was busy but we have not
1386          * nested more times than our maximum value, then we simply
1387          * issue a retry. Our TL=0 trap handler code will check and
1388          * clear the AFSR after it is done logging what is currently
1389          * in the logout struct and handle this event at that time.
1390          */
1391         retry
1392 4:
1393         /*
1394          * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1395          * already at PIL 15.
1396          */
1397         set     cpu_disrupting_error, %g1
1398         rdpr    %pil, %g4
1399         cmp     %g4, PIL_14
1400         ba      sys_trap
1401           movl  %icc, PIL_14, %g4
1402         SET_SIZE(ce_err)
1403
1404
1405 /*
1406  * This trap cannot happen at TL>0 which means this routine will never
1407  * actually be called and so we treat this like a BAD TRAP panic.
1408  */
1409         .align  64
1410         ENTRY_NP(ce_err_tl1)
1411
1412         call ptl1_panic
1413           mov   PTL1_BAD_TRAP, %g1
1414
1415         SET_SIZE(ce_err_tl1)
1416
1417
1418 /*
1419  * The async_err function handles deferred trap types 0xA
1420  * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1421  *
1422  * AFSR errors bits which cause this trap are:
1423  *      UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1424  * On some platforms, EMU may causes cheetah to pull the error pin
1425  * never giving Solaris a chance to take a trap.
1426  *
1427  * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1428  * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1429  *
1430  * Steps:
1431  *      1. Disable CEEN and NCEEN errors to prevent recursive errors.
1432  *      2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1433  *         I$ line in DO_CPU_LOGOUT.
1434  *      3. Park sibling core if caches are shared (to avoid race
1435  *         condition while accessing shared resources such as L3
1436  *         data staging register during CPU logout.
1437  *      4. If the CPU logout structure is not currently being used:
1438  *              5. Clear AFSR error bits
1439  *              6. Capture Ecache, Dcache and Icache lines associated
1440  *                 with AFAR.
1441  *              7. Unpark sibling core if we parked it earlier.
1442  *              8. call cpu_deferred_error via sys_trap.
1443  *      5. Otherwise, if the CPU logout structure is busy:
1444  *              6. Incriment "logout busy count"
1445  *              7. Unpark sibling core if we parked it earlier.
1446  *              8) Issue a retry since the other CPU error logging
1447  *                 code will end up finding this error bit and logging
1448  *                 information about it later.
1449  *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1450  *         not yet initialized such that we can't even check the logout
1451  *         struct, then we place the clo_flags data into %g2
1452  *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
1453  *         systrap. The clo_flags parameter is used to determine information
1454  *         such as TL, TT, CEEN settings, etc in the high level trap handler
1455  *         since we don't have access to detailed logout information in cases
1456  *         where the cpu_private struct is not yet initialized.
1457  *
1458  * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1459  * %g3: [ logout busy count ] - arg #2
1460  */
1461
1462         ENTRY_NP(async_err)
1463         membar  #Sync                   ! Cheetah requires membar #Sync
1464
1465         /*
1466          * Disable CEEN and NCEEN.
1467          */
1468         ldxa    [%g0]ASI_ESTATE_ERR, %g3
1469         andn    %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1470         stxa    %g4, [%g0]ASI_ESTATE_ERR
1471         membar  #Sync                   ! membar sync required
1472
1473         /*
1474          * Save current DCU state.
1475          * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
1476          * Do this regardless of whether this is a Data Access Error or
1477          * Instruction Access Error Trap.
1478          * Disable Dcache for both Data Access Error and Instruction Access
1479          * Error per Cheetah PRM P.5 Note 6.
1480          */
1481         ldxa    [%g0]ASI_DCU, %g1       ! save DCU in %g1
1482         andn    %g1, DCU_IC + DCU_DC, %g4
1483         stxa    %g4, [%g0]ASI_DCU
1484         flush   %g0     /* flush required after changing the IC bit */
1485
1486         /*
1487          * Check to see whether we need to park our sibling core
1488          * before recording diagnostic information from caches
1489          * which may be shared by both cores.
1490          * We use %g1 to store information about whether or not
1491          * we had to park the core (%g1 holds our DCUCR value and
1492          * we only use bits from that register which are "reserved"
1493          * to keep track of core parking) so that we know whether
1494          * or not to unpark later. %g6 and %g4 are scratch registers.
1495          */
1496         PARK_SIBLING_CORE(%g1, %g6, %g4)
1497
1498         /*
1499          * Do the CPU logout capture.
1500          *
1501          *   %g3 = "failed?" return value.
1502          *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1503          *         into this macro via %g4. Output only valid if cpu_private
1504          *         struct has not been initialized.
1505          *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
1506          *   %g4 = Trap information stored in the cpu logout flags field
1507          *   %g5 = scr1
1508          *   %g6 = scr2
1509          *   %g3 = scr3
1510          *   %g4 = scr4
1511          */
1512         andcc   %g5, T_TL1, %g0
1513         clr     %g6
1514         movnz   %xcc, 1, %g6                    ! set %g6 if T_TL1 set
1515         sllx    %g6, CLO_FLAGS_TL_SHIFT, %g6
1516         sllx    %g5, CLO_FLAGS_TT_SHIFT, %g4
1517         set     CLO_FLAGS_TT_MASK, %g2
1518         and     %g4, %g2, %g4                   ! ttype
1519         or      %g6, %g4, %g4                   ! TT and TL
1520         and     %g3, EN_REG_CEEN, %g3           ! CEEN value
1521         or      %g3, %g4, %g4                   ! TT and TL and CEEN
1522         set     CHPR_ASYNC_LOGOUT, %g6
1523         DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1524
1525         /*
1526          * If the logout struct was busy, we may need to pass the
1527          * TT, TL, and CEEN information to the TL=0 handler via
1528          * systrap parameter so save it off here.
1529          */
1530         cmp     %g3, %g0
1531         be      1f
1532           nop
1533         sllx    %g4, 32, %g4
1534         or      %g4, %g3, %g3
1535 1:
1536         /*
1537          * Flush the Icache.  Since we turned off the Icache to capture the
1538          * Icache line it is now stale or corrupted and we must flush it
1539          * before re-enabling it.
1540          */
1541         GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
1542         ld      [%g5 + CHPR_ICACHE_LINESIZE], %g6
1543         ba,pt   %icc, 2f
1544           ld    [%g5 + CHPR_ICACHE_SIZE], %g5
1545 async_err_1:
1546         ASM_LD(%g5, icache_size)
1547         ASM_LD(%g6, icache_linesize)
1548 2:
1549         CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1550
1551         /*
1552          * XXX - Don't we need to flush the Dcache before turning it back
1553          *       on to avoid stale or corrupt data? Was this broken?
1554          */
1555         /*
1556          * Flush the Dcache before turning it back on since it may now
1557          * contain stale or corrupt data.
1558          */
1559         ASM_LD(%g5, dcache_size)
1560         ASM_LD(%g6, dcache_linesize)
1561         CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1562
1563         /*
1564          * check to see whether we parked our sibling core at the start
1565          * of this handler. If so, we need to unpark it here.
1566          * We use DCUCR reserved bits (stored in %g1) to keep track of
1567          * whether or not we need to unpark. %g5 and %g7 are scratch registers.
1568          */
1569         UNPARK_SIBLING_CORE(%g1, %g5, %g7)
1570
1571         /*
1572          * Restore Icache and Dcache to previous state.
1573          */
1574         stxa    %g1, [%g0]ASI_DCU
1575         flush   %g0     /* flush required after changing the IC bit */
1576
1577         /*
1578          * Make sure our CPU logout operation was successful.
1579          */
1580         cmp     %g3, %g0
1581         be      4f
1582           nop
1583
1584         /*
1585          * If the logout structure had been busy, how many times have
1586          * we tried to use it and failed (nesting count)? If we have
1587          * already recursed a substantial number of times, then we can
1588          * assume things are not going to get better by themselves and
1589          * so it would be best to panic.
1590          */
1591         cmp     %g3, CLO_NESTING_MAX
1592         blt     3f
1593           nop
1594
1595         call ptl1_panic
1596           mov   PTL1_BAD_ECC, %g1
1597
1598 3:
1599         /*
1600          * Otherwise, if the logout structure was busy but we have not
1601          * nested more times than our maximum value, then we simply
1602          * issue a retry. Our TL=0 trap handler code will check and
1603          * clear the AFSR after it is done logging what is currently
1604          * in the logout struct and handle this event at that time.
1605          */
1606         retry
1607 4:
1608         RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1609 async_err_resetskip:
1610         set     cpu_deferred_error, %g1
1611         ba      sys_trap
1612           mov   PIL_15, %g4             ! run at pil 15
1613         SET_SIZE(async_err)
1614
1615 #if defined(CPU_IMP_L1_CACHE_PARITY)
1616
1617 /*
1618  * D$ parity error trap (trap 71) at TL=0.
1619  * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1620  * the various architecture-specific files.  This merely sets up the
1621  * arguments for cpu_parity_error and calls it via sys_trap.
1622  * NB: Must be 8 instructions or less to fit in trap table and code must
1623  *     be relocatable.
1624  */
1625         ENTRY_NP(dcache_parity_instr)
1626         membar  #Sync                   ! Cheetah+ requires membar #Sync
1627         set     cpu_parity_error, %g1
1628         or      %g0, CH_ERR_DPE, %g2
1629         rdpr    %tpc, %g3
1630         sethi   %hi(sys_trap), %g7
1631         jmp     %g7 + %lo(sys_trap)
1632           mov   PIL_15, %g4             ! run at pil 15
1633         SET_SIZE(dcache_parity_instr)
1634
1635
1636 /*
1637  * D$ parity error trap (trap 71) at TL>0.
1638  * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
1639  * the various architecture-specific files.  This generates a "Software
1640  * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
1641  * continue the handling there.
1642  * NB: Must be 8 instructions or less to fit in trap table and code must
1643  *     be relocatable.
1644  */
1645         ENTRY_NP(dcache_parity_tl1_instr)
1646         CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
1647         SET_SIZE(dcache_parity_tl1_instr)
1648
1649
1650 /*
1651  * Software trap 1 at TL>0.
1652  * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
1653  * of the various architecture-specific files.  This is used as a continuation
1654  * of the dcache parity handling where we've bought an extra TL level, so we
1655  * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1656  * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1657  * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1658  * order two bits from %g1 and %g2 respectively).
1659  * NB: Must be 8 instructions or less to fit in trap table and code must
1660  *     be relocatable.
1661  */
1662         ENTRY_NP(dcache_parity_tl1_cont_instr)
1663         CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
1664         SET_SIZE(dcache_parity_tl1_cont_instr)
1665
1666 /*
1667  * D$ parity error at TL>0 handler
1668  * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
1669  * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
1670  */
1671
1672         ENTRY_NP(dcache_parity_tl1_err)
1673
1674         /*
1675          * This macro saves all the %g registers in the ch_err_tl1_data
1676          * structure, updates the ch_err_tl1_flags and saves the %tpc in
1677          * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
1678          * the ch_err_tl1_data structure and %g2 will have the original
1679          * flags in the ch_err_tl1_data structure.  All %g registers
1680          * except for %g1 and %g2 will be available.
1681          */
1682         CH_ERR_TL1_ENTER(CH_ERR_DPE);
1683
1684 #ifdef TRAPTRACE
1685         /*
1686          * Get current trap trace entry physical pointer.
1687          */
1688         CPU_INDEX(%g6, %g5)
1689         sll     %g6, TRAPTR_SIZE_SHIFT, %g6
1690         set     trap_trace_ctl, %g5
1691         add     %g6, %g5, %g6
1692         ld      [%g6 + TRAPTR_LIMIT], %g5
1693         tst     %g5
1694         be      %icc, dpe_tl1_skip_tt
1695           nop
1696         ldx     [%g6 + TRAPTR_PBASE], %g5
1697         ld      [%g6 + TRAPTR_OFFSET], %g4
1698         add     %g5, %g4, %g5
1699
1700         /*
1701          * Create trap trace entry.
1702          */
1703         rd      %asi, %g7
1704         wr      %g0, TRAPTR_ASI, %asi
1705         rd      STICK, %g4
1706         stxa    %g4, [%g5 + TRAP_ENT_TICK]%asi
1707         rdpr    %tl, %g4
1708         stha    %g4, [%g5 + TRAP_ENT_TL]%asi
1709         rdpr    %tt, %g4
1710         stha    %g4, [%g5 + TRAP_ENT_TT]%asi
1711         rdpr    %tpc, %g4
1712         stna    %g4, [%g5 + TRAP_ENT_TPC]%asi
1713         rdpr    %tstate, %g4
1714         stxa    %g4, [%g5 + TRAP_ENT_TSTATE]%asi
1715         stna    %sp, [%g5 + TRAP_ENT_SP]%asi
1716         stna    %g0, [%g5 + TRAP_ENT_TR]%asi
1717         stna    %g0, [%g5 + TRAP_ENT_F1]%asi
1718         stna    %g0, [%g5 + TRAP_ENT_F2]%asi
1719         stna    %g0, [%g5 + TRAP_ENT_F3]%asi
1720         stna    %g0, [%g5 + TRAP_ENT_F4]%asi
1721         wr      %g0, %g7, %asi
1722
1723         /*
1724          * Advance trap trace pointer.
1725          */
1726         ld      [%g6 + TRAPTR_OFFSET], %g5
1727         ld      [%g6 + TRAPTR_LIMIT], %g4
1728         st      %g5, [%g6 + TRAPTR_LAST_OFFSET]
1729         add     %g5, TRAP_ENT_SIZE, %g5
1730         sub     %g4, TRAP_ENT_SIZE, %g4
1731         cmp     %g5, %g4
1732         movge   %icc, 0, %g5
1733         st      %g5, [%g6 + TRAPTR_OFFSET]
1734 dpe_tl1_skip_tt:
1735 #endif  /* TRAPTRACE */
1736
1737         /*
1738          * I$ and D$ are automatically turned off by HW when the CPU hits
1739          * a dcache or icache parity error so we will just leave those two
1740          * off for now to avoid repeating this trap.
1741          * For Panther, however, since we trap on P$ data parity errors
1742          * and HW does not automatically disable P$, we need to disable it
1743          * here so that we don't encounter any recursive traps when we
1744          * issue the retry.
1745          */
1746         ldxa    [%g0]ASI_DCU, %g3
1747         mov     1, %g4
1748         sllx    %g4, DCU_PE_SHIFT, %g4
1749         andn    %g3, %g4, %g3
1750         stxa    %g3, [%g0]ASI_DCU
1751         membar  #Sync
1752
1753         /*
1754          * We fall into this macro if we've successfully logged the error in
1755          * the ch_err_tl1_data structure and want the PIL15 softint to pick
1756          * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
1757          * Restores the %g registers and issues retry.
1758          */
1759         CH_ERR_TL1_EXIT;
1760         SET_SIZE(dcache_parity_tl1_err)
1761
1762 /*
1763  * I$ parity error trap (trap 72) at TL=0.
1764  * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
1765  * the various architecture-specific files.  This merely sets up the
1766  * arguments for cpu_parity_error and calls it via sys_trap.
1767  * NB: Must be 8 instructions or less to fit in trap table and code must
1768  *     be relocatable.
1769  */
1770
1771         ENTRY_NP(icache_parity_instr)
1772         membar  #Sync                   ! Cheetah+ requires membar #Sync
1773         set     cpu_parity_error, %g1
1774         or      %g0, CH_ERR_IPE, %g2
1775         rdpr    %tpc, %g3
1776         sethi   %hi(sys_trap), %g7
1777         jmp     %g7 + %lo(sys_trap)
1778           mov   PIL_15, %g4             ! run at pil 15
1779         SET_SIZE(icache_parity_instr)
1780
1781 /*
1782  * I$ parity error trap (trap 72) at TL>0.
1783  * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
1784  * the various architecture-specific files.  This generates a "Software
1785  * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
1786  * continue the handling there.
1787  * NB: Must be 8 instructions or less to fit in trap table and code must
1788  *     be relocatable.
1789  */
1790         ENTRY_NP(icache_parity_tl1_instr)
1791         CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
1792         SET_SIZE(icache_parity_tl1_instr)
1793
1794 /*
1795  * Software trap 2 at TL>0.
1796  * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
1797  * of the various architecture-specific files.  This is used as a continuation
1798  * of the icache parity handling where we've bought an extra TL level, so we
1799  * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1800  * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1801  * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1802  * order two bits from %g1 and %g2 respectively).
1803  * NB: Must be 8 instructions or less to fit in trap table and code must
1804  *     be relocatable.
1805  */
1806         ENTRY_NP(icache_parity_tl1_cont_instr)
1807         CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
1808         SET_SIZE(icache_parity_tl1_cont_instr)
1809
1810
1811 /*
1812  * I$ parity error at TL>0 handler
1813  * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
1814  * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
1815  */
1816
1817         ENTRY_NP(icache_parity_tl1_err)
1818
1819         /*
1820          * This macro saves all the %g registers in the ch_err_tl1_data
1821          * structure, updates the ch_err_tl1_flags and saves the %tpc in
1822          * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
1823          * the ch_err_tl1_data structure and %g2 will have the original
1824          * flags in the ch_err_tl1_data structure.  All %g registers
1825          * except for %g1 and %g2 will be available.
1826          */
1827         CH_ERR_TL1_ENTER(CH_ERR_IPE);
1828
1829 #ifdef TRAPTRACE
1830         /*
1831          * Get current trap trace entry physical pointer.
1832          */
1833         CPU_INDEX(%g6, %g5)
1834         sll     %g6, TRAPTR_SIZE_SHIFT, %g6
1835         set     trap_trace_ctl, %g5
1836         add     %g6, %g5, %g6
1837         ld      [%g6 + TRAPTR_LIMIT], %g5
1838         tst     %g5
1839         be      %icc, ipe_tl1_skip_tt
1840           nop
1841         ldx     [%g6 + TRAPTR_PBASE], %g5
1842         ld      [%g6 + TRAPTR_OFFSET], %g4
1843         add     %g5, %g4, %g5
1844
1845         /*
1846          * Create trap trace entry.
1847          */
1848         rd      %asi, %g7
1849         wr      %g0, TRAPTR_ASI, %asi
1850         rd      STICK, %g4
1851         stxa    %g4, [%g5 + TRAP_ENT_TICK]%asi
1852         rdpr    %tl, %g4
1853         stha    %g4, [%g5 + TRAP_ENT_TL]%asi
1854         rdpr    %tt, %g4
1855         stha    %g4, [%g5 + TRAP_ENT_TT]%asi
1856         rdpr    %tpc, %g4
1857         stna    %g4, [%g5 + TRAP_ENT_TPC]%asi
1858         rdpr    %tstate, %g4
1859         stxa    %g4, [%g5 + TRAP_ENT_TSTATE]%asi
1860         stna    %sp, [%g5 + TRAP_ENT_SP]%asi
1861         stna    %g0, [%g5 + TRAP_ENT_TR]%asi
1862         stna    %g0, [%g5 + TRAP_ENT_F1]%asi
1863         stna    %g0, [%g5 + TRAP_ENT_F2]%asi
1864         stna    %g0, [%g5 + TRAP_ENT_F3]%asi
1865         stna    %g0, [%g5 + TRAP_ENT_F4]%asi
1866         wr      %g0, %g7, %asi
1867
1868         /*
1869          * Advance trap trace pointer.
1870          */
1871         ld      [%g6 + TRAPTR_OFFSET], %g5
1872         ld      [%g6 + TRAPTR_LIMIT], %g4
1873         st      %g5, [%g6 + TRAPTR_LAST_OFFSET]
1874         add     %g5, TRAP_ENT_SIZE, %g5
1875         sub     %g4, TRAP_ENT_SIZE, %g4
1876         cmp     %g5, %g4
1877         movge   %icc, 0, %g5
1878         st      %g5, [%g6 + TRAPTR_OFFSET]
1879 ipe_tl1_skip_tt:
1880 #endif  /* TRAPTRACE */
1881
1882         /*
1883          * We fall into this macro if we've successfully logged the error in
1884          * the ch_err_tl1_data structure and want the PIL15 softint to pick
1885          * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
1886          * Restores the %g registers and issues retry.
1887          */
1888         CH_ERR_TL1_EXIT;
1889
1890         SET_SIZE(icache_parity_tl1_err)
1891
1892 #endif  /* CPU_IMP_L1_CACHE_PARITY */
1893
1894
1895 /*
1896  * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1897  * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
1898  * should only be used in places where you have no choice but to look at the
1899  * tlb itself.
1900  *
1901  * Note: These two routines are required by the Estar "cpr" loadable module.
1902  */
1903
1904         ENTRY_NP(itlb_rd_entry)
1905         sllx    %o0, 3, %o0
1906         ldxa    [%o0]ASI_ITLB_ACCESS, %g1
1907         stx     %g1, [%o1]
1908         ldxa    [%o0]ASI_ITLB_TAGREAD, %g2
1909         set     TAGREAD_CTX_MASK, %o4
1910         andn    %g2, %o4, %o5
1911         retl
1912           stx   %o5, [%o2]
1913         SET_SIZE(itlb_rd_entry)
1914
1915
1916         ENTRY_NP(dtlb_rd_entry)
1917         sllx    %o0, 3, %o0
1918         ldxa    [%o0]ASI_DTLB_ACCESS, %g1
1919         stx     %g1, [%o1]
1920         ldxa    [%o0]ASI_DTLB_TAGREAD, %g2
1921         set     TAGREAD_CTX_MASK, %o4
1922         andn    %g2, %o4, %o5
1923         retl
1924           stx   %o5, [%o2]
1925         SET_SIZE(dtlb_rd_entry)
1926
1927
1928 #if !(defined(JALAPENO) || defined(SERRANO))
1929
1930         ENTRY(get_safari_config)
1931         ldxa    [%g0]ASI_SAFARI_CONFIG, %o0
1932         retl
1933         nop
1934         SET_SIZE(get_safari_config)
1935
1936
1937         ENTRY(set_safari_config)
1938         stxa    %o0, [%g0]ASI_SAFARI_CONFIG
1939         membar  #Sync
1940         retl
1941         nop
1942         SET_SIZE(set_safari_config)
1943
1944 #endif  /* !(JALAPENO || SERRANO) */
1945
1946
1947         /*
1948          * Clear the NPT (non-privileged trap) bit in the %tick/%stick
1949          * registers. In an effort to make the change in the
1950          * tick/stick counter as consistent as possible, we disable
1951          * all interrupts while we're changing the registers. We also
1952          * ensure that the read and write instructions are in the same
1953          * line in the instruction cache.
1954          */
1955         ENTRY_NP(cpu_clearticknpt)
1956         rdpr    %pstate, %g1            /* save processor state */
1957         andn    %g1, PSTATE_IE, %g3     /* turn off */
1958         wrpr    %g0, %g3, %pstate       /*   interrupts */
1959         rdpr    %tick, %g2              /* get tick register */
1960         brgez,pn %g2, 1f                /* if NPT bit off, we're done */
1961         mov     1, %g3                  /* create mask */
1962         sllx    %g3, 63, %g3            /*   for NPT bit */
1963         ba,a,pt %xcc, 2f
1964         .align  8                       /* Ensure rd/wr in same i$ line */
1965 2:
1966         rdpr    %tick, %g2              /* get tick register */
1967         wrpr    %g3, %g2, %tick         /* write tick register, */
1968                                         /*   clearing NPT bit   */
1969 1:
1970         rd      STICK, %g2              /* get stick register */
1971         brgez,pn %g2, 3f                /* if NPT bit off, we're done */
1972         mov     1, %g3                  /* create mask */
1973         sllx    %g3, 63, %g3            /*   for NPT bit */
1974         ba,a,pt %xcc, 4f
1975         .align  8                       /* Ensure rd/wr in same i$ line */
1976 4:
1977         rd      STICK, %g2              /* get stick register */
1978         wr      %g3, %g2, STICK         /* write stick register, */
1979                                         /*   clearing NPT bit   */
1980 3:
1981         jmp     %g4 + 4
1982         wrpr    %g0, %g1, %pstate       /* restore processor state */
1983
1984         SET_SIZE(cpu_clearticknpt)
1985
1986
1987 #if defined(CPU_IMP_L1_CACHE_PARITY)
1988
1989 /*
1990  * correct_dcache_parity(size_t size, size_t linesize)
1991  *
1992  * Correct D$ data parity by zeroing the data and initializing microtag
1993  * for all indexes and all ways of the D$.
1994  *
1995  */
1996         ENTRY(correct_dcache_parity)
1997         /*
1998          * Register Usage:
1999          *
2000          * %o0 = input D$ size
2001          * %o1 = input D$ line size
2002          * %o2 = scratch
2003          * %o3 = scratch
2004          * %o4 = scratch
2005          */
2006
2007         sub     %o0, %o1, %o0                   ! init cache line address
2008
2009         /*
2010          * For Panther CPUs, we also need to clear the data parity bits
2011          * using DC_data_parity bit of the ASI_DCACHE_DATA register.
2012          */
2013         GET_CPU_IMPL(%o3)
2014         cmp     %o3, PANTHER_IMPL
2015         bne     1f
2016           clr   %o3                             ! zero for non-Panther
2017         mov     1, %o3
2018         sll     %o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3
2019
2020 1:
2021         /*
2022          * Set utag = way since it must be unique within an index.
2023          */
2024         srl     %o0, 14, %o2                    ! get cache way (DC_way)
2025         membar  #Sync                           ! required before ASI_DC_UTAG
2026         stxa    %o2, [%o0]ASI_DC_UTAG           ! set D$ utag = cache way
2027         membar  #Sync                           ! required after ASI_DC_UTAG
2028
2029         /*
2030          * Zero line of D$ data (and data parity bits for Panther)
2031          */
2032         sub     %o1, 8, %o2
2033         or      %o0, %o3, %o4                   ! same address + DC_data_parity
2034 2:
2035         membar  #Sync                           ! required before ASI_DC_DATA
2036         stxa    %g0, [%o0 + %o2]ASI_DC_DATA     ! zero 8 bytes of D$ data
2037         membar  #Sync                           ! required after ASI_DC_DATA
2038         /*
2039          * We also clear the parity bits if this is a panther. For non-Panther
2040          * CPUs, we simply end up clearing the $data register twice.
2041          */
2042         stxa    %g0, [%o4 + %o2]ASI_DC_DATA
2043         membar  #Sync
2044
2045         subcc   %o2, 8, %o2
2046         bge     2b
2047         nop
2048
2049         subcc   %o0, %o1, %o0
2050         bge     1b
2051         nop
2052
2053         retl
2054           nop
2055         SET_SIZE(correct_dcache_parity)
2056
2057 #endif  /* CPU_IMP_L1_CACHE_PARITY */
2058
2059
2060         ENTRY_NP(stick_timestamp)
2061         rd      STICK, %g1      ! read stick reg
2062         sllx    %g1, 1, %g1
2063         srlx    %g1, 1, %g1     ! clear npt bit
2064
2065         retl
2066         stx     %g1, [%o0]      ! store the timestamp
2067         SET_SIZE(stick_timestamp)
2068
2069
2070         ENTRY_NP(stick_adj)
2071         rdpr    %pstate, %g1            ! save processor state
2072         andn    %g1, PSTATE_IE, %g3
2073         ba      1f                      ! cache align stick adj
2074         wrpr    %g0, %g3, %pstate       ! turn off interrupts
2075
2076         .align  16
2077 1:      nop
2078
2079         rd      STICK, %g4              ! read stick reg
2080         add     %g4, %o0, %o1           ! adjust stick with skew
2081         wr      %o1, %g0, STICK         ! write stick reg
2082
2083         retl
2084         wrpr    %g1, %pstate            ! restore processor state
2085         SET_SIZE(stick_adj)
2086
2087         ENTRY_NP(kdi_get_stick)
2088         rd      STICK, %g1
2089         stx     %g1, [%o0]
2090         retl
2091         mov     %g0, %o0
2092         SET_SIZE(kdi_get_stick)
2093
2094 /*
2095  * Invalidate the specified line from the D$.
2096  *
2097  * Register usage:
2098  *      %o0 - index for the invalidation, specifies DC_way and DC_addr
2099  *
2100  * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2101  * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2102  *
2103  * The format of the stored 64-bit value is:
2104  *
2105  *      +----------+--------+----------+
2106  *      | Reserved | DC_tag | DC_valid |
2107  *      +----------+--------+----------+
2108  *       63      31 30     1          0
2109  *
2110  * DC_tag is the 30-bit physical tag of the associated line.
2111  * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2112  *
2113  * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2114  *
2115  *      +----------+--------+----------+----------+
2116  *      | Reserved | DC_way | DC_addr  | Reserved |
2117  *      +----------+--------+----------+----------+
2118  *       63      16 15    14 13       5 4        0
2119  *
2120  * DC_way is a 2-bit index that selects one of the 4 ways.
2121  * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2122  *
2123  * Setting the DC_valid bit to zero for the specified DC_way and
2124  * DC_addr index into the D$ results in an invalidation of a D$ line.
2125  */
2126         ENTRY(dcache_inval_line)
2127         sll     %o0, 5, %o0             ! shift index into DC_way and DC_addr
2128         stxa    %g0, [%o0]ASI_DC_TAG    ! zero the DC_valid and DC_tag bits
2129         membar  #Sync
2130         retl
2131         nop
2132         SET_SIZE(dcache_inval_line)
2133
2134 /*
2135  * Invalidate the entire I$
2136  *
2137  * Register usage:
2138  *      %o0 - specifies IC_way, IC_addr, IC_tag
2139  *      %o1 - scratch
2140  *      %o2 - used to save and restore DCU value
2141  *      %o3 - scratch
2142  *      %o5 - used to save and restore PSTATE
2143  *
2144  * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2145  * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2146  * block out snoops and invalidates to the I$, causing I$ consistency
2147  * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2148  *
2149  * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2150  * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2151  * info below describes store (write) use of ASI_IC_TAG. Note that read
2152  * use of ASI_IC_TAG behaves differently.
2153  *
2154  * The format of the stored 64-bit value is:
2155  *
2156  *      +----------+--------+---------------+-----------+
2157  *      | Reserved | Valid  | IC_vpred<7:0> | Undefined |
2158  *      +----------+--------+---------------+-----------+
2159  *       63      55    54    53           46 45        0
2160  *
2161  * Valid is the 1-bit valid field for both the physical and snoop tags.
2162  * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2163  *      the 32-byte boundary aligned address specified by IC_addr.
2164  *
2165  * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2166  *
2167  *      +----------+--------+---------+--------+---------+
2168  *      | Reserved | IC_way | IC_addr | IC_tag |Reserved |
2169  *      +----------+--------+---------+--------+---------+
2170  *       63      16 15    14 13      5 4      3 2       0
2171  *
2172  * IC_way is a 2-bit index that selects one of the 4 ways.
2173  * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2174  * IC_addr[5] is a "don't care" for a store.
2175  * IC_tag set to 2 specifies that the stored value is to be interpreted
2176  *      as containing Valid and IC_vpred as described above.
2177  *
2178  * Setting the Valid bit to zero for the specified IC_way and
2179  * IC_addr index into the I$ results in an invalidation of an I$ line.
2180  */
2181         ENTRY(icache_inval_all)
2182         rdpr    %pstate, %o5
2183         andn    %o5, PSTATE_IE, %o3
2184         wrpr    %g0, %o3, %pstate       ! clear IE bit
2185
2186         GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2187         ld      [%o0 + CHPR_ICACHE_LINESIZE], %o1
2188         ba,pt   %icc, 2f
2189           ld    [%o0 + CHPR_ICACHE_SIZE], %o0
2190 icache_inval_all_1:
2191         ASM_LD(%o0, icache_size)
2192         ASM_LD(%o1, icache_linesize)
2193 2:
2194         CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2195
2196         retl
2197         wrpr    %g0, %o5, %pstate       ! restore earlier pstate
2198         SET_SIZE(icache_inval_all)
2199
2200
2201 /*
2202  * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2203  * crosstrap.  It atomically increments the outstanding request counter and,
2204  * if there was not already an outstanding request, branches to setsoftint_tl1
2205  * to enqueue an intr_vec for the given inum.
2206  */
2207
2208         ! Register usage:
2209         !
2210         ! Arguments:
2211         ! %g1 - inum
2212         ! %g2 - index into chsm_outstanding array
2213         !
2214         ! Internal:
2215         ! %g2, %g3, %g5 - scratch
2216         ! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2217         ! %g6 - setsoftint_tl1 address
2218
2219         ENTRY_NP(cache_scrubreq_tl1)
2220         mulx    %g2, CHSM_OUTSTANDING_INCR, %g2
2221         set     CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
2222         add     %g2, %g3, %g2
2223         GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2224         ld      [%g4], %g2              ! cpu's chsm_outstanding[index]
2225         !
2226         ! no need to use atomic instructions for the following
2227         ! increment - we're at tl1
2228         !
2229         add     %g2, 0x1, %g3
2230         brnz,pn %g2, 1f                 ! no need to enqueue more intr_vec
2231           st    %g3, [%g4]              ! delay - store incremented counter
2232         ASM_JMP(%g6, setsoftint_tl1)
2233         ! not reached
2234 1:
2235         retry
2236         SET_SIZE(cache_scrubreq_tl1)
2237
2238
2239 /*
2240  * Get the error state for the processor.
2241  * Note that this must not be used at TL>0
2242  */
2243         ENTRY(get_cpu_error_state)
2244 #if defined(CHEETAH_PLUS)
2245         set     ASI_SHADOW_REG_VA, %o2
2246         ldxa    [%o2]ASI_AFSR, %o1              ! shadow afsr reg
2247         stx     %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2248         ldxa    [%o2]ASI_AFAR, %o1              ! shadow afar reg
2249         stx     %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2250         GET_CPU_IMPL(%o3)       ! Only panther has AFSR_EXT registers
2251         cmp     %o3, PANTHER_IMPL
2252         bne,a   1f
2253           stx   %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]     ! zero for non-PN
2254         set     ASI_AFSR_EXT_VA, %o2
2255         ldxa    [%o2]ASI_AFSR, %o1              ! afsr_ext reg
2256         stx     %o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2257         set     ASI_SHADOW_AFSR_EXT_VA, %o2
2258         ldxa    [%o2]ASI_AFSR, %o1              ! shadow afsr_ext reg
2259         stx     %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2260         b       2f
2261           nop
2262 1:
2263         stx     %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
2264 2:
2265 #else   /* CHEETAH_PLUS */
2266         stx     %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2267         stx     %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2268         stx     %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2269         stx     %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2270 #endif  /* CHEETAH_PLUS */
2271 #if defined(SERRANO)
2272         /*
2273          * Serrano has an afar2 which captures the address on FRC/FRU errors.
2274          * We save this in the afar2 of the register save area.
2275          */
2276         set     ASI_MCU_AFAR2_VA, %o2
2277         ldxa    [%o2]ASI_MCU_CTRL, %o1
2278         stx     %o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2279 #endif  /* SERRANO */
2280         ldxa    [%g0]ASI_AFSR, %o1              ! primary afsr reg
2281         stx     %o1, [%o0 + CH_CPU_ERRORS_AFSR]
2282         ldxa    [%g0]ASI_AFAR, %o1              ! primary afar reg
2283         retl
2284         stx     %o1, [%o0 + CH_CPU_ERRORS_AFAR]
2285         SET_SIZE(get_cpu_error_state)
2286
2287 /*
2288  * Check a page of memory for errors.
2289  *
2290  * Load each 64 byte block from physical memory.
2291  * Check AFSR after each load to see if an error
2292  * was caused. If so, log/scrub that error.
2293  *
2294  * Used to determine if a page contains
2295  * CEs when CEEN is disabled.
2296  */
2297         ENTRY(cpu_check_block)
2298         !
2299         ! get a new window with room for the error regs
2300         !
2301         save    %sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
2302         srl     %i1, 6, %l4             ! clear top bits of psz
2303                                         ! and divide by 64
2304         rd      %fprs, %l2              ! store FP
2305         wr      %g0, FPRS_FEF, %fprs    ! enable FP
2306 1:
2307         ldda    [%i0]ASI_BLK_P, %d0     ! load a block
2308         membar  #Sync
2309         ldxa    [%g0]ASI_AFSR, %l3      ! read afsr reg
2310         brz,a,pt %l3, 2f                ! check for error
2311         nop
2312
2313         !
2314         ! if error, read the error regs and log it
2315         !
2316         call    get_cpu_error_state
2317         add     %fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
2318
2319         !
2320         ! cpu_ce_detected(ch_cpu_errors_t *, flag)
2321         !
2322         call    cpu_ce_detected         ! log the error
2323         mov     CE_CEEN_TIMEOUT, %o1
2324 2:
2325         dec     %l4                     ! next 64-byte block
2326         brnz,a,pt  %l4, 1b
2327         add     %i0, 64, %i0            ! increment block addr
2328
2329         wr      %l2, %g0, %fprs         ! restore FP
2330         ret
2331         restore
2332
2333         SET_SIZE(cpu_check_block)
2334
2335 /*
2336  * Perform a cpu logout called from C.  This is used where we did not trap
2337  * for the error but still want to gather "what we can".  Caller must make
2338  * sure cpu private area exists and that the indicated logout area is free
2339  * for use, and that we are unable to migrate cpus.
2340  */
2341         ENTRY(cpu_delayed_logout)
2342         rdpr    %pstate, %o2
2343         andn    %o2, PSTATE_IE, %o2
2344         wrpr    %g0, %o2, %pstate               ! disable interrupts
2345         PARK_SIBLING_CORE(%o2, %o3, %o4)        ! %o2 has DCU value
2346         add     %o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2347         rd      %asi, %g1
2348         wr      %g0, ASI_P, %asi
2349         GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2350         wr      %g1, %asi
2351         UNPARK_SIBLING_CORE(%o2, %o3, %o4)      ! can use %o2 again
2352         rdpr    %pstate, %o2
2353         or      %o2, PSTATE_IE, %o2
2354         wrpr    %g0, %o2, %pstate
2355         retl
2356           nop
2357         SET_SIZE(cpu_delayed_logout)
2358
2359         ENTRY(dtrace_blksuword32)
2360         save    %sp, -SA(MINFRAME + 4), %sp
2361
2362         rdpr    %pstate, %l1
2363         andn    %l1, PSTATE_IE, %l2             ! disable interrupts to
2364         wrpr    %g0, %l2, %pstate               ! protect our FPU diddling
2365
2366         rd      %fprs, %l0
2367         andcc   %l0, FPRS_FEF, %g0
2368         bz,a,pt %xcc, 1f                        ! if the fpu is disabled
2369         wr      %g0, FPRS_FEF, %fprs            ! ... enable the fpu
2370
2371         st      %f0, [%fp + STACK_BIAS - 4]     ! save %f0 to the stack
2372 1:
2373         set     0f, %l5
2374         /*
2375          * We're about to write a block full or either total garbage
2376          * (not kernel data, don't worry) or user floating-point data
2377          * (so it only _looks_ like garbage).
2378          */
2379         ld      [%i1], %f0                      ! modify the block
2380         membar  #Sync
2381         stn     %l5, [THREAD_REG + T_LOFAULT]   ! set up the lofault handler
2382         stda    %d0, [%i0]ASI_BLK_COMMIT_S      ! store the modified block
2383         membar  #Sync
2384         stn     %g0, [THREAD_REG + T_LOFAULT]   ! remove the lofault handler
2385
2386         bz,a,pt %xcc, 1f
2387         wr      %g0, %l0, %fprs                 ! restore %fprs
2388
2389         ld      [%fp + STACK_BIAS - 4], %f0     ! restore %f0
2390 1:
2391
2392         wrpr    %g0, %l1, %pstate               ! restore interrupts
2393
2394         ret
2395         restore %g0, %g0, %o0
2396
2397 0:
2398         membar  #Sync
2399         stn     %g0, [THREAD_REG + T_LOFAULT]   ! remove the lofault handler
2400
2401         bz,a,pt %xcc, 1f
2402         wr      %g0, %l0, %fprs                 ! restore %fprs
2403
2404         ld      [%fp + STACK_BIAS - 4], %f0     ! restore %f0
2405 1:
2406
2407         wrpr    %g0, %l1, %pstate               ! restore interrupts
2408
2409         /*
2410          * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2411          * which deals with watchpoints. Otherwise, just return -1.
2412          */
2413         brnz,pt %i2, 1f
2414         nop
2415         ret
2416         restore %g0, -1, %o0
2417 1:
2418         call    dtrace_blksuword32_err
2419         restore
2420
2421         SET_SIZE(dtrace_blksuword32)
2422
2423 #ifdef  CHEETAHPLUS_ERRATUM_25
2424
2425         /* Claim a chunk of physical address space. */
2426         ENTRY(claimlines)
2427 1:
2428         subcc   %o1, %o2, %o1
2429         add     %o0, %o1, %o3
2430         bgeu,a,pt       %xcc, 1b
2431         casxa   [%o3]ASI_MEM, %g0, %g0
2432         membar  #Sync
2433         retl
2434         nop
2435         SET_SIZE(claimlines)
2436
2437         /*
2438          * CPU feature initialization,
2439          * turn BPE off,
2440          * get device id.
2441          */
2442         ENTRY(cpu_feature_init)
2443         save    %sp, -SA(MINFRAME), %sp
2444         sethi   %hi(cheetah_bpe_off), %o0
2445         ld      [%o0 + %lo(cheetah_bpe_off)], %o0
2446         brz     %o0, 1f
2447         nop
2448         rd      ASR_DISPATCH_CONTROL, %o0
2449         andn    %o0, ASR_DISPATCH_CONTROL_BPE, %o0
2450         wr      %o0, 0, ASR_DISPATCH_CONTROL
2451 1:
2452         !
2453         ! get the device_id and store the device_id
2454         ! in the appropriate cpunodes structure
2455         ! given the cpus index
2456         !
2457         CPU_INDEX(%o0, %o1)
2458         mulx %o0, CPU_NODE_SIZE, %o0
2459         set  cpunodes + DEVICE_ID, %o1
2460         ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
2461         stx  %o2, [%o0 + %o1]
2462 #ifdef  CHEETAHPLUS_ERRATUM_34
2463         !
2464         ! apply Cheetah+ erratum 34 workaround
2465         !
2466         call itlb_erratum34_fixup
2467           nop
2468         call dtlb_erratum34_fixup
2469           nop
2470 #endif  /* CHEETAHPLUS_ERRATUM_34 */
2471         ret
2472           restore
2473         SET_SIZE(cpu_feature_init)
2474
2475 /*
2476  * Copy a tsb entry atomically, from src to dest.
2477  * src must be 128 bit aligned.
2478  */
2479         ENTRY(copy_tsb_entry)
2480         ldda    [%o0]ASI_NQUAD_LD, %o2          ! %o2 = tag, %o3 = data
2481         stx     %o2, [%o1]
2482         stx     %o3, [%o1 + 8 ]
2483         retl
2484         nop
2485         SET_SIZE(copy_tsb_entry)
2486
2487 #endif  /* CHEETAHPLUS_ERRATUM_25 */
2488
2489 #ifdef  CHEETAHPLUS_ERRATUM_34
2490
2491         !
2492         ! In Cheetah+ erratum 34, under certain conditions an ITLB locked
2493         ! index 0 TTE will erroneously be displaced when a new TTE is
2494         ! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
2495         ! locked index 0 TTEs must be relocated.
2496         !
2497         ! NOTE: Care must be taken to avoid an ITLB miss in this routine.
2498         !
2499         ENTRY_NP(itlb_erratum34_fixup)
2500         rdpr    %pstate, %o3
2501 #ifdef DEBUG
2502         PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
2503 #endif /* DEBUG */
2504         wrpr    %o3, PSTATE_IE, %pstate         ! Disable interrupts
2505         ldxa    [%g0]ASI_ITLB_ACCESS, %o1       ! %o1 = entry 0 data
2506         ldxa    [%g0]ASI_ITLB_TAGREAD, %o2      ! %o2 = entry 0 tag
2507
2508         cmp     %o1, %g0                        ! Is this entry valid?
2509         bge     %xcc, 1f
2510           andcc %o1, TTE_LCK_INT, %g0           ! Is this entry locked?
2511         bnz     %icc, 2f
2512           nop
2513 1:
2514         retl                                    ! Nope, outta here...
2515           wrpr  %g0, %o3, %pstate               ! Enable interrupts
2516 2:
2517         sethi   %hi(FLUSH_ADDR), %o4
2518         stxa    %g0, [%o2]ASI_ITLB_DEMAP        ! Flush this mapping
2519         flush   %o4                             ! Flush required for I-MMU
2520         !
2521         ! Start search from index 1 up.  This is because the kernel force
2522         ! loads its text page at index 15 in sfmmu_kernel_remap() and we
2523         ! don't want our relocated entry evicted later.
2524         !
2525         ! NOTE: We assume that we'll be successful in finding an unlocked
2526         ! or invalid entry.  If that isn't the case there are bound to
2527         ! bigger problems.
2528         !
2529         set     (1 << 3), %g3
2530 3:
2531         ldxa    [%g3]ASI_ITLB_ACCESS, %o4       ! Load TTE from t16
2532         !
2533         ! If this entry isn't valid, we'll choose to displace it (regardless
2534         ! of the lock bit).
2535         !
2536         cmp     %o4, %g0                        ! TTE is > 0 iff not valid
2537         bge     %xcc, 4f                        ! If invalid, go displace
2538           andcc %o4, TTE_LCK_INT, %g0           ! Check for lock bit
2539         bnz,a   %icc, 3b                        ! If locked, look at next
2540           add   %g3, (1 << 3), %g3              !  entry
2541 4:
2542         !
2543         ! We found an unlocked or invalid entry; we'll explicitly load
2544         ! the former index 0 entry here.
2545         !
2546         sethi   %hi(FLUSH_ADDR), %o4
2547         set     MMU_TAG_ACCESS, %g4
2548         stxa    %o2, [%g4]ASI_IMMU
2549         stxa    %o1, [%g3]ASI_ITLB_ACCESS
2550         flush   %o4                             ! Flush required for I-MMU
2551         retl
2552           wrpr  %g0, %o3, %pstate               ! Enable interrupts
2553         SET_SIZE(itlb_erratum34_fixup)
2554
2555         !
2556         ! In Cheetah+ erratum 34, under certain conditions a DTLB locked
2557         ! index 0 TTE will erroneously be displaced when a new TTE is
2558         ! loaded.  In order to avoid cheetah+ erratum 34, locked index 0
2559         ! TTEs must be relocated.
2560         !
2561         ENTRY_NP(dtlb_erratum34_fixup)
2562         rdpr    %pstate, %o3
2563 #ifdef DEBUG
2564         PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
2565 #endif /* DEBUG */
2566         wrpr    %o3, PSTATE_IE, %pstate         ! Disable interrupts
2567         ldxa    [%g0]ASI_DTLB_ACCESS, %o1       ! %o1 = entry 0 data
2568         ldxa    [%g0]ASI_DTLB_TAGREAD, %o2      ! %o2 = entry 0 tag
2569
2570         cmp     %o1, %g0                        ! Is this entry valid?
2571         bge     %xcc, 1f
2572           andcc %o1, TTE_LCK_INT, %g0           ! Is this entry locked?
2573         bnz     %icc, 2f
2574           nop
2575 1:
2576         retl                                    ! Nope, outta here...
2577           wrpr  %g0, %o3, %pstate               ! Enable interrupts
2578 2:
2579         stxa    %g0, [%o2]ASI_DTLB_DEMAP        ! Flush this mapping
2580         membar  #Sync
2581         !
2582         ! Start search from index 1 up.
2583         !
2584         ! NOTE: We assume that we'll be successful in finding an unlocked
2585         ! or invalid entry.  If that isn't the case there are bound to
2586         ! bigger problems.
2587         !
2588         set     (1 << 3), %g3
2589 3:
2590         ldxa    [%g3]ASI_DTLB_ACCESS, %o4       ! Load TTE from t16
2591         !
2592         ! If this entry isn't valid, we'll choose to displace it (regardless
2593         ! of the lock bit).
2594         !
2595         cmp     %o4, %g0                        ! TTE is > 0 iff not valid
2596         bge     %xcc, 4f                        ! If invalid, go displace
2597           andcc %o4, TTE_LCK_INT, %g0           ! Check for lock bit
2598         bnz,a   %icc, 3b                        ! If locked, look at next
2599           add   %g3, (1 << 3), %g3              !  entry
2600 4:
2601         !
2602         ! We found an unlocked or invalid entry; we'll explicitly load
2603         ! the former index 0 entry here.
2604         !
2605         set     MMU_TAG_ACCESS, %g4
2606         stxa    %o2, [%g4]ASI_DMMU
2607         stxa    %o1, [%g3]ASI_DTLB_ACCESS
2608         membar  #Sync
2609         retl
2610           wrpr  %g0, %o3, %pstate               ! Enable interrupts
2611         SET_SIZE(dtlb_erratum34_fixup)
2612
2613 #endif  /* CHEETAHPLUS_ERRATUM_34 */
2614