4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
30 * General assembly language routines.
31 * It is the intent of this file to contain routines that are
32 * specific to cpu architecture.
36 * WARNING: If you add a fast trap handler which can be invoked by a
37 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
38 * instead of "done" instruction to return back to the user mode. See
39 * comments for the "fast_trap_done" entry point for more information.
41 #define FAST_TRAP_DONE \
45 * Override GET_NATIVE_TIME for the cpu module code. This is not
46 * guaranteed to be exactly one instruction, be careful of using
47 * the macro in delay slots.
49 * Do not use any instruction that modifies condition codes as the
50 * caller may depend on these to remain unchanged across the macro.
52 #if defined(CHEETAH) || defined(OLYMPUS_C)
54 #define GET_NATIVE_TIME(out, scr1, scr2) \
56 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
58 add reg
, delta
, reg; \
60 #define RD_TICKCMPR(out, scr) \
62 #define WR_TICKCMPR(in, scr1, scr2, label) \
65 #elif defined(HUMMINGBIRD)
66 #include <sys/spitregs.h>
69 * the current hummingbird version of %stick and %stick_cmp
70 * were both implemented as (2) 32-bit locations in ASI_IO space;
71 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
73 * 64-bit opcodes are required, but move only 32-bits:
75 * ldxa [phys]ASI_IO, %dst reads the low 32-bits from phys into %dst
76 * stxa %src, [phys]ASI_IO writes the low 32-bits from %src into phys
78 * reg equivalent [phys]ASI_IO
79 * ------------------ ---------------
80 * %stick_cmp low-32 0x1FE.0000.F060
81 * %stick_cmp high-32 0x1FE.0000.F068
82 * %stick low-32 0x1FE.0000.F070
83 * %stick high-32 0x1FE.0000.F078
/*
 * ASI_IO byte offsets of the hummingbird %stick / %stick_cmp 32-bit
 * halves; these form the low-order byte of the 41-bit physical address
 * built by SETL41() (0x1FE.0000.F0xx).
 */
#define	HSTC_LOW	0x60		/* stick_cmp low 32-bits */
#define	HSTC_HIGH	0x68		/* stick_cmp high 32-bits */
#define	HST_LOW		0x70		/* stick low 32-bits */
#define	HST_HIGH	0x78		/* stick high 32-bits */
#define	HST_DIFF	0x08		/* low<-->high diff */
/*
 * SETL41() builds the 41-bit physical address 0x1FE.0000.F0xx in `reg',
 * where `byte' selects the device register offset (HST_LOW, HSTC_HIGH, ...).
 * Any change in the number of instructions in SETL41()
 * will affect SETL41_OFF.
 */
#define	SETL41(reg, byte) \
	sethi	%hi(0x1FE00000), reg;	/* 0000.0000.1FE0.0000 */ \
	or	reg, 0xF, reg;		/* 0000.0000.1FE0.000F */ \
	sllx	reg, 12, reg;		/* 0000.01FE.0000.F000 */ \
	or	reg, byte, reg;		/* 0000.01FE.0000.F0xx */
/*
 * SETL41_OFF is used to calculate the relative PC value when a
 * branch instruction needs to go over the SETL41() macro
 * (4 instructions of 4 bytes each).
 */
#define	SETL41_OFF	16
108 * reading stick requires 2 loads, and there could be an intervening
109 * low-to-high 32-bit rollover resulting in a return value that is
110 * off by about (2 ^ 32); this rare case is prevented by re-reading
111 * the low-32 bits after the high-32 and verifying the "after" value
112 * is >= the "before" value; if not, increment the high-32 value.
114 * this method is limited to 1 rollover, and based on the fixed
115 * stick-frequency (5555555), requires the loads to complete within
116 * 773 seconds; incrementing the high-32 value will not overflow for
119 * writing stick requires 2 stores; if the old/new low-32 value is
120 * near 0xffffffff, there could be another rollover (also rare).
121 * to prevent this, we first write a 0 to the low-32, then write
122 * new values to the high-32 then the low-32.
124 * When we detect a carry in the lower %stick register, we need to
125 * read HST_HIGH again. However at the point where we detect this,
126 * we need to rebuild the register address HST_HIGH. This involves more
127 * than one instruction and a branch is unavoidable. However, most of
128 * the time, there is no carry. So we take the penalty of a branch
129 * instruction only when there is carry (less frequent).
131 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
132 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
133 * addr already points to HST_LOW.
135 * NOTE: this method requires disabling interrupts before using
138 #define GET_NATIVE_TIME(out, scr, tmp) \
139 SETL41
(scr
, HST_LOW
); \
140 ldxa
[scr
]ASI_IO
, tmp; \
142 ldxa
[scr
]ASI_IO
, out; \
144 ldxa
[scr
]ASI_IO
, scr; \
146 brlz
,pn tmp
, .-(SETL41_OFF+24); \
149 #define DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
150 SETL41
(addr
, HST_LOW
); \
151 ldxa
[addr
]ASI_IO
, tmp; \
152 inc HST_DIFF
, addr; \
153 ldxa
[addr
]ASI_IO
, high; \
154 dec HST_DIFF
, addr; \
155 ldxa
[addr
]ASI_IO
, low; \
158 sllx high
, 32, high; \
159 or high
, low
, high; \
160 add high
, delta
, high; \
162 srlx high
, 32, high; \
163 stxa
%g0
, [addr
]ASI_IO; \
164 inc HST_DIFF
, addr; \
165 stxa high
, [addr
]ASI_IO; \
166 dec HST_DIFF
, addr; \
167 stxa low
, [addr
]ASI_IO
168 #define RD_TICKCMPR(out, scr) \
169 SETL41
(scr
, HSTC_LOW
); \
170 ldxa
[scr
]ASI_IO
, out; \
172 ldxa
[scr
]ASI_IO
, scr; \
175 #define WR_TICKCMPR(in, scra, scrd, label) \
176 SETL41
(scra
, HSTC_HIGH
); \
178 stxa scrd
, [scra
]ASI_IO; \
179 dec HST_DIFF
, scra; \
180 stxa in
, [scra
]ASI_IO
182 #else /* !CHEETAH && !HUMMINGBIRD */
184 #define GET_NATIVE_TIME(out, scr1, scr2) \
186 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
188 add reg
, delta
, reg; \
190 #define RD_TICKCMPR(out, scr) \
192 #ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
194 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
195 * The failure occurs only when the following instruction decodes to wr or
196 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
197 * with a read, thus stalling the pipe and keeping following instructions
198 * from causing data corruption. Aligning to a quadword will ensure these
199 * two instructions are not split due to i$ misses.
201 #define WR_TICKCMPR(cmpr,scr1,scr2,label) \
202 ba,a .bb_errata_1.label ;\
204 .bb_errata_1.label: ;\
205 wr cmpr
, TICK_COMPARE ;\
207 #else /* BB_ERRATA_1 */
208 #define WR_TICKCMPR(in,scr1,scr2,label) \
210 #endif /* BB_ERRATA_1 */
212 #endif /* !CHEETAH && !HUMMINGBIRD */
214 #include <sys/clock.h>
217 #include <sys/types.h>
219 #include <sys/systm.h>
220 #include <sys/regset.h>
221 #include <sys/sunddi.h>
222 #include <sys/lockstat.h>
226 #include <sys/asm_linkage.h>
227 #include <sys/privregs.h>
228 #include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */
229 #include <sys/machthread.h>
230 #include <sys/clock.h>
231 #include <sys/intreg.h>
232 #include <sys/psr_compat.h>
233 #include <sys/isa_defs.h>
234 #include <sys/dditypes.h>
235 #include <sys/intr.h>
259 * Softint generated when counter field of tick reg matches value field
264 tickcmpr_set
(uint64_t clock_cycles
)
269 ENTRY_NP
(tickcmpr_set
)
270 ! get
64-bit clock_cycles interval
272 mov
8, %o3
! A reasonable initial step size
274 WR_TICKCMPR
(%o2
,%o4
,%o5
,__LINE__
) ! Write to TICK_CMPR
276 GET_NATIVE_TIME
(%o0
, %o4
, %o5
) ! Read
%tick to confirm the
277 sllx
%o0
, 1, %o0
! value we wrote was in the future.
280 cmp %o2
, %o0
! If the value we wrote was in the
281 bg
,pt
%xcc
, 2f
! future
, then blow out of here.
282 sllx
%o3
, 1, %o3
! If
not, then double our step size
,
283 ba,pt
%xcc
, 1b ! and take another lap.
288 SET_SIZE
(tickcmpr_set
)
295 tickcmpr_disable
(void
)
300 ENTRY_NP
(tickcmpr_disable
)
302 sllx
%g1
, TICKINT_DIS_SHFT
, %o0
303 WR_TICKCMPR
(%o0
,%o4
,%o5
,__LINE__
) ! Write to TICK_CMPR
306 SET_SIZE
(tickcmpr_disable
)
313 * tick_write_delta() increments %tick by the specified delta. This should
314 * only be called after a CPR event to assure that gethrtime() continues to
315 * increase monotonically. Obviously, writing %tick needs to be done very
316 * carefully to avoid introducing unnecessary %tick skew across CPUs. For
317 * this reason, we make sure we're i-cache hot before actually writing to
322 tick_write_delta
(uint64_t delta
)
330 .asciz "tick_write_delta: interrupts already disabled on entry"
333 ENTRY_NP
(tick_write_delta
)
336 andcc
%g1
, PSTATE_IE
, %g0
! If DEBUG
, check that interrupts
337 bnz
0f
! aren
't already disabled.
338 sethi %hi(tick_write_panic), %o1
339 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
341 or %i1, %lo(tick_write_panic), %o0
343 0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
345 ba 0f ! Branch to cache line-aligned instr.
348 0: nop ! The next 3 instructions are now hot.
349 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick
352 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts
357 * return 1 if disabled
361 tickcmpr_disabled(void)
366 ENTRY_NP(tickcmpr_disabled)
367 RD_TICKCMPR(%g1, %o0)
369 srlx %g1, TICKINT_DIS_SHFT, %o0
370 SET_SIZE(tickcmpr_disabled)
391 GET_NATIVE_TIME(%o0, %o2, %o3)
401 * Return the counter portion of the tick register.
407 gettick_counter(void)
412 ENTRY_NP(gettick_counter)
416 srlx %o0, 1, %o0 ! shake off npt bit
417 SET_SIZE(gettick_counter)
421 * Provide a C callable interface to the trap that reads the hi-res timer.
422 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
430 return ((hrtime_t)0);
434 gethrtime_unscaled(void)
436 return ((hrtime_t)0);
442 return ((hrtime_t)0);
446 scalehrtime(hrtime_t *hrt)
452 gethrestime(timespec_t *tp)
459 gethrestime_sec(void)
465 gethrestime_lasttick(timespec_t *tp)
478 panic_hres_tick(void)
485 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
491 ENTRY_NP(gethrtime_unscaled)
492 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
495 SET_SIZE(gethrtime_unscaled)
497 ENTRY_NP(gethrtime_waitfree)
498 ALTENTRY(dtrace_gethrtime)
499 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
500 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
503 SET_SIZE(dtrace_gethrtime)
504 SET_SIZE(gethrtime_waitfree)
508 NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
510 ! hrtime_t's are signed
, max hrtime_t must
be positive
517 SET_SIZE
(gethrtime_max
)
521 NATIVE_TIME_TO_NSEC
(%o1
, %o2
, %o3
)
524 SET_SIZE
(scalehrtime
)
527 * Fast trap to return a timestamp, uses trap window, leaves traps
528 * disabled. Returns a 64-bit nanosecond timestamp in %o0 and %o1.
530 * This is the handler for the ST_GETHRTIME trap.
533 ENTRY_NP
(get_timestamp
)
534 GET_HRTIME
(%g1
, %g2
, %g3
, %g4
, %g5
, %o0
, %o1
, %o2
) ! %g1
= hrtime
535 srlx
%g1
, 32, %o0
! %o0
= hi32
(%g1
)
536 srl
%g1
, 0, %o1
! %o1
= lo32
(%g1
)
538 SET_SIZE
(get_timestamp
)
541 * Macro to convert GET_HRESTIME() bits into a timestamp.
543 * We use two separate macros so that the platform-dependent GET_HRESTIME()
544 * can be as small as possible; CONV_HRESTIME() implements the generic part.
546 #define CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
547 brz
,pt adj
, 3f;
/* no adjustments, it's easy */ \
548 add hrestnsec
, nslt
, hrestnsec;
/* hrest.tv_nsec += nslt */ \
549 brlz
,pn adj
, 2f;
/* if hrestime_adj negative */ \
550 srlx nslt
, ADJ_SHIFT
, nslt;
/* delay: nslt >>= 4 */ \
551 subcc adj
, nslt
, %g0;
/* hrestime_adj - nslt/16 */ \
552 movg
%xcc
, nslt
, adj;
/* adj by min(adj, nslt/16) */ \
553 ba 3f;
/* go convert to sec/nsec */ \
554 add hrestnsec
, adj
, hrestnsec;
/* delay: apply adjustment */ \
555 2: addcc adj
, nslt
, %g0;
/* hrestime_adj + nslt/16 */ \
556 bge,a,pt
%xcc
, 3f;
/* is adj less negative? */ \
557 add hrestnsec
, adj
, hrestnsec;
/* yes: hrest.nsec += adj */ \
558 sub hrestnsec
, nslt
, hrestnsec;
/* no: hrest.nsec -= nslt/16 */ \
559 3: cmp hrestnsec
, nano;
/* more than a billion? */ \
560 bl,pt
%xcc
, 4f;
/* if not, we're done */ \
561 nop;
/* delay: do nothing :( */ \
562 add hrestsec
, 1, hrestsec;
/* hrest.tv_sec++; */ \
563 sub hrestnsec
, nano
, hrestnsec;
/* hrest.tv_nsec -= NANOSEC; */ \
564 ba,a 3b;
/* check >= billion again */ \
567 ENTRY_NP
(gethrestime
)
568 GET_HRESTIME
(%o1
, %o2
, %o3
, %o4
, %o5
, %g1
, %g2
, %g3
, %g4
)
569 CONV_HRESTIME
(%o1
, %o2
, %o3
, %o4
, %o5
)
572 stn
%o2
, [%o0
+ CLONGSIZE
]
573 SET_SIZE
(gethrestime
)
576 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
579 ENTRY_NP
(gethrestime_sec
)
580 GET_HRESTIME
(%o0
, %o2
, %o3
, %o4
, %o5
, %g1
, %g2
, %g3
, %g4
)
581 CONV_HRESTIME
(%o0
, %o2
, %o3
, %o4
, %o5
)
582 retl
! %o0 current hrestime seconds
584 SET_SIZE
(gethrestime_sec
)
587 * Returns the hrestime on the last tick. This is simpler than gethrestime()
588 * and gethrestime_sec(): no conversion is required. gethrestime_lasttick()
589 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
590 * outlined in detail in clock.h. (Unlike GET_HRESTIME/GET_HRTIME, we don't
591 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
594 ENTRY_NP
(gethrestime_lasttick
)
595 sethi
%hi
(hres_lock
), %o1
597 lduw
[%o1
+ %lo
(hres_lock
)], %o2
! Load lock value
598 membar
#LoadLoad ! Load of lock must complete
599 andn
%o2
, 1, %o2
! Mask off lowest bit
600 ldn
[%o1
+ %lo
(hrestime
)], %g1
! Seconds.
601 add %o1
, %lo
(hrestime
), %o4
602 ldn
[%o4
+ CLONGSIZE
], %g2
! Nanoseconds.
603 membar
#LoadLoad ! All loads must complete
604 lduw
[%o1
+ %lo
(hres_lock
)], %o3
! Reload lock value
605 cmp %o3
, %o2
! If lock is locked
or has
606 bne 0b ! changed
, retry.
607 stn
%g1
, [%o0
] ! Delay
: store seconds
609 stn
%g2
, [%o0
+ CLONGSIZE
] ! Delay
: store nanoseconds
610 SET_SIZE
(gethrestime_lasttick
)
613 * Fast trap for gettimeofday(). Returns a timestruc_t in %o0 and %o1.
615 * This is the handler for the ST_GETHRESTIME trap.
618 ENTRY_NP
(get_hrestime
)
619 GET_HRESTIME
(%o0
, %o1
, %g1
, %g2
, %g3
, %g4
, %g5
, %o2
, %o3
)
620 CONV_HRESTIME
(%o0
, %o1
, %g1
, %g2
, %g3
)
622 SET_SIZE
(get_hrestime
)
625 * Fast trap to return lwp virtual time, uses trap window, leaves traps
626 * disabled. Returns a 64-bit number in %o0:%o1, which is the number
627 * of nanoseconds consumed.
629 * This is the handler for the ST_GETHRVTIME trap.
632 * %o0, %o1 = return lwp virtual time
638 ENTRY_NP
(get_virtime
)
639 GET_NATIVE_TIME
(%g5
, %g1
, %g2
) ! %g5
= native time in ticks
640 CPU_ADDR
(%g2
, %g3
) ! CPU struct ptr to
%g2
641 ldn
[%g2
+ CPU_THREAD
], %g2
! thread pointer to
%g2
642 ldn
[%g2
+ T_LWP
], %g3
! lwp pointer to
%g3
645 * Subtract start time of current microstate from time
646 * of day to get increment for lwp virtual time.
648 ldx [%g3
+ LWP_STATE_START
], %g1
! ms_state_start
652 * Add current value of ms_acct[LMS_USER]
654 ldx [%g3
+ LWP_ACCT_USER
], %g1
! ms_acct
[LMS_USER
]
656 NATIVE_TIME_TO_NSEC
(%g5
, %g1
, %o0
)
658 srl
%g5
, 0, %o1
! %o1
= lo32
(%g5
)
659 srlx
%g5
, 32, %o0
! %o0
= hi32
(%g5
)
662 SET_SIZE
(get_virtime
)
668 .asciz "hrtime_base stepping back"
672 save
%sp
, -SA
(MINFRAME
), %sp
! get
a new window
674 sethi
%hi
(hrestime
), %l4
675 ldstub
[%l4
+ %lo
(hres_lock
+ HRES_LOCK_OFFSET
)], %l5
! try locking
677 bz
,pt
%xcc
, 8f
! if we got it
, drive on
678 ld [%l4
+ %lo
(nsec_scale
)], %l5
! delay
: %l5
= scaling factor
679 ldub
[%l4
+ %lo
(hres_lock
+ HRES_LOCK_OFFSET
)], %l5
682 ldstub
[%l4
+ %lo
(hres_lock
+ HRES_LOCK_OFFSET
)], %l5
684 ldub
[%l4
+ %lo
(hres_lock
+ HRES_LOCK_OFFSET
)], %l5
686 membar
#StoreLoad|#StoreStore
689 ! update hres_last_tick.
%l5 has the scaling factor
(nsec_scale
).
691 ldx [%l4
+ %lo
(hrtime_base
)], %g1
! load current hrtime_base
692 GET_NATIVE_TIME
(%l0
, %l3
, %l6
) ! current native time
693 stx %l0
, [%l4
+ %lo
(hres_last_tick
)]! prev
= current
694 ! convert native time to nsecs
695 NATIVE_TIME_TO_NSEC_SCALE
(%l0
, %l5
, %l2
, NSEC_SHIFT
)
697 sub %l0
, %g1
, %i1
! get accurate nsec delta
699 ldx [%l4
+ %lo
(hrtime_base
)], %l1
704 stx %l0
, [%l4
+ %lo
(hrtime_base
)] ! update hrtime_base
707 ! apply adjustment
, if any
709 ldx [%l4
+ %lo
(hrestime_adj
)], %l0
! %l0
= hrestime_adj
711 ! hrestime_adj
== 0 ?
712 ! yes
, skip adjustments
713 clr
%l5
! delay
: set adj to zero
714 tst
%l0
! is hrestime_adj
>= 0 ?
715 bge,pt
%xcc
, 1f
! yes
, go handle positive case
716 srl
%i1
, ADJ_SHIFT
, %l5
! delay
: %l5
= adj
718 addcc
%l0
, %l5
, %g0
! hrestime_adj
< -adj ?
719 bl,pt
%xcc
, 2f
! yes
, use current adj
720 neg %l5
! delay
: %l5
= -adj
722 mov
%l0
, %l5
! no
, so set adj
= hrestime_adj
724 subcc
%l0
, %l5
, %g0
! hrestime_adj
< adj ?
725 bl,a,pt
%xcc
, 2f
! yes
, set adj
= hrestime_adj
726 mov
%l0
, %l5
! delay
: adj
= hrestime_adj
728 ldx [%l4
+ %lo
(timedelta
)], %l0
! %l0
= timedelta
729 sub %l0
, %l5
, %l0
! timedelta
-= adj
731 stx %l0
, [%l4
+ %lo
(timedelta
)] ! store new timedelta
732 stx %l0
, [%l4
+ %lo
(hrestime_adj
)] ! hrestime_adj
= timedelta
734 or %l4
, %lo
(hrestime
), %l2
735 ldn
[%l2
], %i2
! %i2
:%i3
= hrestime sec
:nsec
736 ldn
[%l2
+ CLONGSIZE
], %i3
737 add %i3
, %l5
, %i3
! hrestime.nsec
+= adj
738 add %i3
, %i1
, %i3
! hrestime.nsec
+= nslt
740 set NANOSEC
, %l5
! %l5
= NANOSEC
742 bl,pt
%xcc
, 5f
! if hrestime.tv_nsec
< NANOSEC
743 sethi
%hi
(one_sec
), %i1
! delay
744 add %i2
, 0x1, %i2
! hrestime.tv_sec+
+
745 sub %i3
, %l5
, %i3
! hrestime.tv_nsec
- NANOSEC
747 st %l5
, [%i1
+ %lo
(one_sec
)]
750 stn
%i3
, [%l2
+ CLONGSIZE
] ! store the new hrestime
754 ld [%l4
+ %lo
(hres_lock
)], %i1
755 inc
%i1
! release lock
756 st %i1
, [%l4
+ %lo
(hres_lock
)] ! clear hres_lock
765 ld [%l4
+ %lo
(hres_lock
)], %i1
767 st %i1
, [%l4
+ %lo
(hres_lock
)]
769 sethi
%hi
(hrtime_base_panic
), %o0
771 or %o0
, %lo
(hrtime_base_panic
), %o0
777 #if !defined(lint) && !defined(__lint)
781 .asciz "kstat_q_exit: qlen == 0"
784 save
%sp
, -SA
(MINFRAME
), %sp
785 sethi
%hi
(kstat_q_panic_msg
), %o0
787 or %o0
, %lo
(kstat_q_panic_msg
), %o0
789 SET_SIZE
(kstat_q_panic
)
794 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
795 ld [%o0
+ QTYPE
/**/CNT
], %o1;
/* %o1 = old qlen */ \
796 QOP
%o1
, 1, %o2;
/* %o2 = new qlen */ \
797 QBR
%o1
, QZERO;
/* done if qlen == 0 */ \
798 st %o2
, [%o0
+ QTYPE
/**/CNT
];
/* delay: save qlen */ \
799 ldx [%o0
+ QTYPE
/**/LASTUPDATE
], %o3; \
800 ldx [%o0
+ QTYPE
/**/TIME
], %o4;
/* %o4 = old time */ \
801 ldx [%o0
+ QTYPE
/**/LENTIME
], %o5;
/* %o5 = old lentime */ \
802 sub %g1
, %o3
, %o2;
/* %o2 = time delta */ \
803 mulx
%o1
, %o2
, %o3;
/* %o3 = cur lentime */ \
804 add %o4
, %o2
, %o4;
/* %o4 = new time */ \
805 add %o5
, %o3
, %o5;
/* %o5 = new lentime */ \
806 stx %o4
, [%o0
+ QTYPE
/**/TIME
];
/* save time */ \
807 stx %o5
, [%o0
+ QTYPE
/**/LENTIME
];
/* save lentime */ \
809 stx %g1
, [%o0
+ QTYPE
/**/LASTUPDATE
];
/* lastupdate = now */
813 * same as KSTAT_Q_UPDATE but without:
815 * to be used only with non-debug build. mimics ASSERT() behaviour.
817 #define KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
818 ld [%o0
+ QTYPE
/**/CNT
], %o1;
/* %o1 = old qlen */ \
819 QOP
%o1
, 1, %o2;
/* %o2 = new qlen */ \
820 st %o2
, [%o0
+ QTYPE
/**/CNT
];
/* delay: save qlen */ \
821 ldx [%o0
+ QTYPE
/**/LASTUPDATE
], %o3; \
822 ldx [%o0
+ QTYPE
/**/TIME
], %o4;
/* %o4 = old time */ \
823 ldx [%o0
+ QTYPE
/**/LENTIME
], %o5;
/* %o5 = old lentime */ \
824 sub %g1
, %o3
, %o2;
/* %o2 = time delta */ \
825 mulx
%o1
, %o2
, %o3;
/* %o3 = cur lentime */ \
826 add %o4
, %o2
, %o4;
/* %o4 = new time */ \
827 add %o5
, %o3
, %o5;
/* %o5 = new lentime */ \
828 stx %o4
, [%o0
+ QTYPE
/**/TIME
];
/* save time */ \
829 stx %o5
, [%o0
+ QTYPE
/**/LENTIME
];
/* save lentime */ \
831 stx %g1
, [%o0
+ QTYPE
/**/LASTUPDATE
];
/* lastupdate = now */
835 ENTRY
(kstat_waitq_enter
)
836 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
837 KSTAT_Q_UPDATE
(add, BRZPT
, 1f
, 1:retl
, KSTAT_IO_W
)
838 SET_SIZE
(kstat_waitq_enter
)
841 ENTRY
(kstat_waitq_exit
)
842 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
844 KSTAT_Q_UPDATE
(sub, BRZPN
, kstat_q_panic
, retl
, KSTAT_IO_W
)
846 KSTAT_Q_UPDATE_ND
(sub, retl
, KSTAT_IO_W
)
848 SET_SIZE
(kstat_waitq_exit
)
851 ENTRY
(kstat_runq_enter
)
852 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
853 KSTAT_Q_UPDATE
(add, BRZPT
, 1f
, 1:retl
, KSTAT_IO_R
)
854 SET_SIZE
(kstat_runq_enter
)
857 ENTRY
(kstat_runq_exit
)
858 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
860 KSTAT_Q_UPDATE
(sub, BRZPN
, kstat_q_panic
, retl
, KSTAT_IO_R
)
862 KSTAT_Q_UPDATE_ND
(sub, retl
, KSTAT_IO_R
)
864 SET_SIZE
(kstat_runq_exit
)
867 ENTRY
(kstat_waitq_to_runq
)
868 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
870 KSTAT_Q_UPDATE
(sub, BRZPN
, kstat_q_panic
, 1:, KSTAT_IO_W
)
872 KSTAT_Q_UPDATE_ND
(sub, 1:, KSTAT_IO_W
)
874 KSTAT_Q_UPDATE
(add, BRZPT
, 1f
, 1:retl
, KSTAT_IO_R
)
875 SET_SIZE
(kstat_waitq_to_runq
)
878 ENTRY
(kstat_runq_back_to_waitq
)
879 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
881 KSTAT_Q_UPDATE
(sub, BRZPN
, kstat_q_panic
, 1:, KSTAT_IO_R
)
883 KSTAT_Q_UPDATE_ND
(sub, 1:, KSTAT_IO_R
)
885 KSTAT_Q_UPDATE
(add, BRZPT
, 1f
, 1:retl
, KSTAT_IO_W
)
886 SET_SIZE
(kstat_runq_back_to_waitq
)
888 #endif /* !(lint || __lint) */
893 hrtime_t hres_last_tick;
894 volatile timestruc_t hrestime;
895 int64_t hrestime_adj;
896 volatile int hres_lock;
898 hrtime_t hrtime_base;
899 int traptrace_use_stick;
905 * The following variables MUST be together on a 128-byte boundary.
906 * In addition to the primary performance motivation (having them all
907 * on the same cache line(s)), code here and in the GET*TIME() macros
908 * assumes that they all have the same high 22 address bits (so
909 * there's only one sethi).
912 .global timedelta, hres_last_tick, hrestime, hrestime_adj
913 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
914 .global nsec_shift, adj_shift
916 /* XXX - above comment claims 128-bytes is necessary */
919 .word 0, 0 /* int64_t */
921 .word 0, 0 /* hrtime_t */
923 .nword 0, 0 /* 2 longs */
925 .word 0, 0 /* int64_t */
943 * drv_usecwait(clock_t n) [DDI/DKI - section 9F]
944 * usec_delay(int n) [compatibility - should go one day]
947 * delay for n microseconds. numbers <= 0 delay 1 usec
949 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
950 * and variable clock rate for power management requires that we
951 * use %stick to implement this routine.
953 * For OPL platforms that support the "sleep" instruction, we
954 * conditionally (ifdef'ed) insert a "sleep" instruction in
955 * the loop. Note that theoretically we should have moved (duplicated)
956 * the code down to spitfire/us3/opl specific asm files - but this
957 * is a lot of code duplication just to add one "sleep" instruction.
958 * We chose less code duplication for this.
965 drv_usecwait
(clock_t n
)
980 sethi
%hi
(sticks_per_usec
), %o1
981 lduw
[%o1
+ %lo
(sticks_per_usec
)], %o1
982 mulx
%o1
, %o0
, %o1
! Scale usec to ticks
983 inc
%o1
! We don
't start on a tick edge
984 GET_NATIVE_TIME(%o2, %o3, %o4)
989 .word 0x81b01060 ! insert "sleep" instruction
990 #endif /* _OPL */ ! use byte code for now
992 GET_NATIVE_TIME(%o2, %o3, %o4)
998 SET_SIZE(drv_usecwait)
1005 pil14_interrupt(int level)
1011 * Level-14 interrupt prologue.
1013 ENTRY_NP(pil14_interrupt)
1015 rdpr %pil, %g6 ! %g6 = interrupted PIL
1016 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
1019 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1021 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
1022 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
1023 ba pil_interrupt_common ! must be large-disp branch
1024 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
1025 1: ba pil_interrupt_common ! must be large-disp branch
1026 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
1027 SET_SIZE(pil14_interrupt)
1031 ! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
1032 ! disabled. If TICK_COMPARE is enabled, we know that we need to
1033 ! reenqueue the interrupt request structure. We'll then check TICKINT
1034 ! in SOFTINT; if it
's set, then we know that we were in a TICK_COMPARE
1035 ! interrupt. In this case, TICK_COMPARE may have been rewritten
1036 ! recently; we'll compare
%o5 to the current time to verify that it
's
1039 ! Note that %o5 is live until after 1f.
1040 ! XXX - there is a subroutine call while %o5 is live!
1042 RD_TICKCMPR(%o5, %g1)
1043 srlx %o5, TICKINT_DIS_SHFT, %g1
1048 andn %g5, PSTATE_IE, %g1
1049 wrpr %g0, %g1, %pstate ! Disable vec interrupts
1051 sethi %hi(cbe_level14_inum), %o1
1052 ldx [%o1 + %lo(cbe_level14_inum)], %o1
1053 call intr_enqueue_req ! preserves %o5 and %g5
1056 ! Check SOFTINT for TICKINT/STICKINT
1058 set (TICK_INT_MASK | STICK_INT_MASK), %o0
1061 wrpr %g0, %g5, %pstate ! Enable vec interrupts
1063 ! clear TICKINT/STICKINT
1064 wr %o0, CLEAR_SOFTINT
1067 ! Now that we've cleared TICKINT
, we can reread
%tick
and confirm
1068 ! that the value we programmed is still in the future. If it isn
't,
1069 ! we need to reprogram TICK_COMPARE to fire as soon as possible.
1071 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
1072 sllx %o0, 1, %o0 ! Clear the DIS bit
1074 cmp %o5, %o0 ! In the future?
1075 bg,a,pt %xcc, 2f ! Yes, drive on.
1076 wrpr %g0, %g5, %pstate ! delay: enable vec intr
1079 ! If we're here
, then we have programmed TICK_COMPARE with
a %tick
1080 ! which is in the past; we
'll now load an initial step size, and loop
1081 ! until we've managed to program TICK_COMPARE to fire in the future.
1083 mov
8, %o4
! 8 = arbitrary inital step
1084 1: add %o0
, %o4
, %o5
! Add the step
1085 WR_TICKCMPR
(%o5
,%g1
,%g2
,__LINE__
) ! Write to TICK_CMPR
1086 GET_NATIVE_TIME
(%o0
, %g1
, %g2
) ! %o0
= tick
1087 sllx
%o0
, 1, %o0
! Clear the DIS bit
1089 cmp %o5
, %o0
! In the future?
1090 bg
,a,pt
%xcc
, 2f
! Yes
, drive on.
1091 wrpr
%g0
, %g5
, %pstate
! delay
: enable vec intr
1092 ba 1b ! No
, try again.
1093 sllx
%o4
, 1, %o4
! delay
: double step size
1095 2: ba current_thread_complete
1105 pil15_interrupt
(int level
)
1111 * Level-15 interrupt prologue.
1113 ENTRY_NP
(pil15_interrupt
)
1117 btst TSTATE_PRIV
, %g6
! trap from supervisor mode?
1119 stn
%g5
, [%g1
+ CPU_CPCPROFILE_PC
] ! if so
, record kernel PC
1120 stn
%g5
, [%g1
+ CPU_CPCPROFILE_UPC
] ! if
not, record user PC
1121 ba pil15_epilogue
! must
be large-disp branch
1122 stn
%g0
, [%g1
+ CPU_CPCPROFILE_PC
] ! zero kernel PC
1123 1: ba pil15_epilogue
! must
be large-disp branch
1124 stn
%g0
, [%g1
+ CPU_CPCPROFILE_UPC
] ! zero user PC
1125 SET_SIZE
(pil15_interrupt
)
1129 #if defined(lint) || defined(__lint)
1133 find_cpufrequency
(volatile uchar_t
*clock_ptr
)
1143 .asciz "find_cpufrequency: interrupts already disabled on entry"
1146 ENTRY_NP
(find_cpufrequency
)
1150 andcc
%g1
, PSTATE_IE
, %g0
! If DEBUG
, check that interrupts
1151 bnz
0f
! are currently enabled
1152 sethi
%hi
(find_cpufreq_panic
), %o1
1154 or %o1
, %lo
(find_cpufreq_panic
), %o0
1158 wrpr
%g1
, PSTATE_IE
, %pstate
! Disable interrupts
1160 ldub
[%o0
], %o1
! Read the number of seconds
1161 mov
%o1
, %o2
! remember initial value in
%o2
1163 GET_NATIVE_TIME
(%o3
, %g4
, %g5
)
1164 cmp %o1
, %o2
! did the seconds register roll over?
1165 be,pt
%icc
, 1b ! branch back if unchanged
1166 ldub
[%o0
], %o2
! delay
: load the new seconds val
1168 brz
,pn
%o2
, 3b ! if the minutes just rolled over
,
1169 ! the last second could have been
1170 ! inaccurate; try again.
1171 mov
%o2
, %o4
! delay
: store init. val. in
%o2
1173 GET_NATIVE_TIME
(%o5
, %g4
, %g5
)
1174 cmp %o2
, %o4
! did the seconds register roll over?
1175 be,pt
%icc
, 2b ! branch back if unchanged
1176 ldub
[%o0
], %o4
! delay
: load the new seconds val
1178 brz
,pn
%o4
, 0b ! if the minutes just rolled over
,
1179 ! the last second could have been
1180 ! inaccurate; try again.
1181 wrpr
%g0
, %g1
, %pstate
! delay
: re-enable interrupts
1184 sub %o5
, %o3
, %o0
! return the difference in ticks
1185 SET_SIZE
(find_cpufrequency
)
1191 * Prefetch a page_t for write or read, this assumes a linear
1192 * scan of sequential page_t's.
1196 prefetch_page_w
(void
*pp
)
1201 prefetch_page_r
(void
*pp
)
1205 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1208 ! On US-III
, the prefetch instruction queue is
8 entries deep.
1209 ! Also
, prefetches for write put data in the E$
, which has
1210 ! lines of
512 bytes for an
8MB cache. Each E$ line is further
1211 ! subblocked into
64 byte chunks.
1213 ! Since prefetch can only bring in
64 bytes at
a time
(See Sparc
1214 ! v9 Architecture Manual pp.204
) and a page_t is
128 bytes
,
1215 ! then
2 prefetches are required in order to bring an entire
1218 ! Since the prefetch queue is
8 entries deep
, we currently can
1219 ! only have
4 prefetches for page_t
's outstanding. Thus, we
1220 ! prefetch n+4 ahead of where we are now:
1222 ! 4 * sizeof(page_t) -> 512
1223 ! 4 * sizeof(page_t) +64 -> 576
1227 ! contiguous page array in memory...
1229 ! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
1231 ! pp | pp+4*sizeof(page)+64
1237 ! +-------+<--- In this iteration, we're working with pp
(AAA1
),
1238 ! |Preftch| but we enqueue prefetch for addr
= XXX1
1240 ! +-------+<--- this queue slot will
be a prefetch instruction for
1241 ! |Preftch| for addr
= pp
+ 4*sizeof
(page_t
) + 64 (or second
1242 ! | XXX2 | half of page XXX
)
1244 ! |Preftch|
<-+- The next time around this function
, we
'll be
1245 ! | YYY1 | | working with pp = BBB1, but will be enqueueing
1246 ! +-------+ | prefetches to for both halves of page YYY,
1247 ! |Preftch| | while both halves of page XXX are in transit
1248 ! | YYY2 |<-+ make their way into the E$.
1257 ! +============================================...
1258 ! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
1259 ! +============================================...
1261 ! +============================================...
1265 ! So we should expect the first four page accesses to stall
1266 ! while we warm up the cache, after which, most of the pages
1267 ! will have their pp ready in the E$.
1269 ! Also note that if sizeof(page_t) grows beyond 128, then
1270 ! we'll need an additional prefetch to get an entire page
1271 ! into the E$
, thus reducing the number of outstanding page
1272 ! prefetches to
2 (ie.
3 prefetches
/page
= 6 queue slots
)
1277 ! On Cheetah+ we use
"#n_write" prefetches as these avoid
1278 ! unnecessary RTS-
>RTO bus transaction state change
, and
1279 ! just issues RTO transaction.
(See pp.77 of Cheetah+ Delta
1280 ! PRM
). On Cheetah, #n_write prefetches are reflected with
1281 ! RTS-
>RTO state transition regardless.
1286 #if STRIDE1 != (PAGE_SIZE * 4)
1287 #error "STRIDE1 != (PAGE_SIZE * 4)"
1288 #endif /* STRIDE1 != (PAGE_SIZE * 4) */
1290 ENTRY
(prefetch_page_w
)
1291 prefetch
[%o0+STRIDE1
], #n_writes
1293 prefetch
[%o0+STRIDE2
], #n_writes
1294 SET_SIZE
(prefetch_page_w
)
1297 ! Note on CHEETAH to prefetch for read
, we really use
#one_write.
1298 ! This fetches to E$
(general use
) rather than P$
(floating point use
).
1300 ENTRY
(prefetch_page_r
)
1301 prefetch
[%o0+STRIDE1
], #one_write
1303 prefetch
[%o0+STRIDE2
], #one_write
1304 SET_SIZE
(prefetch_page_r
)
1306 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1309 ! UltraSparcII can have up to
3 prefetches outstanding.
1310 ! A page_t is
128 bytes
(2 prefetches of
64 bytes each
)
1311 ! So prefetch for pp
+ 1, which is
1313 ! pp
+ sizeof
(page_t
)
1315 ! pp
+ sizeof
(page_t
) + 64
1320 #if STRIDE1 != PAGE_SIZE
1321 #error "STRIDE1 != PAGE_SIZE"
1322 #endif /* STRIDE1 != PAGE_SIZE */
1324 ENTRY
(prefetch_page_w
)
1325 prefetch
[%o0+STRIDE1
], #n_writes
1327 prefetch
[%o0+STRIDE2
], #n_writes
1328 SET_SIZE
(prefetch_page_w
)
1330 ENTRY
(prefetch_page_r
)
1331 prefetch
[%o0+STRIDE1
], #n_reads
1333 prefetch
[%o0+STRIDE2
], #n_reads
1334 SET_SIZE
(prefetch_page_r
)
1336 #elif defined(OLYMPUS_C)
1338 ! Prefetch strides for Olympus-C
/* prefetch look-ahead strides (bytes) for Olympus-C page_t prefetching */
#define	STRIDE1	0x440
#define	STRIDE2	0x640
1344 ENTRY
(prefetch_page_w
)
1345 prefetch
[%o0+STRIDE1
], #n_writes
1347 prefetch
[%o0+STRIDE2
], #n_writes
1348 SET_SIZE
(prefetch_page_w
)
1350 ENTRY
(prefetch_page_r
)
1351 prefetch
[%o0+STRIDE1
], #n_writes
1353 prefetch
[%o0+STRIDE2
], #n_writes
1354 SET_SIZE
(prefetch_page_r
)
1355 #else /* OLYMPUS_C */
1357 #error "You need to fix this for your new cpu type."
1359 #endif /* OLYMPUS_C */
1365 * Prefetch struct smap for write.
1369 prefetch_smap_w
(void
*smp
)
1373 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
#define	PREFETCH_Q_LEN	8	/* US-III: prefetch instruction queue is 8 deep */
1378 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
#define	PREFETCH_Q_LEN	3	/* UltraSPARC-II: up to 3 outstanding prefetches */
1382 #elif defined(OLYMPUS_C)
1384 ! Use length of one for now.
#define	PREFETCH_Q_LEN	1	/* Olympus-C: use a queue length of one for now */
1388 #else /* OLYMPUS_C */
1390 #error You need to fix this for your new cpu type.
1392 #endif /* OLYMPUS_C */
1396 #ifdef SEGKPM_SUPPORT
/* smap object size and prefetch stride with SEGKPM_SUPPORT */
#define	SMAP_SIZE	72
#define	SMAP_STRIDE	(((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1401 #else /* SEGKPM_SUPPORT */
1404 ! The hardware will prefetch the
64 byte cache aligned block
1405 ! that contains the address specified in the prefetch instruction.
1406 ! Since the size of the smap struct is
48 bytes
, issuing
1 prefetch
1407 ! per pass will suffice as long as we prefetch far enough ahead to
1408 ! make sure we don
't stall for the cases where the smap object
1409 ! spans multiple hardware prefetch blocks. Let's prefetch as far
1410 ! ahead as the hardware will allow.
1412 ! The smap array is processed with decreasing address pointers.
/* smap object size and prefetch stride without SEGKPM_SUPPORT */
#define	SMAP_SIZE	48
#define	SMAP_STRIDE	(PREFETCH_Q_LEN * SMAP_SIZE)
1417 #endif /* SEGKPM_SUPPORT */
1419 ENTRY
(prefetch_smap_w
)
1421 prefetch
[%o0-SMAP_STRIDE
], #n_writes
1422 SET_SIZE
(prefetch_smap_w
)
1426 #if defined(lint) || defined(__lint)
1437 ldxa
[%g0
]ASI_INTR_DISPATCH_STATUS
, %o0