4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
30 * General assembly language routines.
31 * It is the intent of this file to contain routines that are
32 * specific to cpu architecture.
36 * WARNING: If you add a fast trap handler which can be invoked by a
37 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
38 * instead of "done" instruction to return back to the user mode. See
39 * comments for the "fast_trap_done" entry point for more information.
41 #define FAST_TRAP_DONE \
45 * Override GET_NATIVE_TIME for the cpu module code. This is not
46 * guaranteed to be exactly one instruction, be careful of using
47 * the macro in delay slots.
49 * Do not use any instruction that modifies condition codes as the
50 * caller may depend on these to remain unchanged across the macro.
52 #if defined(CHEETAH) || defined(OLYMPUS_C)
54 #define GET_NATIVE_TIME(out, scr1, scr2) \
56 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
58 add reg
, delta
, reg; \
60 #define RD_TICKCMPR(out, scr) \
62 #define WR_TICKCMPR(in, scr1, scr2, label) \
65 #elif defined(HUMMINGBIRD)
66 #include <sys/spitregs.h>
69 * the current hummingbird version of %stick and %stick_cmp
70 * were both implemented as (2) 32-bit locations in ASI_IO space;
71 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
73 * 64-bit opcodes are required, but move only 32-bits:
75 * ldxa [phys]ASI_IO, %dst reads the low 32-bits from phys into %dst
76 * stxa %src, [phys]ASI_IO writes the low 32-bits from %src into phys
78 * reg equivalent [phys]ASI_IO
79 * ------------------ ---------------
80 * %stick_cmp low-32 0x1FE.0000.F060
81 * %stick_cmp high-32 0x1FE.0000.F068
82 * %stick low-32 0x1FE.0000.F070
83 * %stick high-32 0x1FE.0000.F078
85 #define HSTC_LOW 0x60 /* stick_cmp low 32-bits */
86 #define HSTC_HIGH 0x68 /* stick_cmp high 32-bits */
87 #define HST_LOW 0x70 /* stick low 32-bits */
88 #define HST_HIGH 0x78 /* stick high 32-bits */
89 #define HST_DIFF 0x08 /* low<-->high diff */
92 * Any change in the number of instructions in SETL41()
93 * will affect SETL41_OFF
/*
 * SETL41(reg, byte): construct the 41-bit physical address
 * 0x1FE.0000.F0xx in "reg"; "byte" selects the low-byte offset
 * (HST_LOW/HST_HIGH/HSTC_LOW/HSTC_HIGH) within the Hummingbird
 * %stick/%stick_cmp ASI_IO register block.
 * NOTE: exactly 4 instructions -- SETL41_OFF depends on this count.
 */
#define	SETL41(reg, byte) \
	sethi	%hi(0x1FE00000), reg;	/* 0000.0000.1FE0.0000 */	\
	or	reg, 0xF, reg;		/* 0000.0000.1FE0.000F */	\
	sllx	reg, 12, reg;		/* 0000.01FE.0000.F000 */	\
	or	reg, byte, reg;		/* 0000.01FE.0000.F0xx */
102 * SETL41_OFF is used to calculate the relative PC value when a
103 * branch instruction needs to go over SETL41() macro
105 #define SETL41_OFF 16
108 * reading stick requires 2 loads, and there could be an intervening
109 * low-to-high 32-bit rollover resulting in a return value that is
110 * off by about (2 ^ 32); this rare case is prevented by re-reading
111 * the low-32 bits after the high-32 and verifying the "after" value
112 * is >= the "before" value; if not, increment the high-32 value.
114 * this method is limited to 1 rollover, and based on the fixed
115 * stick-frequency (5555555), requires the loads to complete within
116 * 773 seconds; incrementing the high-32 value will not overflow for
119 * writing stick requires 2 stores; if the old/new low-32 value is
120 * near 0xffffffff, there could be another rollover (also rare).
121 * to prevent this, we first write a 0 to the low-32, then write
122 * new values to the high-32 then the low-32.
124 * When we detect a carry in the lower %stick register, we need to
125 * read HST_HIGH again. However at the point where we detect this,
126 * we need to rebuild the register address HST_HIGH.This involves more
127 * than one instructions and a branch is unavoidable. However, most of
128 * the time, there is no carry. So we take the penalty of a branch
129 * instruction only when there is carry (less frequent).
131 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
132 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
133 * addr already points to HST_LOW.
135 * NOTE: this method requires disabling interrupts before using
138 #define GET_NATIVE_TIME(out, scr, tmp) \
139 SETL41
(scr
, HST_LOW
); \
140 ldxa
[scr
]ASI_IO
, tmp; \
142 ldxa
[scr
]ASI_IO
, out; \
144 ldxa
[scr
]ASI_IO
, scr; \
146 brlz
,pn tmp
, .-(SETL41_OFF+24); \
149 #define DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
150 SETL41
(addr
, HST_LOW
); \
151 ldxa
[addr
]ASI_IO
, tmp; \
152 inc HST_DIFF
, addr; \
153 ldxa
[addr
]ASI_IO
, high; \
154 dec HST_DIFF
, addr; \
155 ldxa
[addr
]ASI_IO
, low; \
158 sllx high
, 32, high; \
159 or high
, low
, high; \
160 add high
, delta
, high; \
162 srlx high
, 32, high; \
163 stxa
%g0
, [addr
]ASI_IO; \
164 inc HST_DIFF
, addr; \
165 stxa high
, [addr
]ASI_IO; \
166 dec HST_DIFF
, addr; \
167 stxa low
, [addr
]ASI_IO
168 #define RD_TICKCMPR(out, scr) \
169 SETL41
(scr
, HSTC_LOW
); \
170 ldxa
[scr
]ASI_IO
, out; \
172 ldxa
[scr
]ASI_IO
, scr; \
175 #define WR_TICKCMPR(in, scra, scrd, label) \
176 SETL41
(scra
, HSTC_HIGH
); \
178 stxa scrd
, [scra
]ASI_IO; \
179 dec HST_DIFF
, scra; \
180 stxa in
, [scra
]ASI_IO
182 #else /* !CHEETAH && !HUMMINGBIRD */
184 #define GET_NATIVE_TIME(out, scr1, scr2) \
186 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
188 add reg
, delta
, reg; \
190 #define RD_TICKCMPR(out, scr) \
192 #ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
194 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
195 * The failure occurs only when the following instruction decodes to wr or
196 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
197 * with a read, thus stalling the pipe and keeping following instructions
198 * from causing data corruption. Aligning to a quadword will ensure these
199 * two instructions are not split due to i$ misses.
201 #define WR_TICKCMPR(cmpr,scr1,scr2,label) \
202 ba,a .bb_errata_1.label ;\
204 .bb_errata_1.label: ;\
205 wr cmpr
, TICK_COMPARE ;\
207 #else /* BB_ERRATA_1 */
208 #define WR_TICKCMPR(in,scr1,scr2,label) \
210 #endif /* BB_ERRATA_1 */
212 #endif /* !CHEETAH && !HUMMINGBIRD */
214 #include <sys/clock.h>
217 #include <sys/types.h>
219 #include <sys/systm.h>
220 #include <sys/regset.h>
221 #include <sys/sunddi.h>
222 #include <sys/lockstat.h>
226 #include <sys/asm_linkage.h>
227 #include <sys/privregs.h>
228 #include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */
229 #include <sys/machthread.h>
230 #include <sys/clock.h>
231 #include <sys/intreg.h>
232 #include <sys/psr_compat.h>
233 #include <sys/isa_defs.h>
234 #include <sys/dditypes.h>
235 #include <sys/intr.h>
259 * Softint generated when counter field of tick reg matches value field
264 tickcmpr_set
(uint64_t clock_cycles
)
269 ENTRY_NP
(tickcmpr_set
)
270 ! get
64-bit clock_cycles interval
272 mov
8, %o3
! A reasonable initial step size
274 WR_TICKCMPR
(%o2
,%o4
,%o5
,__LINE__
) ! Write to TICK_CMPR
276 GET_NATIVE_TIME
(%o0
, %o4
, %o5
) ! Read
%tick to confirm the
277 sllx
%o0
, 1, %o0
! value we wrote was in the future.
280 cmp %o2
, %o0
! If the value we wrote was in the
281 bg
,pt
%xcc
, 2f
! future
, then blow out of here.
282 sllx
%o3
, 1, %o3
! If
not, then double our step size
,
283 ba,pt
%xcc
, 1b ! and take another lap.
288 SET_SIZE
(tickcmpr_set
)
295 tickcmpr_disable
(void
)
300 ENTRY_NP
(tickcmpr_disable
)
302 sllx
%g1
, TICKINT_DIS_SHFT
, %o0
303 WR_TICKCMPR
(%o0
,%o4
,%o5
,__LINE__
) ! Write to TICK_CMPR
306 SET_SIZE
(tickcmpr_disable
)
313 * tick_write_delta() increments %tick by the specified delta. This should
314 * only be called after a CPR event to assure that gethrtime() continues to
315 * increase monotonically. Obviously, writing %tick needs to de done very
316 * carefully to avoid introducing unnecessary %tick skew across CPUs. For
317 * this reason, we make sure we're i-cache hot before actually writing to
322 tick_write_delta
(uint64_t delta
)
330 .asciz "tick_write_delta: interrupts already disabled on entry"
333 ENTRY_NP
(tick_write_delta
)
336 andcc
%g1
, PSTATE_IE
, %g0
! If DEBUG
, check that interrupts
337 bnz
0f
! aren
't already disabled.
338 sethi %hi(tick_write_panic), %o1
339 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
341 or %i1, %lo(tick_write_panic), %o0
343 0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
345 ba 0f ! Branch to cache line-aligned instr.
348 0: nop ! The next 3 instructions are now hot.
349 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick
352 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts
357 * return 1 if disabled
361 tickcmpr_disabled(void)
366 ENTRY_NP(tickcmpr_disabled)
367 RD_TICKCMPR(%g1, %o0)
369 srlx %g1, TICKINT_DIS_SHFT, %o0
370 SET_SIZE(tickcmpr_disabled)
391 GET_NATIVE_TIME(%o0, %o2, %o3)
401 * Return the counter portion of the tick register.
407 gettick_counter(void)
412 ENTRY_NP(gettick_counter)
416 srlx %o0, 1, %o0 ! shake off npt bit
417 SET_SIZE(gettick_counter)
421 * Provide a C callable interface to the trap that reads the hi-res timer.
422 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
430 return ((hrtime_t)0);
434 gethrtime_unscaled(void)
436 return ((hrtime_t)0);
442 return ((hrtime_t)0);
446 scalehrtime(hrtime_t *hrt)
452 gethrestime(timespec_t *tp)
459 gethrestime_sec(void)
465 gethrestime_lasttick(timespec_t *tp)
478 panic_hres_tick(void)
485 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
491 ENTRY_NP(gethrtime_unscaled)
492 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
495 SET_SIZE(gethrtime_unscaled)
497 ENTRY_NP(gethrtime_waitfree)
498 ALTENTRY(dtrace_gethrtime)
499 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
500 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
503 SET_SIZE(dtrace_gethrtime)
504 SET_SIZE(gethrtime_waitfree)
508 NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
510 ! hrtime_t's are signed
, max hrtime_t must
be positive
517 SET_SIZE
(gethrtime_max
)
521 NATIVE_TIME_TO_NSEC
(%o1
, %o2
, %o3
)
524 SET_SIZE
(scalehrtime
)
527 * Fast trap to return a timestamp, uses trap window, leaves traps
528 * disabled. Returns a 64-bit nanosecond timestamp in %o0 and %o1.
530 * This is the handler for the ST_GETHRTIME trap.
533 ENTRY_NP
(get_timestamp
)
534 GET_HRTIME
(%g1
, %g2
, %g3
, %g4
, %g5
, %o0
, %o1
, %o2
) ! %g1
= hrtime
535 srlx
%g1
, 32, %o0
! %o0
= hi32
(%g1
)
536 srl
%g1
, 0, %o1
! %o1
= lo32
(%g1
)
538 SET_SIZE
(get_timestamp
)
541 * Macro to convert GET_HRESTIME() bits into a timestamp.
543 * We use two separate macros so that the platform-dependent GET_HRESTIME()
544 * can be as small as possible; CONV_HRESTIME() implements the generic part.
546 #define CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
547 brz
,pt adj
, 3f;
/* no adjustments, it's easy */ \
548 add hrestnsec
, nslt
, hrestnsec;
/* hrest.tv_nsec += nslt */ \
549 brlz
,pn adj
, 2f;
/* if hrestime_adj negative */ \
550 srlx nslt
, ADJ_SHIFT
, nslt;
/* delay: nslt >>= 4 */ \
551 subcc adj
, nslt
, %g0;
/* hrestime_adj - nslt/16 */ \
552 movg
%xcc
, nslt
, adj;
/* adj by min(adj, nslt/16) */ \
553 ba 3f;
/* go convert to sec/nsec */ \
554 add hrestnsec
, adj
, hrestnsec;
/* delay: apply adjustment */ \
555 2: addcc adj
, nslt
, %g0;
/* hrestime_adj + nslt/16 */ \
556 bge,a,pt
%xcc
, 3f;
/* is adj less negative? */ \
557 add hrestnsec
, adj
, hrestnsec;
/* yes: hrest.nsec += adj */ \
558 sub hrestnsec
, nslt
, hrestnsec;
/* no: hrest.nsec -= nslt/16 */ \
559 3: cmp hrestnsec
, nano;
/* more than a billion? */ \
560 bl,pt
%xcc
, 4f;
/* if not, we're done */ \
561 nop;
/* delay: do nothing :( */ \
562 add hrestsec
, 1, hrestsec;
/* hrest.tv_sec++; */ \
563 sub hrestnsec
, nano
, hrestnsec;
/* hrest.tv_nsec -= NANOSEC; */ \
564 ba,a 3b;
/* check >= billion again */ \
567 ENTRY_NP
(gethrestime
)
568 GET_HRESTIME
(%o1
, %o2
, %o3
, %o4
, %o5
, %g1
, %g2
, %g3
, %g4
)
569 CONV_HRESTIME
(%o1
, %o2
, %o3
, %o4
, %o5
)
572 stn
%o2
, [%o0
+ CLONGSIZE
]
573 SET_SIZE
(gethrestime
)
576 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
579 ENTRY_NP
(gethrestime_sec
)
580 GET_HRESTIME
(%o0
, %o2
, %o3
, %o4
, %o5
, %g1
, %g2
, %g3
, %g4
)
581 CONV_HRESTIME
(%o0
, %o2
, %o3
, %o4
, %o5
)
582 retl
! %o0 current hrestime seconds
584 SET_SIZE
(gethrestime_sec
)
587 * Returns the hrestime on the last tick. This is simpler than gethrestime()
588 * and gethrestime_sec(): no conversion is required. gethrestime_lasttick()
589 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
590 * outlined in detail in clock.h. (Unlike GET_HRESTIME/GET_HRTIME, we don't
591 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
594 ENTRY_NP
(gethrestime_lasttick
)
595 sethi
%hi
(hres_lock
), %o1
597 lduw
[%o1
+ %lo
(hres_lock
)], %o2
! Load lock value
598 membar
#LoadLoad ! Load of lock must complete
599 andn
%o2
, 1, %o2
! Mask off lowest bit
600 ldn
[%o1
+ %lo
(hrestime
)], %g1
! Seconds.
601 add %o1
, %lo
(hrestime
), %o4
602 ldn
[%o4
+ CLONGSIZE
], %g2
! Nanoseconds.
603 membar
#LoadLoad ! All loads must complete
604 lduw
[%o1
+ %lo
(hres_lock
)], %o3
! Reload lock value
605 cmp %o3
, %o2
! If lock is locked
or has
606 bne 0b ! changed
, retry.
607 stn
%g1
, [%o0
] ! Delay
: store seconds
609 stn
%g2
, [%o0
+ CLONGSIZE
] ! Delay
: store nanoseconds
610 SET_SIZE
(gethrestime_lasttick
)
613 * Fast trap for gettimeofday(). Returns a timestruc_t in %o0 and %o1.
615 * This is the handler for the ST_GETHRESTIME trap.
618 ENTRY_NP
(get_hrestime
)
619 GET_HRESTIME
(%o0
, %o1
, %g1
, %g2
, %g3
, %g4
, %g5
, %o2
, %o3
)
620 CONV_HRESTIME
(%o0
, %o1
, %g1
, %g2
, %g3
)
622 SET_SIZE
(get_hrestime
)
625 * Fast trap to return lwp virtual time, uses trap window, leaves traps
626 * disabled. Returns a 64-bit number in %o0:%o1, which is the number
627 * of nanoseconds consumed.
629 * This is the handler for the ST_GETHRVTIME trap.
632 * %o0, %o1 = return lwp virtual time
638 ENTRY_NP
(get_virtime
)
639 GET_NATIVE_TIME
(%g5
, %g1
, %g2
) ! %g5
= native time in ticks
640 CPU_ADDR
(%g2
, %g3
) ! CPU struct ptr to
%g2
641 ldn
[%g2
+ CPU_THREAD
], %g2
! thread pointer to
%g2
642 ldn
[%g2
+ T_LWP
], %g3
! lwp pointer to
%g3
645 * Subtract start time of current microstate from time
646 * of day to get increment for lwp virtual time.
648 ldx [%g3
+ LWP_STATE_START
], %g1
! ms_state_start
652 * Add current value of ms_acct[LMS_USER]
654 ldx [%g3
+ LWP_ACCT_USER
], %g1
! ms_acct
[LMS_USER
]
656 NATIVE_TIME_TO_NSEC
(%g5
, %g1
, %o0
)
658 srl
%g5
, 0, %o1
! %o1
= lo32
(%g5
)
659 srlx
%g5
, 32, %o0
! %o0
= hi32
(%g5
)
662 SET_SIZE
(get_virtime
)
668 .asciz "hrtime_base stepping back"
672 save
%sp
, -SA
(MINFRAME
), %sp
! get
a new window
674 sethi
%hi
(hrestime
), %l4
675 ldstub
[%l4
+ %lo
(hres_lock
+ HRES_LOCK_OFFSET
)], %l5
! try locking
677 bz
,pt
%xcc
, 8f
! if we got it
, drive on
678 ld [%l4
+ %lo
(nsec_scale
)], %l5
! delay
: %l5
= scaling factor
679 ldub
[%l4
+ %lo
(hres_lock
+ HRES_LOCK_OFFSET
)], %l5
682 ldstub
[%l4
+ %lo
(hres_lock
+ HRES_LOCK_OFFSET
)], %l5
684 ldub
[%l4
+ %lo
(hres_lock
+ HRES_LOCK_OFFSET
)], %l5
686 membar
#StoreLoad|#StoreStore
689 ! update hres_last_tick.
%l5 has the scaling factor
(nsec_scale
).
691 ldx [%l4
+ %lo
(hrtime_base
)], %g1
! load current hrtime_base
692 GET_NATIVE_TIME
(%l0
, %l3
, %l6
) ! current native time
693 stx %l0
, [%l4
+ %lo
(hres_last_tick
)]! prev
= current
694 ! convert native time to nsecs
695 NATIVE_TIME_TO_NSEC_SCALE
(%l0
, %l5
, %l2
, NSEC_SHIFT
)
697 sub %l0
, %g1
, %i1
! get accurate nsec delta
699 ldx [%l4
+ %lo
(hrtime_base
)], %l1
704 stx %l0
, [%l4
+ %lo
(hrtime_base
)] ! update hrtime_base
707 ! apply adjustment
, if any
709 ldx [%l4
+ %lo
(hrestime_adj
)], %l0
! %l0
= hrestime_adj
711 ! hrestime_adj
== 0 ?
712 ! yes
, skip adjustments
713 clr
%l5
! delay
: set adj to zero
714 tst
%l0
! is hrestime_adj
>= 0 ?
715 bge,pt
%xcc
, 1f
! yes
, go handle positive case
716 srl
%i1
, ADJ_SHIFT
, %l5
! delay
: %l5
= adj
718 addcc
%l0
, %l5
, %g0
! hrestime_adj
< -adj ?
719 bl,pt
%xcc
, 2f
! yes
, use current adj
720 neg %l5
! delay
: %l5
= -adj
722 mov
%l0
, %l5
! no
, so set adj
= hrestime_adj
724 subcc
%l0
, %l5
, %g0
! hrestime_adj
< adj ?
725 bl,a,pt
%xcc
, 2f
! yes
, set adj
= hrestime_adj
726 mov
%l0
, %l5
! delay
: adj
= hrestime_adj
728 ldx [%l4
+ %lo
(timedelta
)], %l0
! %l0
= timedelta
729 sub %l0
, %l5
, %l0
! timedelta
-= adj
731 stx %l0
, [%l4
+ %lo
(timedelta
)] ! store new timedelta
732 stx %l0
, [%l4
+ %lo
(hrestime_adj
)] ! hrestime_adj
= timedelta
734 or %l4
, %lo
(hrestime
), %l2
735 ldn
[%l2
], %i2
! %i2
:%i3
= hrestime sec
:nsec
736 ldn
[%l2
+ CLONGSIZE
], %i3
737 add %i3
, %l5
, %i3
! hrestime.nsec
+= adj
738 add %i3
, %i1
, %i3
! hrestime.nsec
+= nslt
740 set NANOSEC
, %l5
! %l5
= NANOSEC
742 bl,pt
%xcc
, 5f
! if hrestime.tv_nsec
< NANOSEC
743 sethi
%hi
(one_sec
), %i1
! delay
744 add %i2
, 0x1, %i2
! hrestime.tv_sec+
+
745 sub %i3
, %l5
, %i3
! hrestime.tv_nsec
- NANOSEC
747 st %l5
, [%i1
+ %lo
(one_sec
)]
750 stn
%i3
, [%l2
+ CLONGSIZE
] ! store the new hrestime
754 ld [%l4
+ %lo
(hres_lock
)], %i1
755 inc
%i1
! release lock
756 st %i1
, [%l4
+ %lo
(hres_lock
)] ! clear hres_lock
765 ld [%l4
+ %lo
(hres_lock
)], %i1
767 st %i1
, [%l4
+ %lo
(hres_lock
)]
769 sethi
%hi
(hrtime_base_panic
), %o0
771 or %o0
, %lo
(hrtime_base_panic
), %o0
777 #if !defined(lint) && !defined(__lint)
781 .asciz "kstat_q_exit: qlen == 0"
784 save
%sp
, -SA
(MINFRAME
), %sp
785 sethi
%hi
(kstat_q_panic_msg
), %o0
787 or %o0
, %lo
(kstat_q_panic_msg
), %o0
789 SET_SIZE
(kstat_q_panic
)
794 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
795 ld [%o0
+ QTYPE
/**/CNT
], %o1;
/* %o1 = old qlen */ \
796 QOP
%o1
, 1, %o2;
/* %o2 = new qlen */ \
797 QBR
%o1
, QZERO;
/* done if qlen == 0 */ \
798 st %o2
, [%o0
+ QTYPE
/**/CNT
];
/* delay: save qlen */ \
799 ldx [%o0
+ QTYPE
/**/LASTUPDATE
], %o3; \
800 ldx [%o0
+ QTYPE
/**/TIME
], %o4;
/* %o4 = old time */ \
801 ldx [%o0
+ QTYPE
/**/LENTIME
], %o5;
/* %o5 = old lentime */ \
802 sub %g1
, %o3
, %o2;
/* %o2 = time delta */ \
803 mulx
%o1
, %o2
, %o3;
/* %o3 = cur lentime */ \
804 add %o4
, %o2
, %o4;
/* %o4 = new time */ \
805 add %o5
, %o3
, %o5;
/* %o5 = new lentime */ \
806 stx %o4
, [%o0
+ QTYPE
/**/TIME
];
/* save time */ \
807 stx %o5
, [%o0
+ QTYPE
/**/LENTIME
];
/* save lentime */ \
809 stx %g1
, [%o0
+ QTYPE
/**/LASTUPDATE
];
/* lastupdate = now */
812 ENTRY
(kstat_waitq_enter
)
813 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
814 KSTAT_Q_UPDATE
(add, BRZPT
, 1f
, 1:retl
, KSTAT_IO_W
)
815 SET_SIZE
(kstat_waitq_enter
)
818 ENTRY
(kstat_waitq_exit
)
819 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
820 KSTAT_Q_UPDATE
(sub, BRZPN
, kstat_q_panic
, retl
, KSTAT_IO_W
)
821 SET_SIZE
(kstat_waitq_exit
)
824 ENTRY
(kstat_runq_enter
)
825 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
826 KSTAT_Q_UPDATE
(add, BRZPT
, 1f
, 1:retl
, KSTAT_IO_R
)
827 SET_SIZE
(kstat_runq_enter
)
830 ENTRY
(kstat_runq_exit
)
831 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
832 KSTAT_Q_UPDATE
(sub, BRZPN
, kstat_q_panic
, retl
, KSTAT_IO_R
)
833 SET_SIZE
(kstat_runq_exit
)
836 ENTRY
(kstat_waitq_to_runq
)
837 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
838 KSTAT_Q_UPDATE
(sub, BRZPN
, kstat_q_panic
, 1:, KSTAT_IO_W
)
839 KSTAT_Q_UPDATE
(add, BRZPT
, 1f
, 1:retl
, KSTAT_IO_R
)
840 SET_SIZE
(kstat_waitq_to_runq
)
843 ENTRY
(kstat_runq_back_to_waitq
)
844 GET_NATIVE_TIME
(%g1
, %g2
, %g3
)
845 KSTAT_Q_UPDATE
(sub, BRZPN
, kstat_q_panic
, 1:, KSTAT_IO_R
)
846 KSTAT_Q_UPDATE
(add, BRZPT
, 1f
, 1:retl
, KSTAT_IO_W
)
847 SET_SIZE
(kstat_runq_back_to_waitq
)
849 #endif /* !(lint || __lint) */
854 hrtime_t hres_last_tick;
855 volatile timestruc_t hrestime;
856 int64_t hrestime_adj;
857 volatile int hres_lock;
859 hrtime_t hrtime_base;
860 int traptrace_use_stick;
866 * The following variables MUST be together on a 128-byte boundary.
867 * In addition to the primary performance motivation (having them all
868 * on the same cache line(s)), code here and in the GET*TIME() macros
869 * assumes that they all have the same high 22 address bits (so
870 * there's only one sethi).
873 .global timedelta, hres_last_tick, hrestime, hrestime_adj
874 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
875 .global nsec_shift, adj_shift
877 /* XXX - above comment claims 128-bytes is necessary */
880 .word 0, 0 /* int64_t */
882 .word 0, 0 /* hrtime_t */
884 .nword 0, 0 /* 2 longs */
886 .word 0, 0 /* int64_t */
904 * drv_usecwait(clock_t n) [DDI/DKI - section 9F]
905 * usec_delay(int n) [compatibility - should go one day]
908 * delay for n microseconds. numbers <= 0 delay 1 usec
910 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
911 * and variable clock rate for power management requires that we
912 * use %stick to implement this routine.
914 * For OPL platforms that support the "sleep" instruction, we
915 * conditionally (ifdef'ed) insert a "sleep" instruction in
916 * the loop. Note that theoretically we should have moved (duplicated)
917 * the code down to spitfire/us3/opl specific asm files - but this
918 * is a lot of code duplication just to add one "sleep" instruction.
919 * We chose less code duplication for this.
926 drv_usecwait
(clock_t n
)
941 sethi
%hi
(sticks_per_usec
), %o1
942 lduw
[%o1
+ %lo
(sticks_per_usec
)], %o1
943 mulx
%o1
, %o0
, %o1
! Scale usec to ticks
944 inc
%o1
! We don
't start on a tick edge
945 GET_NATIVE_TIME(%o2, %o3, %o4)
950 .word 0x81b01060 ! insert "sleep" instruction
951 #endif /* _OPL */ ! use byte code for now
953 GET_NATIVE_TIME(%o2, %o3, %o4)
959 SET_SIZE(drv_usecwait)
966 pil14_interrupt(int level)
972 * Level-14 interrupt prologue.
974 ENTRY_NP(pil14_interrupt)
976 rdpr %pil, %g6 ! %g6 = interrupted PIL
977 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
980 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
982 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
983 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
984 ba pil_interrupt_common ! must be large-disp branch
985 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
986 1: ba pil_interrupt_common ! must be large-disp branch
987 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
988 SET_SIZE(pil14_interrupt)
992 ! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
993 ! disabled. If TICK_COMPARE is enabled, we know that we need to
994 ! reenqueue the interrupt request structure. We'll then check TICKINT
995 ! in SOFTINT; if it
's set, then we know that we were in a TICK_COMPARE
996 ! interrupt. In this case, TICK_COMPARE may have been rewritten
997 ! recently; we'll compare
%o5 to the current time to verify that it
's
1000 ! Note that %o5 is live until after 1f.
1001 ! XXX - there is a subroutine call while %o5 is live!
1003 RD_TICKCMPR(%o5, %g1)
1004 srlx %o5, TICKINT_DIS_SHFT, %g1
1009 andn %g5, PSTATE_IE, %g1
1010 wrpr %g0, %g1, %pstate ! Disable vec interrupts
1012 sethi %hi(cbe_level14_inum), %o1
1013 ldx [%o1 + %lo(cbe_level14_inum)], %o1
1014 call intr_enqueue_req ! preserves %o5 and %g5
1017 ! Check SOFTINT for TICKINT/STICKINT
1019 set (TICK_INT_MASK | STICK_INT_MASK), %o0
1022 wrpr %g0, %g5, %pstate ! Enable vec interrupts
1024 ! clear TICKINT/STICKINT
1025 wr %o0, CLEAR_SOFTINT
1028 ! Now that we've cleared TICKINT
, we can reread
%tick
and confirm
1029 ! that the value we programmed is still in the future. If it isn
't,
1030 ! we need to reprogram TICK_COMPARE to fire as soon as possible.
1032 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
1033 sllx %o0, 1, %o0 ! Clear the DIS bit
1035 cmp %o5, %o0 ! In the future?
1036 bg,a,pt %xcc, 2f ! Yes, drive on.
1037 wrpr %g0, %g5, %pstate ! delay: enable vec intr
1040 ! If we're here
, then we have programmed TICK_COMPARE with
a %tick
1041 ! which is in the past; we
'll now load an initial step size, and loop
1042 ! until we've managed to program TICK_COMPARE to fire in the future.
1044 mov
8, %o4
! 8 = arbitrary inital step
1045 1: add %o0
, %o4
, %o5
! Add the step
1046 WR_TICKCMPR
(%o5
,%g1
,%g2
,__LINE__
) ! Write to TICK_CMPR
1047 GET_NATIVE_TIME
(%o0
, %g1
, %g2
) ! %o0
= tick
1048 sllx
%o0
, 1, %o0
! Clear the DIS bit
1050 cmp %o5
, %o0
! In the future?
1051 bg
,a,pt
%xcc
, 2f
! Yes
, drive on.
1052 wrpr
%g0
, %g5
, %pstate
! delay
: enable vec intr
1053 ba 1b ! No
, try again.
1054 sllx
%o4
, 1, %o4
! delay
: double step size
1056 2: ba current_thread_complete
1066 pil15_interrupt
(int level
)
1072 * Level-15 interrupt prologue.
1074 ENTRY_NP
(pil15_interrupt
)
1078 btst TSTATE_PRIV
, %g6
! trap from supervisor mode?
1080 stn
%g5
, [%g1
+ CPU_CPCPROFILE_PC
] ! if so
, record kernel PC
1081 stn
%g5
, [%g1
+ CPU_CPCPROFILE_UPC
] ! if
not, record user PC
1082 ba pil15_epilogue
! must
be large-disp branch
1083 stn
%g0
, [%g1
+ CPU_CPCPROFILE_PC
] ! zero kernel PC
1084 1: ba pil15_epilogue
! must
be large-disp branch
1085 stn
%g0
, [%g1
+ CPU_CPCPROFILE_UPC
] ! zero user PC
1086 SET_SIZE
(pil15_interrupt
)
1090 #if defined(lint) || defined(__lint)
1094 find_cpufrequency
(volatile uchar_t
*clock_ptr
)
1104 .asciz "find_cpufrequency: interrupts already disabled on entry"
1107 ENTRY_NP
(find_cpufrequency
)
1111 andcc
%g1
, PSTATE_IE
, %g0
! If DEBUG
, check that interrupts
1112 bnz
0f
! are currently enabled
1113 sethi
%hi
(find_cpufreq_panic
), %o1
1115 or %o1
, %lo
(find_cpufreq_panic
), %o0
1119 wrpr
%g1
, PSTATE_IE
, %pstate
! Disable interrupts
1121 ldub
[%o0
], %o1
! Read the number of seconds
1122 mov
%o1
, %o2
! remember initial value in
%o2
1124 GET_NATIVE_TIME
(%o3
, %g4
, %g5
)
1125 cmp %o1
, %o2
! did the seconds register roll over?
1126 be,pt
%icc
, 1b ! branch back if unchanged
1127 ldub
[%o0
], %o2
! delay
: load the new seconds val
1129 brz
,pn
%o2
, 3b ! if the minutes just rolled over
,
1130 ! the last second could have been
1131 ! inaccurate; try again.
1132 mov
%o2
, %o4
! delay
: store init. val. in
%o2
1134 GET_NATIVE_TIME
(%o5
, %g4
, %g5
)
1135 cmp %o2
, %o4
! did the seconds register roll over?
1136 be,pt
%icc
, 2b ! branch back if unchanged
1137 ldub
[%o0
], %o4
! delay
: load the new seconds val
1139 brz
,pn
%o4
, 0b ! if the minutes just rolled over
,
1140 ! the last second could have been
1141 ! inaccurate; try again.
1142 wrpr
%g0
, %g1
, %pstate
! delay
: re-enable interrupts
1145 sub %o5
, %o3
, %o0
! return the difference in ticks
1146 SET_SIZE
(find_cpufrequency
)
1152 * Prefetch a page_t for write or read, this assumes a linear
1153 * scan of sequential page_t's.
1157 prefetch_page_w
(void
*pp
)
1162 prefetch_page_r
(void
*pp
)
1166 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1169 ! On US-III
, the prefetch instruction queue is
8 entries deep.
1170 ! Also
, prefetches for write put data in the E$
, which has
1171 ! lines of
512 bytes for an
8MB cache. Each E$ line is further
1172 ! subblocked into
64 byte chunks.
1174 ! Since prefetch can only bring in
64 bytes at
a time
(See Sparc
1175 ! v9 Architecture Manual pp.204
) and a page_t is
128 bytes
,
1176 ! then
2 prefetches are required in order to bring an entire
1179 ! Since the prefetch queue is
8 entries deep
, we currently can
1180 ! only have
4 prefetches for page_t
's outstanding. Thus, we
1181 ! prefetch n+4 ahead of where we are now:
1183 ! 4 * sizeof(page_t) -> 512
1184 ! 4 * sizeof(page_t) +64 -> 576
1188 ! contiguous page array in memory...
1190 ! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
1192 ! pp | pp+4*sizeof(page)+64
1198 ! +-------+<--- In this iteration, we're working with pp
(AAA1
),
1199 ! |Preftch| but we enqueue prefetch for addr
= XXX1
1201 ! +-------+<--- this queue slot will
be a prefetch instruction for
1202 ! |Preftch| for addr
= pp
+ 4*sizeof
(page_t
) + 64 (or second
1203 ! | XXX2 | half of page XXX
)
1205 ! |Preftch|
<-+- The next time around this function
, we
'll be
1206 ! | YYY1 | | working with pp = BBB1, but will be enqueueing
1207 ! +-------+ | prefetches to for both halves of page YYY,
1208 ! |Preftch| | while both halves of page XXX are in transit
1209 ! | YYY2 |<-+ make their way into the E$.
1218 ! +============================================...
1219 ! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
1220 ! +============================================...
1222 ! +============================================...
1226 ! So we should expect the first four page accesses to stall
1227 ! while we warm up the cache, after which, most of the pages
1228 ! will have their pp ready in the E$.
1230 ! Also note that if sizeof(page_t) grows beyond 128, then
1231 ! we'll need an additional prefetch to get an entire page
1232 ! into the E$
, thus reducing the number of outstanding page
1233 ! prefetches to
2 (ie.
3 prefetches
/page
= 6 queue slots
)
1238 ! On Cheetah+ we use
"#n_write" prefetches as these avoid
1239 ! unnecessary RTS-
>RTO bus transaction state change
, and
1240 ! just issues RTO transaction.
(See pp.77 of Cheetah+ Delta
1241 ! PRM
). On Cheetah, #n_write prefetches are reflected with
1242 ! RTS-
>RTO state transition regardless.
1247 #if STRIDE1 != (PAGE_SIZE * 4)
1248 #error "STRIDE1 != (PAGE_SIZE * 4)"
1249 #endif /* STRIDE1 != (PAGE_SIZE * 4) */
1251 ENTRY
(prefetch_page_w
)
1252 prefetch
[%o0+STRIDE1
], #n_writes
1254 prefetch
[%o0+STRIDE2
], #n_writes
1255 SET_SIZE
(prefetch_page_w
)
1258 ! Note on CHEETAH to prefetch for read
, we really use
#one_write.
1259 ! This fetches to E$
(general use
) rather than P$
(floating point use
).
1261 ENTRY
(prefetch_page_r
)
1262 prefetch
[%o0+STRIDE1
], #one_write
1264 prefetch
[%o0+STRIDE2
], #one_write
1265 SET_SIZE
(prefetch_page_r
)
1267 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1270 ! UltraSparcII can have up to
3 prefetches outstanding.
1271 ! A page_t is
128 bytes
(2 prefetches of
64 bytes each
)
1272 ! So prefetch for pp
+ 1, which is
1274 ! pp
+ sizeof
(page_t
)
1276 ! pp
+ sizeof
(page_t
) + 64
1281 #if STRIDE1 != PAGE_SIZE
1282 #error "STRIDE1 != PAGE_SIZE"
1283 #endif /* STRIDE1 != PAGE_SIZE */
1285 ENTRY
(prefetch_page_w
)
1286 prefetch
[%o0+STRIDE1
], #n_writes
1288 prefetch
[%o0+STRIDE2
], #n_writes
1289 SET_SIZE
(prefetch_page_w
)
1291 ENTRY
(prefetch_page_r
)
1292 prefetch
[%o0+STRIDE1
], #n_reads
1294 prefetch
[%o0+STRIDE2
], #n_reads
1295 SET_SIZE
(prefetch_page_r
)
1297 #elif defined(OLYMPUS_C)
1299 ! Prefetch strides for Olympus-C
1302 #define STRIDE1 0x440
1303 #define STRIDE2 0x640
1305 ENTRY
(prefetch_page_w
)
1306 prefetch
[%o0+STRIDE1
], #n_writes
1308 prefetch
[%o0+STRIDE2
], #n_writes
1309 SET_SIZE
(prefetch_page_w
)
1311 ENTRY
(prefetch_page_r
)
1312 prefetch
[%o0+STRIDE1
], #n_writes
1314 prefetch
[%o0+STRIDE2
], #n_writes
1315 SET_SIZE
(prefetch_page_r
)
1316 #else /* OLYMPUS_C */
1318 #error "You need to fix this for your new cpu type."
1320 #endif /* OLYMPUS_C */
1326 * Prefetch struct smap for write.
1330 prefetch_smap_w
(void
*smp
)
1334 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1337 #define PREFETCH_Q_LEN 8
1339 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1341 #define PREFETCH_Q_LEN 3
1343 #elif defined(OLYMPUS_C)
1345 ! Use length of one for now.
1347 #define PREFETCH_Q_LEN 1
1349 #else /* OLYMPUS_C */
1351 #error You need to fix this for your new cpu type.
1353 #endif /* OLYMPUS_C */
1357 #ifdef SEGKPM_SUPPORT
1359 #define SMAP_SIZE 72
1360 #define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1362 #else /* SEGKPM_SUPPORT */
1365 ! The hardware will prefetch the
64 byte cache aligned block
1366 ! that contains the address specified in the prefetch instruction.
1367 ! Since the size of the smap struct is
48 bytes
, issuing
1 prefetch
1368 ! per pass will suffice as long as we prefetch far enough ahead to
1369 ! make sure we don
't stall for the cases where the smap object
1370 ! spans multiple hardware prefetch blocks. Let's prefetch as far
1371 ! ahead as the hardware will allow.
1373 ! The smap array is processed with decreasing address pointers.
1375 #define SMAP_SIZE 48
1376 #define SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1378 #endif /* SEGKPM_SUPPORT */
1380 ENTRY
(prefetch_smap_w
)
1382 prefetch
[%o0-SMAP_STRIDE
], #n_writes
1383 SET_SIZE
(prefetch_smap_w
)
1387 #if defined(lint) || defined(__lint)
1398 ldxa
[%g0
]ASI_INTR_DISPATCH_STATUS
, %o0